Diffstat (limited to 'llvm/test')
45 files changed, 5792 insertions, 1773 deletions
diff --git a/llvm/test/Bitcode/attributes.ll b/llvm/test/Bitcode/attributes.ll index 8c1a763..aef7810 100644 --- a/llvm/test/Bitcode/attributes.ll +++ b/llvm/test/Bitcode/attributes.ll @@ -516,6 +516,11 @@ define void @f93() sanitize_realtime_blocking { ret void; } +; CHECK: define void @f_sanitize_alloc_token() #55 +define void @f_sanitize_alloc_token() sanitize_alloc_token { + ret void; +} + ; CHECK: define void @f87() [[FNRETTHUNKEXTERN:#[0-9]+]] define void @f87() fn_ret_thunk_extern { ret void } @@ -627,6 +632,7 @@ define void @dead_on_return(ptr dead_on_return %p) { ; CHECK: attributes #52 = { nosanitize_bounds } ; CHECK: attributes #53 = { sanitize_realtime } ; CHECK: attributes #54 = { sanitize_realtime_blocking } +; CHECK: attributes #55 = { sanitize_alloc_token } ; CHECK: attributes [[FNRETTHUNKEXTERN]] = { fn_ret_thunk_extern } ; CHECK: attributes [[SKIPPROFILE]] = { skipprofile } ; CHECK: attributes [[OPTDEBUG]] = { optdebug } diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll index 0b5ce08..e21786e 100644 --- a/llvm/test/Bitcode/compatibility.ll +++ b/llvm/test/Bitcode/compatibility.ll @@ -1718,7 +1718,7 @@ exit: ; CHECK: select <2 x i1> <i1 true, i1 false>, <2 x i8> <i8 2, i8 3>, <2 x i8> <i8 3, i8 2> call void @f.nobuiltin() builtin - ; CHECK: call void @f.nobuiltin() #54 + ; CHECK: call void @f.nobuiltin() #55 call fastcc noalias ptr @f.noalias() noinline ; CHECK: call fastcc noalias ptr @f.noalias() #12 @@ -2151,6 +2151,9 @@ declare void @f.sanitize_realtime() sanitize_realtime declare void @f.sanitize_realtime_blocking() sanitize_realtime_blocking ; CHECK: declare void @f.sanitize_realtime_blocking() #53 +declare void @f.sanitize_alloc_token() sanitize_alloc_token +; CHECK: declare void @f.sanitize_alloc_token() #54 + ; CHECK: declare nofpclass(snan) float @nofpclass_snan(float nofpclass(snan)) declare nofpclass(snan) float @nofpclass_snan(float nofpclass(snan)) @@ -2284,7 +2287,8 @@ define float @nofpclass_callsites(float %arg, { float } %arg1) { ; CHECK: attributes #51 = { sanitize_numerical_stability } ; CHECK: attributes #52 = { sanitize_realtime } ; CHECK: attributes #53 = { sanitize_realtime_blocking } -; CHECK: attributes #54 = { builtin } +; CHECK: attributes #54 = { sanitize_alloc_token } +; CHECK: attributes #55 = { builtin } ;; Metadata diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.form.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.form.ll index 87a7c2e..cc4cc8e 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.form.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.form.ll @@ -72,5 +72,206 @@ define <4 x float> @request_no_agpr(<8 x half> %arg0, <8 x half> %arg1, <4 x flo ret <4 x float> %result } +; Make sure this selects the VGPR form, if AGPRs available, but not +; enough. 
+define amdgpu_kernel void @not_enough_agprs(ptr addrspace(1) %arg) #2 { +; HEURRC-LABEL: not_enough_agprs: +; HEURRC: ; %bb.0: ; %bb +; HEURRC-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 +; HEURRC-NEXT: v_mov_b32_e32 v33, 1.0 +; HEURRC-NEXT: v_mov_b32_e32 v34, 2.0 +; HEURRC-NEXT: v_mov_b32_e32 v32, 0 +; HEURRC-NEXT: s_waitcnt lgkmcnt(0) +; HEURRC-NEXT: s_load_dwordx16 s[16:31], s[34:35], 0x0 +; HEURRC-NEXT: s_load_dwordx16 s[0:15], s[34:35], 0x40 +; HEURRC-NEXT: s_waitcnt lgkmcnt(0) +; HEURRC-NEXT: v_mov_b32_e32 v0, s16 +; HEURRC-NEXT: v_mov_b32_e32 v1, s17 +; HEURRC-NEXT: v_mov_b32_e32 v2, s18 +; HEURRC-NEXT: v_mov_b32_e32 v3, s19 +; HEURRC-NEXT: v_mov_b32_e32 v4, s20 +; HEURRC-NEXT: v_mov_b32_e32 v5, s21 +; HEURRC-NEXT: v_mov_b32_e32 v6, s22 +; HEURRC-NEXT: v_mov_b32_e32 v7, s23 +; HEURRC-NEXT: v_mov_b32_e32 v8, s24 +; HEURRC-NEXT: v_mov_b32_e32 v9, s25 +; HEURRC-NEXT: v_mov_b32_e32 v10, s26 +; HEURRC-NEXT: v_mov_b32_e32 v11, s27 +; HEURRC-NEXT: v_mov_b32_e32 v12, s28 +; HEURRC-NEXT: v_mov_b32_e32 v13, s29 +; HEURRC-NEXT: v_mov_b32_e32 v14, s30 +; HEURRC-NEXT: v_mov_b32_e32 v15, s31 +; HEURRC-NEXT: v_mov_b32_e32 v16, s0 +; HEURRC-NEXT: v_mov_b32_e32 v17, s1 +; HEURRC-NEXT: v_mov_b32_e32 v18, s2 +; HEURRC-NEXT: v_mov_b32_e32 v19, s3 +; HEURRC-NEXT: v_mov_b32_e32 v20, s4 +; HEURRC-NEXT: v_mov_b32_e32 v21, s5 +; HEURRC-NEXT: v_mov_b32_e32 v22, s6 +; HEURRC-NEXT: v_mov_b32_e32 v23, s7 +; HEURRC-NEXT: v_mov_b32_e32 v24, s8 +; HEURRC-NEXT: v_mov_b32_e32 v25, s9 +; HEURRC-NEXT: v_mov_b32_e32 v26, s10 +; HEURRC-NEXT: v_mov_b32_e32 v27, s11 +; HEURRC-NEXT: v_mov_b32_e32 v28, s12 +; HEURRC-NEXT: v_mov_b32_e32 v29, s13 +; HEURRC-NEXT: v_mov_b32_e32 v30, s14 +; HEURRC-NEXT: v_mov_b32_e32 v31, s15 +; HEURRC-NEXT: s_nop 1 +; HEURRC-NEXT: v_mfma_f32_32x32x1_2b_f32 v[0:31], v33, v34, v[0:31] cbsz:1 abid:2 blgp:3 +; HEURRC-NEXT: s_nop 15 +; HEURRC-NEXT: s_nop 1 +; HEURRC-NEXT: global_store_dwordx4 v32, v[24:27], s[34:35] offset:96 +; HEURRC-NEXT: global_store_dwordx4 v32, v[28:31], s[34:35] offset:112 +; HEURRC-NEXT: global_store_dwordx4 v32, v[16:19], s[34:35] offset:64 +; HEURRC-NEXT: global_store_dwordx4 v32, v[20:23], s[34:35] offset:80 +; HEURRC-NEXT: global_store_dwordx4 v32, v[8:11], s[34:35] offset:32 +; HEURRC-NEXT: global_store_dwordx4 v32, v[12:15], s[34:35] offset:48 +; HEURRC-NEXT: global_store_dwordx4 v32, v[0:3], s[34:35] +; HEURRC-NEXT: global_store_dwordx4 v32, v[4:7], s[34:35] offset:16 +; HEURRC-NEXT: s_endpgm +; +; VGPRRC-LABEL: not_enough_agprs: +; VGPRRC: ; %bb.0: ; %bb +; VGPRRC-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 +; VGPRRC-NEXT: v_mov_b32_e32 v33, 1.0 +; VGPRRC-NEXT: v_mov_b32_e32 v34, 2.0 +; VGPRRC-NEXT: v_mov_b32_e32 v32, 0 +; VGPRRC-NEXT: s_waitcnt lgkmcnt(0) +; VGPRRC-NEXT: s_load_dwordx16 s[16:31], s[34:35], 0x0 +; VGPRRC-NEXT: s_load_dwordx16 s[0:15], s[34:35], 0x40 +; VGPRRC-NEXT: s_waitcnt lgkmcnt(0) +; VGPRRC-NEXT: v_mov_b32_e32 v0, s16 +; VGPRRC-NEXT: v_mov_b32_e32 v1, s17 +; VGPRRC-NEXT: v_mov_b32_e32 v2, s18 +; VGPRRC-NEXT: v_mov_b32_e32 v3, s19 +; VGPRRC-NEXT: v_mov_b32_e32 v4, s20 +; VGPRRC-NEXT: v_mov_b32_e32 v5, s21 +; VGPRRC-NEXT: v_mov_b32_e32 v6, s22 +; VGPRRC-NEXT: v_mov_b32_e32 v7, s23 +; VGPRRC-NEXT: v_mov_b32_e32 v8, s24 +; VGPRRC-NEXT: v_mov_b32_e32 v9, s25 +; VGPRRC-NEXT: v_mov_b32_e32 v10, s26 +; VGPRRC-NEXT: v_mov_b32_e32 v11, s27 +; VGPRRC-NEXT: v_mov_b32_e32 v12, s28 +; VGPRRC-NEXT: v_mov_b32_e32 v13, s29 +; VGPRRC-NEXT: v_mov_b32_e32 v14, s30 +; VGPRRC-NEXT: v_mov_b32_e32 v15, s31 +; VGPRRC-NEXT: v_mov_b32_e32 v16, s0 +; VGPRRC-NEXT: 
v_mov_b32_e32 v17, s1 +; VGPRRC-NEXT: v_mov_b32_e32 v18, s2 +; VGPRRC-NEXT: v_mov_b32_e32 v19, s3 +; VGPRRC-NEXT: v_mov_b32_e32 v20, s4 +; VGPRRC-NEXT: v_mov_b32_e32 v21, s5 +; VGPRRC-NEXT: v_mov_b32_e32 v22, s6 +; VGPRRC-NEXT: v_mov_b32_e32 v23, s7 +; VGPRRC-NEXT: v_mov_b32_e32 v24, s8 +; VGPRRC-NEXT: v_mov_b32_e32 v25, s9 +; VGPRRC-NEXT: v_mov_b32_e32 v26, s10 +; VGPRRC-NEXT: v_mov_b32_e32 v27, s11 +; VGPRRC-NEXT: v_mov_b32_e32 v28, s12 +; VGPRRC-NEXT: v_mov_b32_e32 v29, s13 +; VGPRRC-NEXT: v_mov_b32_e32 v30, s14 +; VGPRRC-NEXT: v_mov_b32_e32 v31, s15 +; VGPRRC-NEXT: s_nop 1 +; VGPRRC-NEXT: v_mfma_f32_32x32x1_2b_f32 v[0:31], v33, v34, v[0:31] cbsz:1 abid:2 blgp:3 +; VGPRRC-NEXT: s_nop 15 +; VGPRRC-NEXT: s_nop 1 +; VGPRRC-NEXT: global_store_dwordx4 v32, v[24:27], s[34:35] offset:96 +; VGPRRC-NEXT: global_store_dwordx4 v32, v[28:31], s[34:35] offset:112 +; VGPRRC-NEXT: global_store_dwordx4 v32, v[16:19], s[34:35] offset:64 +; VGPRRC-NEXT: global_store_dwordx4 v32, v[20:23], s[34:35] offset:80 +; VGPRRC-NEXT: global_store_dwordx4 v32, v[8:11], s[34:35] offset:32 +; VGPRRC-NEXT: global_store_dwordx4 v32, v[12:15], s[34:35] offset:48 +; VGPRRC-NEXT: global_store_dwordx4 v32, v[0:3], s[34:35] +; VGPRRC-NEXT: global_store_dwordx4 v32, v[4:7], s[34:35] offset:16 +; VGPRRC-NEXT: s_endpgm +bb: + %in.1 = load <32 x float>, ptr addrspace(1) %arg, align 128 + %mai.1 = call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.000000e+00, float 2.000000e+00, <32 x float> %in.1, i32 1, i32 2, i32 3) + store <32 x float> %mai.1, ptr addrspace(1) %arg, align 128 + ret void +} + +define <16 x float> @mfma_scale_respect_flag(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %scale0, i32 %scale1) #2 { +; HEURRC-LABEL: mfma_scale_respect_flag: +; HEURRC: ; %bb.0: +; HEURRC-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; HEURRC-NEXT: scratch_load_dword a15, off, s32 +; HEURRC-NEXT: scratch_load_dword v31, off, s32 offset:8 +; HEURRC-NEXT: scratch_load_dword v32, off, s32 offset:4 +; HEURRC-NEXT: v_accvgpr_write_b32 a0, v16 +; HEURRC-NEXT: v_accvgpr_write_b32 a1, v17 +; HEURRC-NEXT: v_accvgpr_write_b32 a2, v18 +; HEURRC-NEXT: v_accvgpr_write_b32 a3, v19 +; HEURRC-NEXT: v_accvgpr_write_b32 a4, v20 +; HEURRC-NEXT: v_accvgpr_write_b32 a5, v21 +; HEURRC-NEXT: v_accvgpr_write_b32 a6, v22 +; HEURRC-NEXT: v_accvgpr_write_b32 a7, v23 +; HEURRC-NEXT: v_accvgpr_write_b32 a8, v24 +; HEURRC-NEXT: v_accvgpr_write_b32 a9, v25 +; HEURRC-NEXT: v_accvgpr_write_b32 a10, v26 +; HEURRC-NEXT: v_accvgpr_write_b32 a11, v27 +; HEURRC-NEXT: v_accvgpr_write_b32 a12, v28 +; HEURRC-NEXT: v_accvgpr_write_b32 a13, v29 +; HEURRC-NEXT: v_accvgpr_write_b32 a14, v30 +; HEURRC-NEXT: s_waitcnt vmcnt(0) +; HEURRC-NEXT: s_nop 0 +; HEURRC-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] +; HEURRC-NEXT: s_nop 15 +; HEURRC-NEXT: s_nop 3 +; HEURRC-NEXT: v_accvgpr_read_b32 v0, a0 +; HEURRC-NEXT: v_accvgpr_read_b32 v1, a1 +; HEURRC-NEXT: v_accvgpr_read_b32 v2, a2 +; HEURRC-NEXT: v_accvgpr_read_b32 v3, a3 +; HEURRC-NEXT: v_accvgpr_read_b32 v4, a4 +; HEURRC-NEXT: v_accvgpr_read_b32 v5, a5 +; HEURRC-NEXT: v_accvgpr_read_b32 v6, a6 +; HEURRC-NEXT: v_accvgpr_read_b32 v7, a7 +; HEURRC-NEXT: v_accvgpr_read_b32 v8, a8 +; HEURRC-NEXT: v_accvgpr_read_b32 v9, a9 +; HEURRC-NEXT: v_accvgpr_read_b32 v10, a10 +; HEURRC-NEXT: v_accvgpr_read_b32 v11, a11 +; HEURRC-NEXT: v_accvgpr_read_b32 v12, a12 +; HEURRC-NEXT: v_accvgpr_read_b32 v13, a13 +; HEURRC-NEXT: v_accvgpr_read_b32 v14, a14 +; HEURRC-NEXT: 
v_accvgpr_read_b32 v15, a15 +; HEURRC-NEXT: s_setpc_b64 s[30:31] +; +; VGPRRC-LABEL: mfma_scale_respect_flag: +; VGPRRC: ; %bb.0: +; VGPRRC-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VGPRRC-NEXT: scratch_load_dword v31, off, s32 +; VGPRRC-NEXT: scratch_load_dword v32, off, s32 offset:8 +; VGPRRC-NEXT: scratch_load_dword v33, off, s32 offset:4 +; VGPRRC-NEXT: s_waitcnt vmcnt(0) +; VGPRRC-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 v[16:31], v[0:7], v[8:15], v[16:31], v33, v32 op_sel_hi:[0,0,0] +; VGPRRC-NEXT: s_nop 15 +; VGPRRC-NEXT: s_nop 3 +; VGPRRC-NEXT: v_mov_b32_e32 v0, v16 +; VGPRRC-NEXT: v_mov_b32_e32 v1, v17 +; VGPRRC-NEXT: v_mov_b32_e32 v2, v18 +; VGPRRC-NEXT: v_mov_b32_e32 v3, v19 +; VGPRRC-NEXT: v_mov_b32_e32 v4, v20 +; VGPRRC-NEXT: v_mov_b32_e32 v5, v21 +; VGPRRC-NEXT: v_mov_b32_e32 v6, v22 +; VGPRRC-NEXT: v_mov_b32_e32 v7, v23 +; VGPRRC-NEXT: v_mov_b32_e32 v8, v24 +; VGPRRC-NEXT: v_mov_b32_e32 v9, v25 +; VGPRRC-NEXT: v_mov_b32_e32 v10, v26 +; VGPRRC-NEXT: v_mov_b32_e32 v11, v27 +; VGPRRC-NEXT: v_mov_b32_e32 v12, v28 +; VGPRRC-NEXT: v_mov_b32_e32 v13, v29 +; VGPRRC-NEXT: v_mov_b32_e32 v14, v30 +; VGPRRC-NEXT: v_mov_b32_e32 v15, v31 +; VGPRRC-NEXT: s_setpc_b64 s[30:31] + %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, + i32 0, ; cbsz + i32 0, ; blgp + i32 0, i32 %scale0, i32 0, i32 %scale1) + ret <16 x float> %result +} + attributes #0 = { "amdgpu-agpr-alloc"="32,256" } attributes #1 = { "amdgpu-agpr-alloc"="0,0" } +attributes #2 = { nounwind "amdgpu-agpr-alloc"="20" } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll index 5ab8706..22bc62a 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll @@ -726,12 +726,12 @@ define amdgpu_kernel void @test_mfma_f64_4x4x4f64(ptr addrspace(1) %arg, double ; GFX90A-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX90A-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1] +; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[4:5], s[6:7], s[6:7] op_sel:[0,1] ; GFX90A-VGPR-NEXT: s_nop 1 -; GFX90A-VGPR-NEXT: v_mfma_f64_4x4x4f64 v[4:5], v[0:1], v[2:3], 0 +; GFX90A-VGPR-NEXT: v_mfma_f64_4x4x4f64 v[0:1], v[2:3], v[4:5], 0 ; GFX90A-VGPR-NEXT: s_nop 3 -; GFX90A-VGPR-NEXT: v_mfma_f64_4x4x4f64 v[0:1], v[0:1], v[2:3], v[4:5] cbsz:1 abid:2 blgp:3 +; GFX90A-VGPR-NEXT: v_mfma_f64_4x4x4f64 v[0:1], v[2:3], v[4:5], v[0:1] cbsz:1 abid:2 blgp:3 ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-VGPR-NEXT: s_nop 7 ; GFX90A-VGPR-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] @@ -742,12 +742,12 @@ define amdgpu_kernel void @test_mfma_f64_4x4x4f64(ptr addrspace(1) %arg, double ; GFX942-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX942-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], s[6:7] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], s[6:7] ; GFX942-VGPR-NEXT: s_nop 1 -; GFX942-VGPR-NEXT: v_mfma_f64_4x4x4_4b_f64 v[4:5], v[0:1], v[2:3], 0 +; GFX942-VGPR-NEXT: v_mfma_f64_4x4x4_4b_f64 v[0:1], v[2:3], v[4:5], 0 ; GFX942-VGPR-NEXT: s_nop 3 -; 
GFX942-VGPR-NEXT: v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], v[2:3], v[4:5] cbsz:1 abid:2 neg:[1,1,0] +; GFX942-VGPR-NEXT: v_mfma_f64_4x4x4_4b_f64 v[0:1], v[2:3], v[4:5], v[0:1] cbsz:1 abid:2 neg:[1,1,0] ; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, 0 ; GFX942-VGPR-NEXT: s_nop 7 ; GFX942-VGPR-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] @@ -765,10 +765,10 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl ; GFX90A-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24 ; GFX90A-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x34 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: v_mov_b32_e32 v2, s10 +; GFX90A-NEXT: v_mov_b32_e32 v0, s10 ; GFX90A-NEXT: s_load_dwordx8 s[0:7], s[8:9], 0x0 -; GFX90A-NEXT: v_mov_b32_e32 v3, s11 -; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[12:13], s[12:13] op_sel:[0,1] +; GFX90A-NEXT: v_mov_b32_e32 v1, s11 +; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[12:13], s[12:13] op_sel:[0,1] ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_accvgpr_write_b32 a0, s0 ; GFX90A-NEXT: v_accvgpr_write_b32 a1, s1 @@ -779,7 +779,7 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl ; GFX90A-NEXT: v_accvgpr_write_b32 a6, s6 ; GFX90A-NEXT: v_accvgpr_write_b32 a7, s7 ; GFX90A-NEXT: s_nop 1 -; GFX90A-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[2:3], v[0:1], a[0:7] cbsz:1 abid:2 blgp:3 +; GFX90A-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7] cbsz:1 abid:2 blgp:3 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NEXT: s_nop 15 ; GFX90A-NEXT: s_nop 0 @@ -792,10 +792,10 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl ; GFX942-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24 ; GFX942-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x34 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-NEXT: v_mov_b32_e32 v2, s10 +; GFX942-NEXT: v_mov_b32_e32 v0, s10 ; GFX942-NEXT: s_load_dwordx8 s[0:7], s[8:9], 0x0 -; GFX942-NEXT: v_mov_b32_e32 v3, s11 -; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[12:13] +; GFX942-NEXT: v_mov_b32_e32 v1, s11 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[12:13] ; GFX942-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NEXT: v_accvgpr_write_b32 a0, s0 ; GFX942-NEXT: v_accvgpr_write_b32 a1, s1 @@ -806,7 +806,7 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl ; GFX942-NEXT: v_accvgpr_write_b32 a6, s6 ; GFX942-NEXT: v_accvgpr_write_b32 a7, s7 ; GFX942-NEXT: s_nop 1 -; GFX942-NEXT: v_mfma_f64_16x16x4_f64 a[0:7], v[2:3], v[0:1], a[0:7] cbsz:1 abid:2 neg:[1,1,0] +; GFX942-NEXT: v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] cbsz:1 abid:2 neg:[1,1,0] ; GFX942-NEXT: v_mov_b32_e32 v0, 0 ; GFX942-NEXT: s_nop 15 ; GFX942-NEXT: s_nop 0 @@ -819,17 +819,17 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl ; GFX90A-VGPR-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24 ; GFX90A-VGPR-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x34 ; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-VGPR-NEXT: v_mov_b32_e32 v10, s10 +; GFX90A-VGPR-NEXT: v_mov_b32_e32 v8, s10 ; GFX90A-VGPR-NEXT: s_load_dwordx8 s[0:7], s[8:9], 0x0 -; GFX90A-VGPR-NEXT: v_mov_b32_e32 v11, s11 -; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], s[12:13], s[12:13] op_sel:[0,1] +; GFX90A-VGPR-NEXT: v_mov_b32_e32 v9, s11 +; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11], s[12:13], s[12:13] op_sel:[0,1] ; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[4:5], s[4:5], s[4:5] op_sel:[0,1] ; GFX90A-VGPR-NEXT: 
v_pk_mov_b32 v[6:7], s[6:7], s[6:7] op_sel:[0,1] ; GFX90A-VGPR-NEXT: s_nop 1 -; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[10:11], v[8:9], v[0:7] cbsz:1 abid:2 blgp:3 +; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 blgp:3 ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v8, 0 ; GFX90A-VGPR-NEXT: s_nop 15 ; GFX90A-VGPR-NEXT: s_nop 0 @@ -842,17 +842,17 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl ; GFX942-VGPR-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24 ; GFX942-VGPR-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x34 ; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-VGPR-NEXT: v_mov_b32_e32 v10, s10 +; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, s10 ; GFX942-VGPR-NEXT: s_load_dwordx8 s[0:7], s[8:9], 0x0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v11, s11 -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], s[12:13] +; GFX942-VGPR-NEXT: v_mov_b32_e32 v9, s11 +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[12:13] ; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], s[4:5] ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], s[6:7] ; GFX942-VGPR-NEXT: s_nop 1 -; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[10:11], v[8:9], v[0:7] cbsz:1 abid:2 neg:[1,1,0] +; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 neg:[1,1,0] ; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, 0 ; GFX942-VGPR-NEXT: s_nop 15 ; GFX942-VGPR-NEXT: s_nop 0 @@ -1629,20 +1629,20 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, d ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v7, 0x3ff00000 ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v2, v0 ; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-VGPR-NEXT: v_mov_b32_e32 v12, s2 -; GFX90A-VGPR-NEXT: v_mov_b32_e32 v13, s3 +; GFX90A-VGPR-NEXT: v_mov_b32_e32 v10, s2 +; GFX90A-VGPR-NEXT: v_mov_b32_e32 v11, s3 ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v3, v0 ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v4, v0 ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v5, v0 ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v6, v0 ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v1, v0 ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], v[6:7], v[6:7] op_sel:[0,1] -; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11], s[6:7], s[6:7] op_sel:[0,1] +; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[12:13], s[6:7], s[6:7] op_sel:[0,1] ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1] ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1] ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1] ; GFX90A-VGPR-NEXT: s_nop 1 -; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[12:13], v[10:11], v[2:9] +; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11], v[12:13], v[2:9] ; GFX90A-VGPR-NEXT: s_nop 15 ; GFX90A-VGPR-NEXT: s_nop 1 ; GFX90A-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16 @@ -1657,20 +1657,20 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, d ; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, 0x3ff00000 ; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0 ; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-VGPR-NEXT: v_mov_b32_e32 v12, s2 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v13, s3 +; GFX942-VGPR-NEXT: v_mov_b32_e32 v10, s2 +; GFX942-VGPR-NEXT: v_mov_b32_e32 v11, s3 ; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v0 ; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0 ; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v0 ; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0 ; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, v0 ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], v[6:7] -; 
GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[6:7] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[12:13], s[6:7] ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[4:5] ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[2:3] ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], v[0:1] ; GFX942-VGPR-NEXT: s_nop 1 -; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[12:13], v[10:11], v[2:9] +; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9] ; GFX942-VGPR-NEXT: s_nop 15 ; GFX942-VGPR-NEXT: s_nop 1 ; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16 @@ -1743,20 +1743,20 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(ptr addrspace(1) % ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v1, 0x405ec000 ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v2, v0 ; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-VGPR-NEXT: v_mov_b32_e32 v12, s2 -; GFX90A-VGPR-NEXT: v_mov_b32_e32 v13, s3 +; GFX90A-VGPR-NEXT: v_mov_b32_e32 v10, s2 +; GFX90A-VGPR-NEXT: v_mov_b32_e32 v11, s3 ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v3, v1 ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v4, v0 ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v5, v1 ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v6, v0 ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v7, v1 ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], v[6:7], v[6:7] op_sel:[0,1] -; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11], s[6:7], s[6:7] op_sel:[0,1] +; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[12:13], s[6:7], s[6:7] op_sel:[0,1] ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1] ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1] ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1] ; GFX90A-VGPR-NEXT: s_nop 1 -; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[12:13], v[10:11], v[2:9] +; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11], v[12:13], v[2:9] ; GFX90A-VGPR-NEXT: s_nop 15 ; GFX90A-VGPR-NEXT: s_nop 1 ; GFX90A-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16 @@ -1771,20 +1771,20 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(ptr addrspace(1) % ; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 0x405ec000 ; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0 ; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-VGPR-NEXT: v_mov_b32_e32 v12, s2 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v13, s3 +; GFX942-VGPR-NEXT: v_mov_b32_e32 v10, s2 +; GFX942-VGPR-NEXT: v_mov_b32_e32 v11, s3 ; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v1 ; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0 ; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v1 ; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0 ; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v1 ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], v[6:7] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[6:7] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[12:13], s[6:7] ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[4:5] ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[2:3] ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], v[0:1] ; GFX942-VGPR-NEXT: s_nop 1 -; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[12:13], v[10:11], v[2:9] +; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9] ; GFX942-VGPR-NEXT: s_nop 15 ; GFX942-VGPR-NEXT: s_nop 1 ; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx942.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx942.ll index dc4c9291..2fb677e 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx942.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx942.ll @@ -1445,20 +1445,20 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x32_f16(ptr addrspace(1) %arg, < ; GFX942-SDAG: ; %bb.0: ; %bb ; GFX942-SDAG-NEXT: 
s_load_dwordx8 s[8:15], s[4:5], 0x24 ; GFX942-SDAG-NEXT: s_load_dword s6, s[4:5], 0x44 -; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0 ; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 -; GFX942-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[10:11] -; GFX942-SDAG-NEXT: v_mov_b64_e32 v[0:1], s[12:13] -; GFX942-SDAG-NEXT: v_mov_b64_e32 v[2:3], s[14:15] -; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, s6 +; GFX942-SDAG-NEXT: v_mov_b64_e32 v[10:11], s[10:11] +; GFX942-SDAG-NEXT: v_mov_b64_e32 v[2:3], s[12:13] +; GFX942-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[14:15] +; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, s6 ; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-SDAG-NEXT: v_mov_b64_e32 v[10:11], s[2:3] -; GFX942-SDAG-NEXT: v_mov_b64_e32 v[8:9], s[0:1] +; GFX942-SDAG-NEXT: v_mov_b64_e32 v[8:9], s[2:3] +; GFX942-SDAG-NEXT: v_mov_b64_e32 v[6:7], s[0:1] ; GFX942-SDAG-NEXT: s_nop 1 -; GFX942-SDAG-NEXT: v_smfmac_f32_16x16x32_f16 v[8:11], v[4:5], v[0:3], v7 cbsz:1 abid:2 +; GFX942-SDAG-NEXT: v_smfmac_f32_16x16x32_f16 v[6:9], v[10:11], v[2:5], v1 cbsz:1 abid:2 ; GFX942-SDAG-NEXT: s_nop 6 -; GFX942-SDAG-NEXT: global_store_dwordx4 v6, v[8:11], s[8:9] +; GFX942-SDAG-NEXT: global_store_dwordx4 v0, v[6:9], s[8:9] ; GFX942-SDAG-NEXT: s_endpgm ; ; GFX942-GISEL-LABEL: test_smfmac_f32_16x16x32_f16: @@ -1485,20 +1485,20 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x32_f16(ptr addrspace(1) %arg, < ; GFX950-SDAG: ; %bb.0: ; %bb ; GFX950-SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 ; GFX950-SDAG-NEXT: s_load_dword s6, s[4:5], 0x44 -; GFX950-SDAG-NEXT: v_mov_b32_e32 v6, 0 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 0 ; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX950-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 -; GFX950-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[10:11] -; GFX950-SDAG-NEXT: v_mov_b64_e32 v[0:1], s[12:13] -; GFX950-SDAG-NEXT: v_mov_b64_e32 v[2:3], s[14:15] -; GFX950-SDAG-NEXT: v_mov_b32_e32 v7, s6 +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[10:11], s[10:11] +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[2:3], s[12:13] +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[14:15] +; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, s6 ; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX950-SDAG-NEXT: v_mov_b64_e32 v[10:11], s[2:3] -; GFX950-SDAG-NEXT: v_mov_b64_e32 v[8:9], s[0:1] +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[8:9], s[2:3] +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[6:7], s[0:1] ; GFX950-SDAG-NEXT: s_nop 1 -; GFX950-SDAG-NEXT: v_smfmac_f32_16x16x32_f16 v[8:11], v[4:5], v[0:3], v7 cbsz:1 abid:2 +; GFX950-SDAG-NEXT: v_smfmac_f32_16x16x32_f16 v[6:9], v[10:11], v[2:5], v1 cbsz:1 abid:2 ; GFX950-SDAG-NEXT: s_nop 7 -; GFX950-SDAG-NEXT: global_store_dwordx4 v6, v[8:11], s[8:9] +; GFX950-SDAG-NEXT: global_store_dwordx4 v0, v[6:9], s[8:9] ; GFX950-SDAG-NEXT: s_endpgm ; ; GFX950-GISEL-LABEL: test_smfmac_f32_16x16x32_f16: @@ -1577,11 +1577,11 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x16_f16(ptr addrspace(1) %arg, < ; GFX942-SDAG-NEXT: s_load_dwordx8 s[16:23], s[4:5], 0x24 ; GFX942-SDAG-NEXT: s_load_dword s24, s[4:5], 0x44 ; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-SDAG-NEXT: v_mov_b64_e32 v[20:21], s[18:19] +; GFX942-SDAG-NEXT: v_mov_b64_e32 v[22:23], s[18:19] ; GFX942-SDAG-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0 -; GFX942-SDAG-NEXT: v_mov_b64_e32 v[16:17], s[20:21] -; GFX942-SDAG-NEXT: v_mov_b64_e32 v[18:19], s[22:23] -; GFX942-SDAG-NEXT: v_mov_b32_e32 v22, s24 +; GFX942-SDAG-NEXT: v_mov_b64_e32 v[18:19], s[20:21] +; GFX942-SDAG-NEXT: v_mov_b64_e32 v[20:21], s[22:23] +; GFX942-SDAG-NEXT: 
v_mov_b32_e32 v16, s24 ; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-SDAG-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX942-SDAG-NEXT: v_mov_b64_e32 v[2:3], s[2:3] @@ -1592,7 +1592,7 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x16_f16(ptr addrspace(1) %arg, < ; GFX942-SDAG-NEXT: v_mov_b64_e32 v[12:13], s[12:13] ; GFX942-SDAG-NEXT: v_mov_b64_e32 v[14:15], s[14:15] ; GFX942-SDAG-NEXT: s_nop 1 -; GFX942-SDAG-NEXT: v_smfmac_f32_32x32x16_f16 v[0:15], v[20:21], v[16:19], v22 cbsz:1 abid:2 +; GFX942-SDAG-NEXT: v_smfmac_f32_32x32x16_f16 v[0:15], v[22:23], v[18:21], v16 cbsz:1 abid:2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v16, 0 ; GFX942-SDAG-NEXT: s_nop 9 ; GFX942-SDAG-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:48 @@ -1635,11 +1635,11 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x16_f16(ptr addrspace(1) %arg, < ; GFX950-SDAG-NEXT: s_load_dwordx8 s[16:23], s[4:5], 0x24 ; GFX950-SDAG-NEXT: s_load_dword s24, s[4:5], 0x44 ; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX950-SDAG-NEXT: v_mov_b64_e32 v[20:21], s[18:19] +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[22:23], s[18:19] ; GFX950-SDAG-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0 -; GFX950-SDAG-NEXT: v_mov_b64_e32 v[16:17], s[20:21] -; GFX950-SDAG-NEXT: v_mov_b64_e32 v[18:19], s[22:23] -; GFX950-SDAG-NEXT: v_mov_b32_e32 v22, s24 +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[18:19], s[20:21] +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[20:21], s[22:23] +; GFX950-SDAG-NEXT: v_mov_b32_e32 v16, s24 ; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX950-SDAG-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX950-SDAG-NEXT: v_mov_b64_e32 v[2:3], s[2:3] @@ -1650,7 +1650,7 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x16_f16(ptr addrspace(1) %arg, < ; GFX950-SDAG-NEXT: v_mov_b64_e32 v[12:13], s[12:13] ; GFX950-SDAG-NEXT: v_mov_b64_e32 v[14:15], s[14:15] ; GFX950-SDAG-NEXT: s_nop 1 -; GFX950-SDAG-NEXT: v_smfmac_f32_32x32x16_f16 v[0:15], v[20:21], v[16:19], v22 cbsz:1 abid:2 +; GFX950-SDAG-NEXT: v_smfmac_f32_32x32x16_f16 v[0:15], v[22:23], v[18:21], v16 cbsz:1 abid:2 ; GFX950-SDAG-NEXT: v_mov_b32_e32 v16, 0 ; GFX950-SDAG-NEXT: s_nop 10 ; GFX950-SDAG-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:48 @@ -1847,20 +1847,20 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x32_bf16(ptr addrspace(1) %arg, ; GFX942-SDAG: ; %bb.0: ; %bb ; GFX942-SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 ; GFX942-SDAG-NEXT: s_load_dword s6, s[4:5], 0x44 -; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0 ; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 -; GFX942-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[10:11] -; GFX942-SDAG-NEXT: v_mov_b64_e32 v[0:1], s[12:13] -; GFX942-SDAG-NEXT: v_mov_b64_e32 v[2:3], s[14:15] -; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, s6 +; GFX942-SDAG-NEXT: v_mov_b64_e32 v[10:11], s[10:11] +; GFX942-SDAG-NEXT: v_mov_b64_e32 v[2:3], s[12:13] +; GFX942-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[14:15] +; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, s6 ; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-SDAG-NEXT: v_mov_b64_e32 v[10:11], s[2:3] -; GFX942-SDAG-NEXT: v_mov_b64_e32 v[8:9], s[0:1] +; GFX942-SDAG-NEXT: v_mov_b64_e32 v[8:9], s[2:3] +; GFX942-SDAG-NEXT: v_mov_b64_e32 v[6:7], s[0:1] ; GFX942-SDAG-NEXT: s_nop 1 -; GFX942-SDAG-NEXT: v_smfmac_f32_16x16x32_bf16 v[8:11], v[4:5], v[0:3], v7 cbsz:1 abid:2 +; GFX942-SDAG-NEXT: v_smfmac_f32_16x16x32_bf16 v[6:9], v[10:11], v[2:5], v1 cbsz:1 abid:2 ; GFX942-SDAG-NEXT: s_nop 6 -; GFX942-SDAG-NEXT: global_store_dwordx4 v6, v[8:11], s[8:9] +; GFX942-SDAG-NEXT: 
global_store_dwordx4 v0, v[6:9], s[8:9] ; GFX942-SDAG-NEXT: s_endpgm ; ; GFX942-GISEL-LABEL: test_smfmac_f32_16x16x32_bf16: @@ -1887,20 +1887,20 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x32_bf16(ptr addrspace(1) %arg, ; GFX950-SDAG: ; %bb.0: ; %bb ; GFX950-SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 ; GFX950-SDAG-NEXT: s_load_dword s6, s[4:5], 0x44 -; GFX950-SDAG-NEXT: v_mov_b32_e32 v6, 0 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 0 ; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX950-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 -; GFX950-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[10:11] -; GFX950-SDAG-NEXT: v_mov_b64_e32 v[0:1], s[12:13] -; GFX950-SDAG-NEXT: v_mov_b64_e32 v[2:3], s[14:15] -; GFX950-SDAG-NEXT: v_mov_b32_e32 v7, s6 +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[10:11], s[10:11] +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[2:3], s[12:13] +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[14:15] +; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, s6 ; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX950-SDAG-NEXT: v_mov_b64_e32 v[10:11], s[2:3] -; GFX950-SDAG-NEXT: v_mov_b64_e32 v[8:9], s[0:1] +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[8:9], s[2:3] +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[6:7], s[0:1] ; GFX950-SDAG-NEXT: s_nop 1 -; GFX950-SDAG-NEXT: v_smfmac_f32_16x16x32_bf16 v[8:11], v[4:5], v[0:3], v7 cbsz:1 abid:2 +; GFX950-SDAG-NEXT: v_smfmac_f32_16x16x32_bf16 v[6:9], v[10:11], v[2:5], v1 cbsz:1 abid:2 ; GFX950-SDAG-NEXT: s_nop 7 -; GFX950-SDAG-NEXT: global_store_dwordx4 v6, v[8:11], s[8:9] +; GFX950-SDAG-NEXT: global_store_dwordx4 v0, v[6:9], s[8:9] ; GFX950-SDAG-NEXT: s_endpgm ; ; GFX950-GISEL-LABEL: test_smfmac_f32_16x16x32_bf16: @@ -1979,11 +1979,11 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x16_bf16(ptr addrspace(1) %arg, ; GFX942-SDAG-NEXT: s_load_dwordx8 s[16:23], s[4:5], 0x24 ; GFX942-SDAG-NEXT: s_load_dword s24, s[4:5], 0x44 ; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-SDAG-NEXT: v_mov_b64_e32 v[20:21], s[18:19] +; GFX942-SDAG-NEXT: v_mov_b64_e32 v[22:23], s[18:19] ; GFX942-SDAG-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0 -; GFX942-SDAG-NEXT: v_mov_b64_e32 v[16:17], s[20:21] -; GFX942-SDAG-NEXT: v_mov_b64_e32 v[18:19], s[22:23] -; GFX942-SDAG-NEXT: v_mov_b32_e32 v22, s24 +; GFX942-SDAG-NEXT: v_mov_b64_e32 v[18:19], s[20:21] +; GFX942-SDAG-NEXT: v_mov_b64_e32 v[20:21], s[22:23] +; GFX942-SDAG-NEXT: v_mov_b32_e32 v16, s24 ; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-SDAG-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX942-SDAG-NEXT: v_mov_b64_e32 v[2:3], s[2:3] @@ -1994,7 +1994,7 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x16_bf16(ptr addrspace(1) %arg, ; GFX942-SDAG-NEXT: v_mov_b64_e32 v[12:13], s[12:13] ; GFX942-SDAG-NEXT: v_mov_b64_e32 v[14:15], s[14:15] ; GFX942-SDAG-NEXT: s_nop 1 -; GFX942-SDAG-NEXT: v_smfmac_f32_32x32x16_bf16 v[0:15], v[20:21], v[16:19], v22 cbsz:1 abid:2 +; GFX942-SDAG-NEXT: v_smfmac_f32_32x32x16_bf16 v[0:15], v[22:23], v[18:21], v16 cbsz:1 abid:2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v16, 0 ; GFX942-SDAG-NEXT: s_nop 9 ; GFX942-SDAG-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:48 @@ -2037,11 +2037,11 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x16_bf16(ptr addrspace(1) %arg, ; GFX950-SDAG-NEXT: s_load_dwordx8 s[16:23], s[4:5], 0x24 ; GFX950-SDAG-NEXT: s_load_dword s24, s[4:5], 0x44 ; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX950-SDAG-NEXT: v_mov_b64_e32 v[20:21], s[18:19] +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[22:23], s[18:19] ; GFX950-SDAG-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0 -; GFX950-SDAG-NEXT: v_mov_b64_e32 v[16:17], s[20:21] -; 
GFX950-SDAG-NEXT: v_mov_b64_e32 v[18:19], s[22:23] -; GFX950-SDAG-NEXT: v_mov_b32_e32 v22, s24 +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[18:19], s[20:21] +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[20:21], s[22:23] +; GFX950-SDAG-NEXT: v_mov_b32_e32 v16, s24 ; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX950-SDAG-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX950-SDAG-NEXT: v_mov_b64_e32 v[2:3], s[2:3] @@ -2052,7 +2052,7 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x16_bf16(ptr addrspace(1) %arg, ; GFX950-SDAG-NEXT: v_mov_b64_e32 v[12:13], s[12:13] ; GFX950-SDAG-NEXT: v_mov_b64_e32 v[14:15], s[14:15] ; GFX950-SDAG-NEXT: s_nop 1 -; GFX950-SDAG-NEXT: v_smfmac_f32_32x32x16_bf16 v[0:15], v[20:21], v[16:19], v22 cbsz:1 abid:2 +; GFX950-SDAG-NEXT: v_smfmac_f32_32x32x16_bf16 v[0:15], v[22:23], v[18:21], v16 cbsz:1 abid:2 ; GFX950-SDAG-NEXT: v_mov_b32_e32 v16, 0 ; GFX950-SDAG-NEXT: s_nop 10 ; GFX950-SDAG-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:48 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll index 033a35f..13a96cf 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll @@ -15,15 +15,15 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16(<8 x bfloat> %arg0, <8 x ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 ; GCN-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 -; GCN-NEXT: v_mov_b64_e32 v[8:9], 48 -; GCN-NEXT: v_mov_b64_e32 v[10:11], 32 -; GCN-NEXT: v_mov_b64_e32 v[12:13], 16 +; GCN-NEXT: v_mov_b64_e32 v[0:1], 48 +; GCN-NEXT: v_mov_b64_e32 v[2:3], 32 +; GCN-NEXT: v_mov_b64_e32 v[4:5], 16 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_mov_b64_e32 v[0:1], s[24:25] -; GCN-NEXT: v_mov_b64_e32 v[2:3], s[26:27] -; GCN-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; GCN-NEXT: v_mov_b64_e32 v[8:9], s[24:25] +; GCN-NEXT: v_mov_b64_e32 v[10:11], s[26:27] +; GCN-NEXT: v_mov_b64_e32 v[12:13], s[28:29] ; GCN-NEXT: v_accvgpr_write_b32 a0, s8 -; GCN-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; GCN-NEXT: v_mov_b64_e32 v[14:15], s[30:31] ; GCN-NEXT: v_accvgpr_write_b32 a1, s9 ; GCN-NEXT: v_accvgpr_write_b32 a2, s10 ; GCN-NEXT: v_accvgpr_write_b32 a3, s11 @@ -41,40 +41,39 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16(<8 x bfloat> %arg0, <8 x ; GCN-NEXT: v_accvgpr_write_b32 a15, s23 ; GCN-NEXT: v_mov_b32_e32 v16, s16 ; GCN-NEXT: v_mov_b32_e32 v17, s17 -; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[16:31], v[0:3], v[4:7], a[0:15] +; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[16:31], v[8:11], v[12:15], a[0:15] ; GCN-NEXT: v_mov_b32_e32 v18, s18 ; GCN-NEXT: v_mov_b32_e32 v19, s19 -; GCN-NEXT: v_mov_b32_e32 v0, s20 -; GCN-NEXT: v_mov_b32_e32 v1, s21 -; GCN-NEXT: v_mov_b32_e32 v2, s22 -; GCN-NEXT: v_mov_b32_e32 v3, s23 -; GCN-NEXT: v_mov_b64_e32 v[14:15], 0 +; GCN-NEXT: v_mov_b32_e32 v8, s20 +; GCN-NEXT: v_mov_b32_e32 v9, s21 +; GCN-NEXT: v_mov_b32_e32 v10, s22 +; GCN-NEXT: v_mov_b32_e32 v11, s23 +; GCN-NEXT: v_mov_b64_e32 v[6:7], 0 ; GCN-NEXT: s_nop 4 -; GCN-NEXT: global_store_dwordx4 v[8:9], a[28:31], off sc0 sc1 +; GCN-NEXT: global_store_dwordx4 v[0:1], a[28:31], off sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: global_store_dwordx4 v[10:11], a[24:27], off sc0 sc1 +; GCN-NEXT: global_store_dwordx4 v[2:3], a[24:27], off sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: global_store_dwordx4 v[12:13], a[20:23], off sc0 sc1 +; GCN-NEXT: global_store_dwordx4 v[4:5], a[20:23], off sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: 
global_store_dwordx4 v[14:15], a[16:19], off sc0 sc1 +; GCN-NEXT: global_store_dwordx4 v[6:7], a[16:19], off sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: global_store_dwordx4 v[10:11], v[16:19], off sc0 sc1 +; GCN-NEXT: global_store_dwordx4 v[2:3], v[16:19], off sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: global_store_dwordx4 v[8:9], v[0:3], off sc0 sc1 +; GCN-NEXT: global_store_dwordx4 v[0:1], v[8:11], off sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mov_b32_e32 v0, s8 ; GCN-NEXT: v_mov_b32_e32 v1, s9 ; GCN-NEXT: v_mov_b32_e32 v2, s10 ; GCN-NEXT: v_mov_b32_e32 v3, s11 -; GCN-NEXT: global_store_dwordx4 v[14:15], v[0:3], off sc0 sc1 +; GCN-NEXT: global_store_dwordx4 v[6:7], v[0:3], off sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mov_b32_e32 v0, s12 ; GCN-NEXT: v_mov_b32_e32 v1, s13 ; GCN-NEXT: v_mov_b32_e32 v2, s14 ; GCN-NEXT: v_mov_b32_e32 v3, s15 -; GCN-NEXT: global_store_dwordx4 v[12:13], v[0:3], off sc0 sc1 +; GCN-NEXT: global_store_dwordx4 v[4:5], v[0:3], off sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_endpgm %result = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf16(<8 x bfloat> %arg0, <8 x bfloat> %arg1, <16 x float> %arg2, i32 0, i32 0, i32 0) @@ -88,15 +87,15 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__flags(<8 x bfloat> %arg0 ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 ; GCN-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 -; GCN-NEXT: v_mov_b64_e32 v[8:9], 48 -; GCN-NEXT: v_mov_b64_e32 v[10:11], 32 -; GCN-NEXT: v_mov_b64_e32 v[12:13], 16 +; GCN-NEXT: v_mov_b64_e32 v[0:1], 48 +; GCN-NEXT: v_mov_b64_e32 v[2:3], 32 +; GCN-NEXT: v_mov_b64_e32 v[4:5], 16 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_mov_b64_e32 v[0:1], s[24:25] -; GCN-NEXT: v_mov_b64_e32 v[2:3], s[26:27] -; GCN-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; GCN-NEXT: v_mov_b64_e32 v[8:9], s[24:25] +; GCN-NEXT: v_mov_b64_e32 v[10:11], s[26:27] +; GCN-NEXT: v_mov_b64_e32 v[12:13], s[28:29] ; GCN-NEXT: v_accvgpr_write_b32 a0, s8 -; GCN-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; GCN-NEXT: v_mov_b64_e32 v[14:15], s[30:31] ; GCN-NEXT: v_accvgpr_write_b32 a1, s9 ; GCN-NEXT: v_accvgpr_write_b32 a2, s10 ; GCN-NEXT: v_accvgpr_write_b32 a3, s11 @@ -114,40 +113,39 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__flags(<8 x bfloat> %arg0 ; GCN-NEXT: v_accvgpr_write_b32 a15, s23 ; GCN-NEXT: v_mov_b32_e32 v16, s16 ; GCN-NEXT: v_mov_b32_e32 v17, s17 -; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[16:31], v[0:3], v[4:7], a[0:15] cbsz:2 abid:3 blgp:1 +; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[16:31], v[8:11], v[12:15], a[0:15] cbsz:2 abid:3 blgp:1 ; GCN-NEXT: v_mov_b32_e32 v18, s18 ; GCN-NEXT: v_mov_b32_e32 v19, s19 -; GCN-NEXT: v_mov_b32_e32 v0, s20 -; GCN-NEXT: v_mov_b32_e32 v1, s21 -; GCN-NEXT: v_mov_b32_e32 v2, s22 -; GCN-NEXT: v_mov_b32_e32 v3, s23 -; GCN-NEXT: v_mov_b64_e32 v[14:15], 0 +; GCN-NEXT: v_mov_b32_e32 v8, s20 +; GCN-NEXT: v_mov_b32_e32 v9, s21 +; GCN-NEXT: v_mov_b32_e32 v10, s22 +; GCN-NEXT: v_mov_b32_e32 v11, s23 +; GCN-NEXT: v_mov_b64_e32 v[6:7], 0 ; GCN-NEXT: s_nop 4 -; GCN-NEXT: global_store_dwordx4 v[8:9], a[28:31], off sc0 sc1 +; GCN-NEXT: global_store_dwordx4 v[0:1], a[28:31], off sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: global_store_dwordx4 v[10:11], a[24:27], off sc0 sc1 +; GCN-NEXT: global_store_dwordx4 v[2:3], a[24:27], off sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: global_store_dwordx4 v[12:13], a[20:23], off sc0 sc1 +; GCN-NEXT: global_store_dwordx4 v[4:5], a[20:23], off sc0 sc1 
; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: global_store_dwordx4 v[14:15], a[16:19], off sc0 sc1 +; GCN-NEXT: global_store_dwordx4 v[6:7], a[16:19], off sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: global_store_dwordx4 v[10:11], v[16:19], off sc0 sc1 +; GCN-NEXT: global_store_dwordx4 v[2:3], v[16:19], off sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: global_store_dwordx4 v[8:9], v[0:3], off sc0 sc1 +; GCN-NEXT: global_store_dwordx4 v[0:1], v[8:11], off sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mov_b32_e32 v0, s8 ; GCN-NEXT: v_mov_b32_e32 v1, s9 ; GCN-NEXT: v_mov_b32_e32 v2, s10 ; GCN-NEXT: v_mov_b32_e32 v3, s11 -; GCN-NEXT: global_store_dwordx4 v[14:15], v[0:3], off sc0 sc1 +; GCN-NEXT: global_store_dwordx4 v[6:7], v[0:3], off sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mov_b32_e32 v0, s12 ; GCN-NEXT: v_mov_b32_e32 v1, s13 ; GCN-NEXT: v_mov_b32_e32 v2, s14 ; GCN-NEXT: v_mov_b32_e32 v3, s15 -; GCN-NEXT: global_store_dwordx4 v[12:13], v[0:3], off sc0 sc1 +; GCN-NEXT: global_store_dwordx4 v[4:5], v[0:3], off sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_endpgm %result = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf16(<8 x bfloat> %arg0, <8 x bfloat> %arg1, <16 x float> %arg2, i32 2, i32 3, i32 1) @@ -250,13 +248,13 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__vgprcd(<8 x bfloat> %arg ; GCN-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 ; GCN-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 ; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 -; GCN-NEXT: v_mov_b32_e32 v44, 0 +; GCN-NEXT: v_mov_b32_e32 v36, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_mov_b64_e32 v[34:35], s[26:27] -; GCN-NEXT: v_mov_b64_e32 v[32:33], s[24:25] -; GCN-NEXT: v_mov_b64_e32 v[38:39], s[30:31] +; GCN-NEXT: v_mov_b64_e32 v[40:41], s[26:27] +; GCN-NEXT: v_mov_b64_e32 v[38:39], s[24:25] +; GCN-NEXT: v_mov_b64_e32 v[44:45], s[30:31] ; GCN-NEXT: v_mov_b64_e32 v[30:31], s[22:23] -; GCN-NEXT: v_mov_b64_e32 v[36:37], s[28:29] +; GCN-NEXT: v_mov_b64_e32 v[42:43], s[28:29] ; GCN-NEXT: v_mov_b64_e32 v[28:29], s[20:21] ; GCN-NEXT: v_mov_b64_e32 v[26:27], s[18:19] ; GCN-NEXT: v_mov_b64_e32 v[24:25], s[16:17] @@ -264,41 +262,41 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__vgprcd(<8 x bfloat> %arg ; GCN-NEXT: v_mov_b64_e32 v[20:21], s[12:13] ; GCN-NEXT: v_mov_b64_e32 v[18:19], s[10:11] ; GCN-NEXT: v_mov_b64_e32 v[16:17], s[8:9] -; GCN-NEXT: v_mov_b32_e32 v40, s20 -; GCN-NEXT: v_mov_b32_e32 v41, s21 -; GCN-NEXT: v_mfma_f32_32x32x16_bf16 v[0:15], v[32:35], v[36:39], v[16:31] -; GCN-NEXT: v_mov_b32_e32 v42, s22 -; GCN-NEXT: v_mov_b32_e32 v43, s23 -; GCN-NEXT: global_store_dwordx4 v44, v[40:43], s[0:1] offset:48 sc0 sc1 +; GCN-NEXT: v_mov_b32_e32 v32, s20 +; GCN-NEXT: v_mov_b32_e32 v33, s21 +; GCN-NEXT: v_mfma_f32_32x32x16_bf16 v[0:15], v[38:41], v[42:45], v[16:31] +; GCN-NEXT: v_mov_b32_e32 v34, s22 +; GCN-NEXT: v_mov_b32_e32 v35, s23 +; GCN-NEXT: global_store_dwordx4 v36, v[32:35], s[0:1] offset:48 sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_mov_b32_e32 v16, s16 ; GCN-NEXT: v_mov_b32_e32 v17, s17 ; GCN-NEXT: v_mov_b32_e32 v18, s18 ; GCN-NEXT: v_mov_b32_e32 v19, s19 -; GCN-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] offset:32 sc0 sc1 +; GCN-NEXT: global_store_dwordx4 v36, v[16:19], s[0:1] offset:32 sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mov_b32_e32 v16, s12 ; GCN-NEXT: v_mov_b32_e32 v17, s13 ; GCN-NEXT: v_mov_b32_e32 v18, s14 ; GCN-NEXT: v_mov_b32_e32 v19, s15 
-; GCN-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] offset:16 sc0 sc1 +; GCN-NEXT: global_store_dwordx4 v36, v[16:19], s[0:1] offset:16 sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mov_b32_e32 v16, s8 ; GCN-NEXT: v_mov_b32_e32 v17, s9 ; GCN-NEXT: v_mov_b32_e32 v18, s10 ; GCN-NEXT: v_mov_b32_e32 v19, s11 -; GCN-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] sc0 sc1 +; GCN-NEXT: global_store_dwordx4 v36, v[16:19], s[0:1] sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: global_store_dwordx4 v44, v[8:11], s[0:1] offset:32 sc0 sc1 +; GCN-NEXT: global_store_dwordx4 v36, v[8:11], s[0:1] offset:32 sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: global_store_dwordx4 v44, v[12:15], s[0:1] offset:48 sc0 sc1 +; GCN-NEXT: global_store_dwordx4 v36, v[12:15], s[0:1] offset:48 sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: global_store_dwordx4 v44, v[0:3], s[0:1] sc0 sc1 +; GCN-NEXT: global_store_dwordx4 v36, v[0:3], s[0:1] sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: global_store_dwordx4 v44, v[4:7], s[0:1] offset:16 sc0 sc1 +; GCN-NEXT: global_store_dwordx4 v36, v[4:7], s[0:1] offset:16 sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_endpgm %result = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf16(<8 x bfloat> %arg0, <8 x bfloat> %arg1, <16 x float> %arg2, i32 0, i32 0, i32 0) @@ -313,13 +311,13 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__vgprcd__flags(<8 x bfloa ; GCN-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 ; GCN-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 ; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 -; GCN-NEXT: v_mov_b32_e32 v44, 0 +; GCN-NEXT: v_mov_b32_e32 v36, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_mov_b64_e32 v[34:35], s[26:27] -; GCN-NEXT: v_mov_b64_e32 v[32:33], s[24:25] -; GCN-NEXT: v_mov_b64_e32 v[38:39], s[30:31] +; GCN-NEXT: v_mov_b64_e32 v[40:41], s[26:27] +; GCN-NEXT: v_mov_b64_e32 v[38:39], s[24:25] +; GCN-NEXT: v_mov_b64_e32 v[44:45], s[30:31] ; GCN-NEXT: v_mov_b64_e32 v[30:31], s[22:23] -; GCN-NEXT: v_mov_b64_e32 v[36:37], s[28:29] +; GCN-NEXT: v_mov_b64_e32 v[42:43], s[28:29] ; GCN-NEXT: v_mov_b64_e32 v[28:29], s[20:21] ; GCN-NEXT: v_mov_b64_e32 v[26:27], s[18:19] ; GCN-NEXT: v_mov_b64_e32 v[24:25], s[16:17] @@ -327,41 +325,41 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__vgprcd__flags(<8 x bfloa ; GCN-NEXT: v_mov_b64_e32 v[20:21], s[12:13] ; GCN-NEXT: v_mov_b64_e32 v[18:19], s[10:11] ; GCN-NEXT: v_mov_b64_e32 v[16:17], s[8:9] -; GCN-NEXT: v_mov_b32_e32 v40, s20 -; GCN-NEXT: v_mov_b32_e32 v41, s21 -; GCN-NEXT: v_mfma_f32_32x32x16_bf16 v[0:15], v[32:35], v[36:39], v[16:31] cbsz:1 abid:2 blgp:3 -; GCN-NEXT: v_mov_b32_e32 v42, s22 -; GCN-NEXT: v_mov_b32_e32 v43, s23 -; GCN-NEXT: global_store_dwordx4 v44, v[40:43], s[0:1] offset:48 sc0 sc1 +; GCN-NEXT: v_mov_b32_e32 v32, s20 +; GCN-NEXT: v_mov_b32_e32 v33, s21 +; GCN-NEXT: v_mfma_f32_32x32x16_bf16 v[0:15], v[38:41], v[42:45], v[16:31] cbsz:1 abid:2 blgp:3 +; GCN-NEXT: v_mov_b32_e32 v34, s22 +; GCN-NEXT: v_mov_b32_e32 v35, s23 +; GCN-NEXT: global_store_dwordx4 v36, v[32:35], s[0:1] offset:48 sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_mov_b32_e32 v16, s16 ; GCN-NEXT: v_mov_b32_e32 v17, s17 ; GCN-NEXT: v_mov_b32_e32 v18, s18 ; GCN-NEXT: v_mov_b32_e32 v19, s19 -; GCN-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] offset:32 sc0 sc1 +; GCN-NEXT: global_store_dwordx4 v36, v[16:19], s[0:1] offset:32 sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mov_b32_e32 v16, s12 ; GCN-NEXT: 
v_mov_b32_e32 v17, s13 ; GCN-NEXT: v_mov_b32_e32 v18, s14 ; GCN-NEXT: v_mov_b32_e32 v19, s15 -; GCN-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] offset:16 sc0 sc1 +; GCN-NEXT: global_store_dwordx4 v36, v[16:19], s[0:1] offset:16 sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mov_b32_e32 v16, s8 ; GCN-NEXT: v_mov_b32_e32 v17, s9 ; GCN-NEXT: v_mov_b32_e32 v18, s10 ; GCN-NEXT: v_mov_b32_e32 v19, s11 -; GCN-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] sc0 sc1 +; GCN-NEXT: global_store_dwordx4 v36, v[16:19], s[0:1] sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: global_store_dwordx4 v44, v[8:11], s[0:1] offset:32 sc0 sc1 +; GCN-NEXT: global_store_dwordx4 v36, v[8:11], s[0:1] offset:32 sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: global_store_dwordx4 v44, v[12:15], s[0:1] offset:48 sc0 sc1 +; GCN-NEXT: global_store_dwordx4 v36, v[12:15], s[0:1] offset:48 sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: global_store_dwordx4 v44, v[0:3], s[0:1] sc0 sc1 +; GCN-NEXT: global_store_dwordx4 v36, v[0:3], s[0:1] sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: global_store_dwordx4 v44, v[4:7], s[0:1] offset:16 sc0 sc1 +; GCN-NEXT: global_store_dwordx4 v36, v[4:7], s[0:1] offset:16 sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_endpgm %result = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf16(<8 x bfloat> %arg0, <8 x bfloat> %arg1, <16 x float> %arg2, i32 1, i32 2, i32 3) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll index 7532062..ab0000f 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll @@ -141,18 +141,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd(ptr addrsp ; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 ; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 ; SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 -; SDAG-NEXT: v_mov_b32_e32 v12, 0 +; SDAG-NEXT: v_mov_b32_e32 v4, 0 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[8:9] -; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[10:11] -; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[12:13] -; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[2:3] -; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[14:15] -; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[0:1] +; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[8:9] +; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[10:11] +; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[12:13] +; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; SDAG-NEXT: v_mov_b64_e32 v[12:13], s[14:15] +; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_f32_16x16x32_f16 v[0:3], v[0:3], v[4:7], v[8:11] +; SDAG-NEXT: v_mfma_f32_16x16x32_f16 v[0:3], v[6:9], v[10:13], v[0:3] ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: global_store_dwordx4 v12, v[0:3], s[6:7] +; SDAG-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_mfma_f32_16x16x32_f16_no_agpr__vgprcd: @@ -179,18 +179,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd(ptr addrsp ; HEURRC-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 ; HEURRC-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 ; HEURRC-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 -; HEURRC-NEXT: v_mov_b32_e32 v12, 0 +; HEURRC-NEXT: v_mov_b32_e32 v4, 0 ; HEURRC-NEXT: s_waitcnt lgkmcnt(0) -; HEURRC-NEXT: v_mov_b64_e32 v[0:1], s[8:9] -; HEURRC-NEXT: v_mov_b64_e32 v[2:3], s[10:11] -; HEURRC-NEXT: v_mov_b64_e32 v[4:5], s[12:13] -; HEURRC-NEXT: v_mov_b64_e32 v[10:11], s[2:3] -; HEURRC-NEXT: v_mov_b64_e32 
v[6:7], s[14:15] -; HEURRC-NEXT: v_mov_b64_e32 v[8:9], s[0:1] +; HEURRC-NEXT: v_mov_b64_e32 v[6:7], s[8:9] +; HEURRC-NEXT: v_mov_b64_e32 v[8:9], s[10:11] +; HEURRC-NEXT: v_mov_b64_e32 v[10:11], s[12:13] +; HEURRC-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; HEURRC-NEXT: v_mov_b64_e32 v[12:13], s[14:15] +; HEURRC-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; HEURRC-NEXT: s_nop 1 -; HEURRC-NEXT: v_mfma_f32_16x16x32_f16 v[0:3], v[0:3], v[4:7], v[8:11] +; HEURRC-NEXT: v_mfma_f32_16x16x32_f16 v[0:3], v[6:9], v[10:13], v[0:3] ; HEURRC-NEXT: s_nop 7 -; HEURRC-NEXT: global_store_dwordx4 v12, v[0:3], s[6:7] +; HEURRC-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] ; HEURRC-NEXT: s_endpgm ; ; VGPRRC-LABEL: test_mfma_f32_16x16x32_f16_no_agpr__vgprcd: @@ -198,18 +198,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd(ptr addrsp ; VGPRRC-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 ; VGPRRC-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 ; VGPRRC-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 -; VGPRRC-NEXT: v_mov_b32_e32 v12, 0 +; VGPRRC-NEXT: v_mov_b32_e32 v4, 0 ; VGPRRC-NEXT: s_waitcnt lgkmcnt(0) -; VGPRRC-NEXT: v_mov_b64_e32 v[0:1], s[8:9] -; VGPRRC-NEXT: v_mov_b64_e32 v[2:3], s[10:11] -; VGPRRC-NEXT: v_mov_b64_e32 v[4:5], s[12:13] -; VGPRRC-NEXT: v_mov_b64_e32 v[10:11], s[2:3] -; VGPRRC-NEXT: v_mov_b64_e32 v[6:7], s[14:15] -; VGPRRC-NEXT: v_mov_b64_e32 v[8:9], s[0:1] +; VGPRRC-NEXT: v_mov_b64_e32 v[6:7], s[8:9] +; VGPRRC-NEXT: v_mov_b64_e32 v[8:9], s[10:11] +; VGPRRC-NEXT: v_mov_b64_e32 v[10:11], s[12:13] +; VGPRRC-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; VGPRRC-NEXT: v_mov_b64_e32 v[12:13], s[14:15] +; VGPRRC-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; VGPRRC-NEXT: s_nop 1 -; VGPRRC-NEXT: v_mfma_f32_16x16x32_f16 v[0:3], v[0:3], v[4:7], v[8:11] +; VGPRRC-NEXT: v_mfma_f32_16x16x32_f16 v[0:3], v[6:9], v[10:13], v[0:3] ; VGPRRC-NEXT: s_nop 7 -; VGPRRC-NEXT: global_store_dwordx4 v12, v[0:3], s[6:7] +; VGPRRC-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] ; VGPRRC-NEXT: s_endpgm ; AGPR-LABEL: test_mfma_f32_16x16x32_f16_no_agpr__vgprcd: ; AGPR: ; %bb.0: @@ -260,18 +260,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd__flags(ptr ; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 ; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 ; SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 -; SDAG-NEXT: v_mov_b32_e32 v12, 0 +; SDAG-NEXT: v_mov_b32_e32 v4, 0 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[8:9] -; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[10:11] -; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[12:13] -; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[2:3] -; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[14:15] -; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[0:1] +; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[8:9] +; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[10:11] +; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[12:13] +; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; SDAG-NEXT: v_mov_b64_e32 v[12:13], s[14:15] +; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_f32_16x16x32_f16 v[0:3], v[0:3], v[4:7], v[8:11] cbsz:3 abid:2 blgp:1 +; SDAG-NEXT: v_mfma_f32_16x16x32_f16 v[0:3], v[6:9], v[10:13], v[0:3] cbsz:3 abid:2 blgp:1 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: global_store_dwordx4 v12, v[0:3], s[6:7] +; SDAG-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_mfma_f32_16x16x32_f16_no_agpr__vgprcd__flags: @@ -298,18 +298,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd__flags(ptr ; HEURRC-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 ; HEURRC-NEXT: 
s_load_dwordx4 s[0:3], s[4:5], 0x54 ; HEURRC-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 -; HEURRC-NEXT: v_mov_b32_e32 v12, 0 +; HEURRC-NEXT: v_mov_b32_e32 v4, 0 ; HEURRC-NEXT: s_waitcnt lgkmcnt(0) -; HEURRC-NEXT: v_mov_b64_e32 v[0:1], s[8:9] -; HEURRC-NEXT: v_mov_b64_e32 v[2:3], s[10:11] -; HEURRC-NEXT: v_mov_b64_e32 v[4:5], s[12:13] -; HEURRC-NEXT: v_mov_b64_e32 v[10:11], s[2:3] -; HEURRC-NEXT: v_mov_b64_e32 v[6:7], s[14:15] -; HEURRC-NEXT: v_mov_b64_e32 v[8:9], s[0:1] +; HEURRC-NEXT: v_mov_b64_e32 v[6:7], s[8:9] +; HEURRC-NEXT: v_mov_b64_e32 v[8:9], s[10:11] +; HEURRC-NEXT: v_mov_b64_e32 v[10:11], s[12:13] +; HEURRC-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; HEURRC-NEXT: v_mov_b64_e32 v[12:13], s[14:15] +; HEURRC-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; HEURRC-NEXT: s_nop 1 -; HEURRC-NEXT: v_mfma_f32_16x16x32_f16 v[0:3], v[0:3], v[4:7], v[8:11] cbsz:3 abid:2 blgp:1 +; HEURRC-NEXT: v_mfma_f32_16x16x32_f16 v[0:3], v[6:9], v[10:13], v[0:3] cbsz:3 abid:2 blgp:1 ; HEURRC-NEXT: s_nop 7 -; HEURRC-NEXT: global_store_dwordx4 v12, v[0:3], s[6:7] +; HEURRC-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] ; HEURRC-NEXT: s_endpgm ; ; VGPRRC-LABEL: test_mfma_f32_16x16x32_f16_no_agpr__vgprcd__flags: @@ -317,18 +317,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd__flags(ptr ; VGPRRC-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 ; VGPRRC-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 ; VGPRRC-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 -; VGPRRC-NEXT: v_mov_b32_e32 v12, 0 +; VGPRRC-NEXT: v_mov_b32_e32 v4, 0 ; VGPRRC-NEXT: s_waitcnt lgkmcnt(0) -; VGPRRC-NEXT: v_mov_b64_e32 v[0:1], s[8:9] -; VGPRRC-NEXT: v_mov_b64_e32 v[2:3], s[10:11] -; VGPRRC-NEXT: v_mov_b64_e32 v[4:5], s[12:13] -; VGPRRC-NEXT: v_mov_b64_e32 v[10:11], s[2:3] -; VGPRRC-NEXT: v_mov_b64_e32 v[6:7], s[14:15] -; VGPRRC-NEXT: v_mov_b64_e32 v[8:9], s[0:1] +; VGPRRC-NEXT: v_mov_b64_e32 v[6:7], s[8:9] +; VGPRRC-NEXT: v_mov_b64_e32 v[8:9], s[10:11] +; VGPRRC-NEXT: v_mov_b64_e32 v[10:11], s[12:13] +; VGPRRC-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; VGPRRC-NEXT: v_mov_b64_e32 v[12:13], s[14:15] +; VGPRRC-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; VGPRRC-NEXT: s_nop 1 -; VGPRRC-NEXT: v_mfma_f32_16x16x32_f16 v[0:3], v[0:3], v[4:7], v[8:11] cbsz:3 abid:2 blgp:1 +; VGPRRC-NEXT: v_mfma_f32_16x16x32_f16 v[0:3], v[6:9], v[10:13], v[0:3] cbsz:3 abid:2 blgp:1 ; VGPRRC-NEXT: s_nop 7 -; VGPRRC-NEXT: global_store_dwordx4 v12, v[0:3], s[6:7] +; VGPRRC-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] ; VGPRRC-NEXT: s_endpgm ; AGPR-LABEL: test_mfma_f32_16x16x32_f16_no_agpr__vgprcd__flags: ; AGPR: ; %bb.0: @@ -382,15 +382,15 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16(<8 x half> %arg0, <8 x hal ; SDAG: ; %bb.0: ; SDAG-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 ; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 -; SDAG-NEXT: v_mov_b64_e32 v[8:9], 48 -; SDAG-NEXT: v_mov_b64_e32 v[10:11], 32 -; SDAG-NEXT: v_mov_b64_e32 v[12:13], 16 +; SDAG-NEXT: v_mov_b64_e32 v[0:1], 48 +; SDAG-NEXT: v_mov_b64_e32 v[2:3], 32 +; SDAG-NEXT: v_mov_b64_e32 v[4:5], 16 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[24:25] -; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[26:27] -; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[24:25] +; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[26:27] +; SDAG-NEXT: v_mov_b64_e32 v[12:13], s[28:29] ; SDAG-NEXT: v_accvgpr_write_b32 a0, s8 -; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; SDAG-NEXT: v_mov_b64_e32 v[14:15], s[30:31] ; SDAG-NEXT: v_accvgpr_write_b32 a1, s9 ; SDAG-NEXT: v_accvgpr_write_b32 a2, s10 ; 
SDAG-NEXT: v_accvgpr_write_b32 a3, s11 @@ -408,40 +408,39 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16(<8 x half> %arg0, <8 x hal ; SDAG-NEXT: v_accvgpr_write_b32 a15, s23 ; SDAG-NEXT: v_mov_b32_e32 v16, s16 ; SDAG-NEXT: v_mov_b32_e32 v17, s17 -; SDAG-NEXT: v_mfma_f32_32x32x16_f16 a[16:31], v[0:3], v[4:7], a[0:15] +; SDAG-NEXT: v_mfma_f32_32x32x16_f16 a[16:31], v[8:11], v[12:15], a[0:15] ; SDAG-NEXT: v_mov_b32_e32 v18, s18 ; SDAG-NEXT: v_mov_b32_e32 v19, s19 -; SDAG-NEXT: v_mov_b32_e32 v0, s20 -; SDAG-NEXT: v_mov_b32_e32 v1, s21 -; SDAG-NEXT: v_mov_b32_e32 v2, s22 -; SDAG-NEXT: v_mov_b32_e32 v3, s23 -; SDAG-NEXT: v_mov_b64_e32 v[14:15], 0 +; SDAG-NEXT: v_mov_b32_e32 v8, s20 +; SDAG-NEXT: v_mov_b32_e32 v9, s21 +; SDAG-NEXT: v_mov_b32_e32 v10, s22 +; SDAG-NEXT: v_mov_b32_e32 v11, s23 +; SDAG-NEXT: v_mov_b64_e32 v[6:7], 0 ; SDAG-NEXT: s_nop 4 -; SDAG-NEXT: global_store_dwordx4 v[8:9], a[28:31], off sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v[0:1], a[28:31], off sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v[10:11], a[24:27], off sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v[2:3], a[24:27], off sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v[12:13], a[20:23], off sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v[4:5], a[20:23], off sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v[14:15], a[16:19], off sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v[6:7], a[16:19], off sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v[10:11], v[16:19], off sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v[8:9], v[0:3], off sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v[0:1], v[8:11], off sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mov_b32_e32 v0, s8 ; SDAG-NEXT: v_mov_b32_e32 v1, s9 ; SDAG-NEXT: v_mov_b32_e32 v2, s10 ; SDAG-NEXT: v_mov_b32_e32 v3, s11 -; SDAG-NEXT: global_store_dwordx4 v[14:15], v[0:3], off sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v[6:7], v[0:3], off sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mov_b32_e32 v0, s12 ; SDAG-NEXT: v_mov_b32_e32 v1, s13 ; SDAG-NEXT: v_mov_b32_e32 v2, s14 ; SDAG-NEXT: v_mov_b32_e32 v3, s15 -; SDAG-NEXT: global_store_dwordx4 v[12:13], v[0:3], off sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v[4:5], v[0:3], off sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_endpgm ; @@ -508,15 +507,15 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16(<8 x half> %arg0, <8 x hal ; HEURRC: ; %bb.0: ; HEURRC-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 ; HEURRC-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 -; HEURRC-NEXT: v_mov_b64_e32 v[8:9], 48 -; HEURRC-NEXT: v_mov_b64_e32 v[10:11], 32 -; HEURRC-NEXT: v_mov_b64_e32 v[12:13], 16 +; HEURRC-NEXT: v_mov_b64_e32 v[0:1], 48 +; HEURRC-NEXT: v_mov_b64_e32 v[2:3], 32 +; HEURRC-NEXT: v_mov_b64_e32 v[4:5], 16 ; HEURRC-NEXT: s_waitcnt lgkmcnt(0) -; HEURRC-NEXT: v_mov_b64_e32 v[0:1], s[24:25] -; HEURRC-NEXT: v_mov_b64_e32 v[2:3], s[26:27] -; HEURRC-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; HEURRC-NEXT: v_mov_b64_e32 v[8:9], s[24:25] +; HEURRC-NEXT: v_mov_b64_e32 v[10:11], s[26:27] +; HEURRC-NEXT: v_mov_b64_e32 v[12:13], s[28:29] ; HEURRC-NEXT: v_accvgpr_write_b32 a0, s8 -; HEURRC-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; HEURRC-NEXT: v_mov_b64_e32 v[14:15], s[30:31] ; HEURRC-NEXT: v_accvgpr_write_b32 a1, s9 ; HEURRC-NEXT: v_accvgpr_write_b32 a2, s10 ; HEURRC-NEXT: 
v_accvgpr_write_b32 a3, s11 @@ -534,40 +533,39 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16(<8 x half> %arg0, <8 x hal ; HEURRC-NEXT: v_accvgpr_write_b32 a15, s23 ; HEURRC-NEXT: v_mov_b32_e32 v16, s16 ; HEURRC-NEXT: v_mov_b32_e32 v17, s17 -; HEURRC-NEXT: v_mfma_f32_32x32x16_f16 a[16:31], v[0:3], v[4:7], a[0:15] +; HEURRC-NEXT: v_mfma_f32_32x32x16_f16 a[16:31], v[8:11], v[12:15], a[0:15] ; HEURRC-NEXT: v_mov_b32_e32 v18, s18 ; HEURRC-NEXT: v_mov_b32_e32 v19, s19 -; HEURRC-NEXT: v_mov_b32_e32 v0, s20 -; HEURRC-NEXT: v_mov_b32_e32 v1, s21 -; HEURRC-NEXT: v_mov_b32_e32 v2, s22 -; HEURRC-NEXT: v_mov_b32_e32 v3, s23 -; HEURRC-NEXT: v_mov_b64_e32 v[14:15], 0 +; HEURRC-NEXT: v_mov_b32_e32 v8, s20 +; HEURRC-NEXT: v_mov_b32_e32 v9, s21 +; HEURRC-NEXT: v_mov_b32_e32 v10, s22 +; HEURRC-NEXT: v_mov_b32_e32 v11, s23 +; HEURRC-NEXT: v_mov_b64_e32 v[6:7], 0 ; HEURRC-NEXT: s_nop 4 -; HEURRC-NEXT: global_store_dwordx4 v[8:9], a[28:31], off sc0 sc1 +; HEURRC-NEXT: global_store_dwordx4 v[0:1], a[28:31], off sc0 sc1 ; HEURRC-NEXT: s_waitcnt vmcnt(0) -; HEURRC-NEXT: global_store_dwordx4 v[10:11], a[24:27], off sc0 sc1 +; HEURRC-NEXT: global_store_dwordx4 v[2:3], a[24:27], off sc0 sc1 ; HEURRC-NEXT: s_waitcnt vmcnt(0) -; HEURRC-NEXT: global_store_dwordx4 v[12:13], a[20:23], off sc0 sc1 +; HEURRC-NEXT: global_store_dwordx4 v[4:5], a[20:23], off sc0 sc1 ; HEURRC-NEXT: s_waitcnt vmcnt(0) -; HEURRC-NEXT: global_store_dwordx4 v[14:15], a[16:19], off sc0 sc1 +; HEURRC-NEXT: global_store_dwordx4 v[6:7], a[16:19], off sc0 sc1 ; HEURRC-NEXT: s_waitcnt vmcnt(0) -; HEURRC-NEXT: global_store_dwordx4 v[10:11], v[16:19], off sc0 sc1 +; HEURRC-NEXT: global_store_dwordx4 v[2:3], v[16:19], off sc0 sc1 ; HEURRC-NEXT: s_waitcnt vmcnt(0) -; HEURRC-NEXT: global_store_dwordx4 v[8:9], v[0:3], off sc0 sc1 +; HEURRC-NEXT: global_store_dwordx4 v[0:1], v[8:11], off sc0 sc1 ; HEURRC-NEXT: s_waitcnt vmcnt(0) -; HEURRC-NEXT: s_nop 0 ; HEURRC-NEXT: v_mov_b32_e32 v0, s8 ; HEURRC-NEXT: v_mov_b32_e32 v1, s9 ; HEURRC-NEXT: v_mov_b32_e32 v2, s10 ; HEURRC-NEXT: v_mov_b32_e32 v3, s11 -; HEURRC-NEXT: global_store_dwordx4 v[14:15], v[0:3], off sc0 sc1 +; HEURRC-NEXT: global_store_dwordx4 v[6:7], v[0:3], off sc0 sc1 ; HEURRC-NEXT: s_waitcnt vmcnt(0) ; HEURRC-NEXT: s_nop 0 ; HEURRC-NEXT: v_mov_b32_e32 v0, s12 ; HEURRC-NEXT: v_mov_b32_e32 v1, s13 ; HEURRC-NEXT: v_mov_b32_e32 v2, s14 ; HEURRC-NEXT: v_mov_b32_e32 v3, s15 -; HEURRC-NEXT: global_store_dwordx4 v[12:13], v[0:3], off sc0 sc1 +; HEURRC-NEXT: global_store_dwordx4 v[4:5], v[0:3], off sc0 sc1 ; HEURRC-NEXT: s_waitcnt vmcnt(0) ; HEURRC-NEXT: s_endpgm ; @@ -575,15 +573,15 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16(<8 x half> %arg0, <8 x hal ; VGPRRC: ; %bb.0: ; VGPRRC-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 ; VGPRRC-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 -; VGPRRC-NEXT: v_mov_b64_e32 v[40:41], 48 -; VGPRRC-NEXT: v_mov_b64_e32 v[42:43], 32 -; VGPRRC-NEXT: v_mov_b64_e32 v[44:45], 16 +; VGPRRC-NEXT: v_mov_b64_e32 v[32:33], 48 +; VGPRRC-NEXT: v_mov_b64_e32 v[34:35], 32 +; VGPRRC-NEXT: v_mov_b64_e32 v[36:37], 16 ; VGPRRC-NEXT: s_waitcnt lgkmcnt(0) -; VGPRRC-NEXT: v_mov_b64_e32 v[34:35], s[26:27] -; VGPRRC-NEXT: v_mov_b64_e32 v[32:33], s[24:25] -; VGPRRC-NEXT: v_mov_b64_e32 v[38:39], s[30:31] +; VGPRRC-NEXT: v_mov_b64_e32 v[42:43], s[26:27] +; VGPRRC-NEXT: v_mov_b64_e32 v[40:41], s[24:25] +; VGPRRC-NEXT: v_mov_b64_e32 v[46:47], s[30:31] ; VGPRRC-NEXT: v_mov_b64_e32 v[0:1], s[8:9] -; VGPRRC-NEXT: v_mov_b64_e32 v[36:37], s[28:29] +; VGPRRC-NEXT: v_mov_b64_e32 v[44:45], 
s[28:29] ; VGPRRC-NEXT: v_mov_b64_e32 v[2:3], s[10:11] ; VGPRRC-NEXT: v_mov_b64_e32 v[4:5], s[12:13] ; VGPRRC-NEXT: v_mov_b64_e32 v[6:7], s[14:15] @@ -593,40 +591,40 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16(<8 x half> %arg0, <8 x hal ; VGPRRC-NEXT: v_mov_b64_e32 v[14:15], s[22:23] ; VGPRRC-NEXT: v_mov_b32_e32 v48, s16 ; VGPRRC-NEXT: v_mov_b32_e32 v49, s17 -; VGPRRC-NEXT: v_mfma_f32_32x32x16_f16 v[16:31], v[32:35], v[36:39], v[0:15] +; VGPRRC-NEXT: v_mfma_f32_32x32x16_f16 v[16:31], v[40:43], v[44:47], v[0:15] ; VGPRRC-NEXT: v_mov_b32_e32 v50, s18 ; VGPRRC-NEXT: v_mov_b32_e32 v51, s19 -; VGPRRC-NEXT: v_mov_b64_e32 v[46:47], 0 +; VGPRRC-NEXT: v_mov_b64_e32 v[38:39], 0 ; VGPRRC-NEXT: s_nop 8 -; VGPRRC-NEXT: global_store_dwordx4 v[40:41], v[28:31], off sc0 sc1 +; VGPRRC-NEXT: global_store_dwordx4 v[32:33], v[28:31], off sc0 sc1 ; VGPRRC-NEXT: s_waitcnt vmcnt(0) -; VGPRRC-NEXT: global_store_dwordx4 v[42:43], v[24:27], off sc0 sc1 +; VGPRRC-NEXT: global_store_dwordx4 v[34:35], v[24:27], off sc0 sc1 ; VGPRRC-NEXT: s_waitcnt vmcnt(0) -; VGPRRC-NEXT: global_store_dwordx4 v[44:45], v[20:23], off sc0 sc1 +; VGPRRC-NEXT: global_store_dwordx4 v[36:37], v[20:23], off sc0 sc1 ; VGPRRC-NEXT: s_waitcnt vmcnt(0) -; VGPRRC-NEXT: global_store_dwordx4 v[46:47], v[16:19], off sc0 sc1 +; VGPRRC-NEXT: global_store_dwordx4 v[38:39], v[16:19], off sc0 sc1 ; VGPRRC-NEXT: s_waitcnt vmcnt(0) ; VGPRRC-NEXT: v_mov_b32_e32 v0, s20 ; VGPRRC-NEXT: v_mov_b32_e32 v1, s21 ; VGPRRC-NEXT: v_mov_b32_e32 v2, s22 ; VGPRRC-NEXT: v_mov_b32_e32 v3, s23 -; VGPRRC-NEXT: global_store_dwordx4 v[42:43], v[48:51], off sc0 sc1 +; VGPRRC-NEXT: global_store_dwordx4 v[34:35], v[48:51], off sc0 sc1 ; VGPRRC-NEXT: s_waitcnt vmcnt(0) -; VGPRRC-NEXT: global_store_dwordx4 v[40:41], v[0:3], off sc0 sc1 +; VGPRRC-NEXT: global_store_dwordx4 v[32:33], v[0:3], off sc0 sc1 ; VGPRRC-NEXT: s_waitcnt vmcnt(0) ; VGPRRC-NEXT: s_nop 0 ; VGPRRC-NEXT: v_mov_b32_e32 v0, s8 ; VGPRRC-NEXT: v_mov_b32_e32 v1, s9 ; VGPRRC-NEXT: v_mov_b32_e32 v2, s10 ; VGPRRC-NEXT: v_mov_b32_e32 v3, s11 -; VGPRRC-NEXT: global_store_dwordx4 v[46:47], v[0:3], off sc0 sc1 +; VGPRRC-NEXT: global_store_dwordx4 v[38:39], v[0:3], off sc0 sc1 ; VGPRRC-NEXT: s_waitcnt vmcnt(0) ; VGPRRC-NEXT: s_nop 0 ; VGPRRC-NEXT: v_mov_b32_e32 v0, s12 ; VGPRRC-NEXT: v_mov_b32_e32 v1, s13 ; VGPRRC-NEXT: v_mov_b32_e32 v2, s14 ; VGPRRC-NEXT: v_mov_b32_e32 v3, s15 -; VGPRRC-NEXT: global_store_dwordx4 v[44:45], v[0:3], off sc0 sc1 +; VGPRRC-NEXT: global_store_dwordx4 v[36:37], v[0:3], off sc0 sc1 ; VGPRRC-NEXT: s_waitcnt vmcnt(0) ; VGPRRC-NEXT: s_endpgm ; AGPR-LABEL: test_mfma_f32_32x32x16_f16: @@ -765,15 +763,15 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__flags(<8 x half> %arg0, < ; SDAG: ; %bb.0: ; SDAG-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 ; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 -; SDAG-NEXT: v_mov_b64_e32 v[8:9], 48 -; SDAG-NEXT: v_mov_b64_e32 v[10:11], 32 -; SDAG-NEXT: v_mov_b64_e32 v[12:13], 16 +; SDAG-NEXT: v_mov_b64_e32 v[0:1], 48 +; SDAG-NEXT: v_mov_b64_e32 v[2:3], 32 +; SDAG-NEXT: v_mov_b64_e32 v[4:5], 16 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[24:25] -; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[26:27] -; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[24:25] +; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[26:27] +; SDAG-NEXT: v_mov_b64_e32 v[12:13], s[28:29] ; SDAG-NEXT: v_accvgpr_write_b32 a0, s8 -; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; SDAG-NEXT: v_mov_b64_e32 v[14:15], s[30:31] ; SDAG-NEXT: 
v_accvgpr_write_b32 a1, s9 ; SDAG-NEXT: v_accvgpr_write_b32 a2, s10 ; SDAG-NEXT: v_accvgpr_write_b32 a3, s11 @@ -791,40 +789,39 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__flags(<8 x half> %arg0, < ; SDAG-NEXT: v_accvgpr_write_b32 a15, s23 ; SDAG-NEXT: v_mov_b32_e32 v16, s16 ; SDAG-NEXT: v_mov_b32_e32 v17, s17 -; SDAG-NEXT: v_mfma_f32_32x32x16_f16 a[16:31], v[0:3], v[4:7], a[0:15] cbsz:2 abid:3 blgp:1 +; SDAG-NEXT: v_mfma_f32_32x32x16_f16 a[16:31], v[8:11], v[12:15], a[0:15] cbsz:2 abid:3 blgp:1 ; SDAG-NEXT: v_mov_b32_e32 v18, s18 ; SDAG-NEXT: v_mov_b32_e32 v19, s19 -; SDAG-NEXT: v_mov_b32_e32 v0, s20 -; SDAG-NEXT: v_mov_b32_e32 v1, s21 -; SDAG-NEXT: v_mov_b32_e32 v2, s22 -; SDAG-NEXT: v_mov_b32_e32 v3, s23 -; SDAG-NEXT: v_mov_b64_e32 v[14:15], 0 +; SDAG-NEXT: v_mov_b32_e32 v8, s20 +; SDAG-NEXT: v_mov_b32_e32 v9, s21 +; SDAG-NEXT: v_mov_b32_e32 v10, s22 +; SDAG-NEXT: v_mov_b32_e32 v11, s23 +; SDAG-NEXT: v_mov_b64_e32 v[6:7], 0 ; SDAG-NEXT: s_nop 4 -; SDAG-NEXT: global_store_dwordx4 v[8:9], a[28:31], off sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v[0:1], a[28:31], off sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v[10:11], a[24:27], off sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v[2:3], a[24:27], off sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v[12:13], a[20:23], off sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v[4:5], a[20:23], off sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v[14:15], a[16:19], off sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v[6:7], a[16:19], off sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v[10:11], v[16:19], off sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v[8:9], v[0:3], off sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v[0:1], v[8:11], off sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mov_b32_e32 v0, s8 ; SDAG-NEXT: v_mov_b32_e32 v1, s9 ; SDAG-NEXT: v_mov_b32_e32 v2, s10 ; SDAG-NEXT: v_mov_b32_e32 v3, s11 -; SDAG-NEXT: global_store_dwordx4 v[14:15], v[0:3], off sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v[6:7], v[0:3], off sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mov_b32_e32 v0, s12 ; SDAG-NEXT: v_mov_b32_e32 v1, s13 ; SDAG-NEXT: v_mov_b32_e32 v2, s14 ; SDAG-NEXT: v_mov_b32_e32 v3, s15 -; SDAG-NEXT: global_store_dwordx4 v[12:13], v[0:3], off sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v[4:5], v[0:3], off sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_endpgm ; @@ -891,15 +888,15 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__flags(<8 x half> %arg0, < ; HEURRC: ; %bb.0: ; HEURRC-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 ; HEURRC-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 -; HEURRC-NEXT: v_mov_b64_e32 v[8:9], 48 -; HEURRC-NEXT: v_mov_b64_e32 v[10:11], 32 -; HEURRC-NEXT: v_mov_b64_e32 v[12:13], 16 +; HEURRC-NEXT: v_mov_b64_e32 v[0:1], 48 +; HEURRC-NEXT: v_mov_b64_e32 v[2:3], 32 +; HEURRC-NEXT: v_mov_b64_e32 v[4:5], 16 ; HEURRC-NEXT: s_waitcnt lgkmcnt(0) -; HEURRC-NEXT: v_mov_b64_e32 v[0:1], s[24:25] -; HEURRC-NEXT: v_mov_b64_e32 v[2:3], s[26:27] -; HEURRC-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; HEURRC-NEXT: v_mov_b64_e32 v[8:9], s[24:25] +; HEURRC-NEXT: v_mov_b64_e32 v[10:11], s[26:27] +; HEURRC-NEXT: v_mov_b64_e32 v[12:13], s[28:29] ; HEURRC-NEXT: v_accvgpr_write_b32 a0, s8 -; HEURRC-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; HEURRC-NEXT: v_mov_b64_e32 v[14:15], 
s[30:31] ; HEURRC-NEXT: v_accvgpr_write_b32 a1, s9 ; HEURRC-NEXT: v_accvgpr_write_b32 a2, s10 ; HEURRC-NEXT: v_accvgpr_write_b32 a3, s11 @@ -917,40 +914,39 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__flags(<8 x half> %arg0, < ; HEURRC-NEXT: v_accvgpr_write_b32 a15, s23 ; HEURRC-NEXT: v_mov_b32_e32 v16, s16 ; HEURRC-NEXT: v_mov_b32_e32 v17, s17 -; HEURRC-NEXT: v_mfma_f32_32x32x16_f16 a[16:31], v[0:3], v[4:7], a[0:15] cbsz:2 abid:3 blgp:1 +; HEURRC-NEXT: v_mfma_f32_32x32x16_f16 a[16:31], v[8:11], v[12:15], a[0:15] cbsz:2 abid:3 blgp:1 ; HEURRC-NEXT: v_mov_b32_e32 v18, s18 ; HEURRC-NEXT: v_mov_b32_e32 v19, s19 -; HEURRC-NEXT: v_mov_b32_e32 v0, s20 -; HEURRC-NEXT: v_mov_b32_e32 v1, s21 -; HEURRC-NEXT: v_mov_b32_e32 v2, s22 -; HEURRC-NEXT: v_mov_b32_e32 v3, s23 -; HEURRC-NEXT: v_mov_b64_e32 v[14:15], 0 +; HEURRC-NEXT: v_mov_b32_e32 v8, s20 +; HEURRC-NEXT: v_mov_b32_e32 v9, s21 +; HEURRC-NEXT: v_mov_b32_e32 v10, s22 +; HEURRC-NEXT: v_mov_b32_e32 v11, s23 +; HEURRC-NEXT: v_mov_b64_e32 v[6:7], 0 ; HEURRC-NEXT: s_nop 4 -; HEURRC-NEXT: global_store_dwordx4 v[8:9], a[28:31], off sc0 sc1 +; HEURRC-NEXT: global_store_dwordx4 v[0:1], a[28:31], off sc0 sc1 ; HEURRC-NEXT: s_waitcnt vmcnt(0) -; HEURRC-NEXT: global_store_dwordx4 v[10:11], a[24:27], off sc0 sc1 +; HEURRC-NEXT: global_store_dwordx4 v[2:3], a[24:27], off sc0 sc1 ; HEURRC-NEXT: s_waitcnt vmcnt(0) -; HEURRC-NEXT: global_store_dwordx4 v[12:13], a[20:23], off sc0 sc1 +; HEURRC-NEXT: global_store_dwordx4 v[4:5], a[20:23], off sc0 sc1 ; HEURRC-NEXT: s_waitcnt vmcnt(0) -; HEURRC-NEXT: global_store_dwordx4 v[14:15], a[16:19], off sc0 sc1 +; HEURRC-NEXT: global_store_dwordx4 v[6:7], a[16:19], off sc0 sc1 ; HEURRC-NEXT: s_waitcnt vmcnt(0) -; HEURRC-NEXT: global_store_dwordx4 v[10:11], v[16:19], off sc0 sc1 +; HEURRC-NEXT: global_store_dwordx4 v[2:3], v[16:19], off sc0 sc1 ; HEURRC-NEXT: s_waitcnt vmcnt(0) -; HEURRC-NEXT: global_store_dwordx4 v[8:9], v[0:3], off sc0 sc1 +; HEURRC-NEXT: global_store_dwordx4 v[0:1], v[8:11], off sc0 sc1 ; HEURRC-NEXT: s_waitcnt vmcnt(0) -; HEURRC-NEXT: s_nop 0 ; HEURRC-NEXT: v_mov_b32_e32 v0, s8 ; HEURRC-NEXT: v_mov_b32_e32 v1, s9 ; HEURRC-NEXT: v_mov_b32_e32 v2, s10 ; HEURRC-NEXT: v_mov_b32_e32 v3, s11 -; HEURRC-NEXT: global_store_dwordx4 v[14:15], v[0:3], off sc0 sc1 +; HEURRC-NEXT: global_store_dwordx4 v[6:7], v[0:3], off sc0 sc1 ; HEURRC-NEXT: s_waitcnt vmcnt(0) ; HEURRC-NEXT: s_nop 0 ; HEURRC-NEXT: v_mov_b32_e32 v0, s12 ; HEURRC-NEXT: v_mov_b32_e32 v1, s13 ; HEURRC-NEXT: v_mov_b32_e32 v2, s14 ; HEURRC-NEXT: v_mov_b32_e32 v3, s15 -; HEURRC-NEXT: global_store_dwordx4 v[12:13], v[0:3], off sc0 sc1 +; HEURRC-NEXT: global_store_dwordx4 v[4:5], v[0:3], off sc0 sc1 ; HEURRC-NEXT: s_waitcnt vmcnt(0) ; HEURRC-NEXT: s_endpgm ; @@ -958,15 +954,15 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__flags(<8 x half> %arg0, < ; VGPRRC: ; %bb.0: ; VGPRRC-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 ; VGPRRC-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 -; VGPRRC-NEXT: v_mov_b64_e32 v[40:41], 48 -; VGPRRC-NEXT: v_mov_b64_e32 v[42:43], 32 -; VGPRRC-NEXT: v_mov_b64_e32 v[44:45], 16 +; VGPRRC-NEXT: v_mov_b64_e32 v[32:33], 48 +; VGPRRC-NEXT: v_mov_b64_e32 v[34:35], 32 +; VGPRRC-NEXT: v_mov_b64_e32 v[36:37], 16 ; VGPRRC-NEXT: s_waitcnt lgkmcnt(0) -; VGPRRC-NEXT: v_mov_b64_e32 v[34:35], s[26:27] -; VGPRRC-NEXT: v_mov_b64_e32 v[32:33], s[24:25] -; VGPRRC-NEXT: v_mov_b64_e32 v[38:39], s[30:31] +; VGPRRC-NEXT: v_mov_b64_e32 v[42:43], s[26:27] +; VGPRRC-NEXT: v_mov_b64_e32 v[40:41], s[24:25] +; VGPRRC-NEXT: v_mov_b64_e32 
v[46:47], s[30:31] ; VGPRRC-NEXT: v_mov_b64_e32 v[0:1], s[8:9] -; VGPRRC-NEXT: v_mov_b64_e32 v[36:37], s[28:29] +; VGPRRC-NEXT: v_mov_b64_e32 v[44:45], s[28:29] ; VGPRRC-NEXT: v_mov_b64_e32 v[2:3], s[10:11] ; VGPRRC-NEXT: v_mov_b64_e32 v[4:5], s[12:13] ; VGPRRC-NEXT: v_mov_b64_e32 v[6:7], s[14:15] @@ -976,40 +972,40 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__flags(<8 x half> %arg0, < ; VGPRRC-NEXT: v_mov_b64_e32 v[14:15], s[22:23] ; VGPRRC-NEXT: v_mov_b32_e32 v48, s16 ; VGPRRC-NEXT: v_mov_b32_e32 v49, s17 -; VGPRRC-NEXT: v_mfma_f32_32x32x16_f16 v[16:31], v[32:35], v[36:39], v[0:15] cbsz:2 abid:3 blgp:1 +; VGPRRC-NEXT: v_mfma_f32_32x32x16_f16 v[16:31], v[40:43], v[44:47], v[0:15] cbsz:2 abid:3 blgp:1 ; VGPRRC-NEXT: v_mov_b32_e32 v50, s18 ; VGPRRC-NEXT: v_mov_b32_e32 v51, s19 -; VGPRRC-NEXT: v_mov_b64_e32 v[46:47], 0 +; VGPRRC-NEXT: v_mov_b64_e32 v[38:39], 0 ; VGPRRC-NEXT: s_nop 8 -; VGPRRC-NEXT: global_store_dwordx4 v[40:41], v[28:31], off sc0 sc1 +; VGPRRC-NEXT: global_store_dwordx4 v[32:33], v[28:31], off sc0 sc1 ; VGPRRC-NEXT: s_waitcnt vmcnt(0) -; VGPRRC-NEXT: global_store_dwordx4 v[42:43], v[24:27], off sc0 sc1 +; VGPRRC-NEXT: global_store_dwordx4 v[34:35], v[24:27], off sc0 sc1 ; VGPRRC-NEXT: s_waitcnt vmcnt(0) -; VGPRRC-NEXT: global_store_dwordx4 v[44:45], v[20:23], off sc0 sc1 +; VGPRRC-NEXT: global_store_dwordx4 v[36:37], v[20:23], off sc0 sc1 ; VGPRRC-NEXT: s_waitcnt vmcnt(0) -; VGPRRC-NEXT: global_store_dwordx4 v[46:47], v[16:19], off sc0 sc1 +; VGPRRC-NEXT: global_store_dwordx4 v[38:39], v[16:19], off sc0 sc1 ; VGPRRC-NEXT: s_waitcnt vmcnt(0) ; VGPRRC-NEXT: v_mov_b32_e32 v0, s20 ; VGPRRC-NEXT: v_mov_b32_e32 v1, s21 ; VGPRRC-NEXT: v_mov_b32_e32 v2, s22 ; VGPRRC-NEXT: v_mov_b32_e32 v3, s23 -; VGPRRC-NEXT: global_store_dwordx4 v[42:43], v[48:51], off sc0 sc1 +; VGPRRC-NEXT: global_store_dwordx4 v[34:35], v[48:51], off sc0 sc1 ; VGPRRC-NEXT: s_waitcnt vmcnt(0) -; VGPRRC-NEXT: global_store_dwordx4 v[40:41], v[0:3], off sc0 sc1 +; VGPRRC-NEXT: global_store_dwordx4 v[32:33], v[0:3], off sc0 sc1 ; VGPRRC-NEXT: s_waitcnt vmcnt(0) ; VGPRRC-NEXT: s_nop 0 ; VGPRRC-NEXT: v_mov_b32_e32 v0, s8 ; VGPRRC-NEXT: v_mov_b32_e32 v1, s9 ; VGPRRC-NEXT: v_mov_b32_e32 v2, s10 ; VGPRRC-NEXT: v_mov_b32_e32 v3, s11 -; VGPRRC-NEXT: global_store_dwordx4 v[46:47], v[0:3], off sc0 sc1 +; VGPRRC-NEXT: global_store_dwordx4 v[38:39], v[0:3], off sc0 sc1 ; VGPRRC-NEXT: s_waitcnt vmcnt(0) ; VGPRRC-NEXT: s_nop 0 ; VGPRRC-NEXT: v_mov_b32_e32 v0, s12 ; VGPRRC-NEXT: v_mov_b32_e32 v1, s13 ; VGPRRC-NEXT: v_mov_b32_e32 v2, s14 ; VGPRRC-NEXT: v_mov_b32_e32 v3, s15 -; VGPRRC-NEXT: global_store_dwordx4 v[44:45], v[0:3], off sc0 sc1 +; VGPRRC-NEXT: global_store_dwordx4 v[36:37], v[0:3], off sc0 sc1 ; VGPRRC-NEXT: s_waitcnt vmcnt(0) ; VGPRRC-NEXT: s_endpgm ; AGPR-LABEL: test_mfma_f32_32x32x16_f16__flags: @@ -1489,13 +1485,13 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd(<8 x half> %arg0, ; SDAG-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 ; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 ; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 -; SDAG-NEXT: v_mov_b32_e32 v44, 0 +; SDAG-NEXT: v_mov_b32_e32 v36, 0 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b64_e32 v[34:35], s[26:27] -; SDAG-NEXT: v_mov_b64_e32 v[32:33], s[24:25] -; SDAG-NEXT: v_mov_b64_e32 v[38:39], s[30:31] +; SDAG-NEXT: v_mov_b64_e32 v[40:41], s[26:27] +; SDAG-NEXT: v_mov_b64_e32 v[38:39], s[24:25] +; SDAG-NEXT: v_mov_b64_e32 v[44:45], s[30:31] ; SDAG-NEXT: v_mov_b64_e32 v[30:31], s[22:23] -; SDAG-NEXT: v_mov_b64_e32 
v[36:37], s[28:29] +; SDAG-NEXT: v_mov_b64_e32 v[42:43], s[28:29] ; SDAG-NEXT: v_mov_b64_e32 v[28:29], s[20:21] ; SDAG-NEXT: v_mov_b64_e32 v[26:27], s[18:19] ; SDAG-NEXT: v_mov_b64_e32 v[24:25], s[16:17] @@ -1503,41 +1499,41 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd(<8 x half> %arg0, ; SDAG-NEXT: v_mov_b64_e32 v[20:21], s[12:13] ; SDAG-NEXT: v_mov_b64_e32 v[18:19], s[10:11] ; SDAG-NEXT: v_mov_b64_e32 v[16:17], s[8:9] -; SDAG-NEXT: v_mov_b32_e32 v40, s20 -; SDAG-NEXT: v_mov_b32_e32 v41, s21 -; SDAG-NEXT: v_mfma_f32_32x32x16_f16 v[0:15], v[32:35], v[36:39], v[16:31] -; SDAG-NEXT: v_mov_b32_e32 v42, s22 -; SDAG-NEXT: v_mov_b32_e32 v43, s23 -; SDAG-NEXT: global_store_dwordx4 v44, v[40:43], s[0:1] offset:48 sc0 sc1 +; SDAG-NEXT: v_mov_b32_e32 v32, s20 +; SDAG-NEXT: v_mov_b32_e32 v33, s21 +; SDAG-NEXT: v_mfma_f32_32x32x16_f16 v[0:15], v[38:41], v[42:45], v[16:31] +; SDAG-NEXT: v_mov_b32_e32 v34, s22 +; SDAG-NEXT: v_mov_b32_e32 v35, s23 +; SDAG-NEXT: global_store_dwordx4 v36, v[32:35], s[0:1] offset:48 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_mov_b32_e32 v16, s16 ; SDAG-NEXT: v_mov_b32_e32 v17, s17 ; SDAG-NEXT: v_mov_b32_e32 v18, s18 ; SDAG-NEXT: v_mov_b32_e32 v19, s19 -; SDAG-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] offset:32 sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v36, v[16:19], s[0:1] offset:32 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mov_b32_e32 v16, s12 ; SDAG-NEXT: v_mov_b32_e32 v17, s13 ; SDAG-NEXT: v_mov_b32_e32 v18, s14 ; SDAG-NEXT: v_mov_b32_e32 v19, s15 -; SDAG-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] offset:16 sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v36, v[16:19], s[0:1] offset:16 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mov_b32_e32 v16, s8 ; SDAG-NEXT: v_mov_b32_e32 v17, s9 ; SDAG-NEXT: v_mov_b32_e32 v18, s10 ; SDAG-NEXT: v_mov_b32_e32 v19, s11 -; SDAG-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v36, v[16:19], s[0:1] sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v44, v[8:11], s[0:1] offset:32 sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v36, v[8:11], s[0:1] offset:32 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v44, v[12:15], s[0:1] offset:48 sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v36, v[12:15], s[0:1] offset:48 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v44, v[0:3], s[0:1] sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v36, v[0:3], s[0:1] sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v44, v[4:7], s[0:1] offset:16 sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v36, v[4:7], s[0:1] offset:16 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_endpgm ; @@ -1592,13 +1588,13 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd(<8 x half> %arg0, ; HEURRC-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 ; HEURRC-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 ; HEURRC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 -; HEURRC-NEXT: v_mov_b32_e32 v44, 0 +; HEURRC-NEXT: v_mov_b32_e32 v36, 0 ; HEURRC-NEXT: s_waitcnt lgkmcnt(0) -; HEURRC-NEXT: v_mov_b64_e32 v[34:35], s[26:27] -; HEURRC-NEXT: v_mov_b64_e32 v[32:33], s[24:25] -; HEURRC-NEXT: v_mov_b64_e32 v[38:39], s[30:31] +; HEURRC-NEXT: v_mov_b64_e32 v[40:41], s[26:27] +; HEURRC-NEXT: v_mov_b64_e32 v[38:39], s[24:25] +; HEURRC-NEXT: v_mov_b64_e32 v[44:45], s[30:31] ; HEURRC-NEXT: v_mov_b64_e32 v[30:31], s[22:23] -; HEURRC-NEXT: 
v_mov_b64_e32 v[36:37], s[28:29] +; HEURRC-NEXT: v_mov_b64_e32 v[42:43], s[28:29] ; HEURRC-NEXT: v_mov_b64_e32 v[28:29], s[20:21] ; HEURRC-NEXT: v_mov_b64_e32 v[26:27], s[18:19] ; HEURRC-NEXT: v_mov_b64_e32 v[24:25], s[16:17] @@ -1606,41 +1602,41 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd(<8 x half> %arg0, ; HEURRC-NEXT: v_mov_b64_e32 v[20:21], s[12:13] ; HEURRC-NEXT: v_mov_b64_e32 v[18:19], s[10:11] ; HEURRC-NEXT: v_mov_b64_e32 v[16:17], s[8:9] -; HEURRC-NEXT: v_mov_b32_e32 v40, s20 -; HEURRC-NEXT: v_mov_b32_e32 v41, s21 -; HEURRC-NEXT: v_mfma_f32_32x32x16_f16 v[0:15], v[32:35], v[36:39], v[16:31] -; HEURRC-NEXT: v_mov_b32_e32 v42, s22 -; HEURRC-NEXT: v_mov_b32_e32 v43, s23 -; HEURRC-NEXT: global_store_dwordx4 v44, v[40:43], s[0:1] offset:48 sc0 sc1 +; HEURRC-NEXT: v_mov_b32_e32 v32, s20 +; HEURRC-NEXT: v_mov_b32_e32 v33, s21 +; HEURRC-NEXT: v_mfma_f32_32x32x16_f16 v[0:15], v[38:41], v[42:45], v[16:31] +; HEURRC-NEXT: v_mov_b32_e32 v34, s22 +; HEURRC-NEXT: v_mov_b32_e32 v35, s23 +; HEURRC-NEXT: global_store_dwordx4 v36, v[32:35], s[0:1] offset:48 sc0 sc1 ; HEURRC-NEXT: s_waitcnt vmcnt(0) ; HEURRC-NEXT: s_nop 2 ; HEURRC-NEXT: v_mov_b32_e32 v16, s16 ; HEURRC-NEXT: v_mov_b32_e32 v17, s17 ; HEURRC-NEXT: v_mov_b32_e32 v18, s18 ; HEURRC-NEXT: v_mov_b32_e32 v19, s19 -; HEURRC-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] offset:32 sc0 sc1 +; HEURRC-NEXT: global_store_dwordx4 v36, v[16:19], s[0:1] offset:32 sc0 sc1 ; HEURRC-NEXT: s_waitcnt vmcnt(0) ; HEURRC-NEXT: s_nop 0 ; HEURRC-NEXT: v_mov_b32_e32 v16, s12 ; HEURRC-NEXT: v_mov_b32_e32 v17, s13 ; HEURRC-NEXT: v_mov_b32_e32 v18, s14 ; HEURRC-NEXT: v_mov_b32_e32 v19, s15 -; HEURRC-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] offset:16 sc0 sc1 +; HEURRC-NEXT: global_store_dwordx4 v36, v[16:19], s[0:1] offset:16 sc0 sc1 ; HEURRC-NEXT: s_waitcnt vmcnt(0) ; HEURRC-NEXT: s_nop 0 ; HEURRC-NEXT: v_mov_b32_e32 v16, s8 ; HEURRC-NEXT: v_mov_b32_e32 v17, s9 ; HEURRC-NEXT: v_mov_b32_e32 v18, s10 ; HEURRC-NEXT: v_mov_b32_e32 v19, s11 -; HEURRC-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] sc0 sc1 +; HEURRC-NEXT: global_store_dwordx4 v36, v[16:19], s[0:1] sc0 sc1 ; HEURRC-NEXT: s_waitcnt vmcnt(0) -; HEURRC-NEXT: global_store_dwordx4 v44, v[8:11], s[0:1] offset:32 sc0 sc1 +; HEURRC-NEXT: global_store_dwordx4 v36, v[8:11], s[0:1] offset:32 sc0 sc1 ; HEURRC-NEXT: s_waitcnt vmcnt(0) -; HEURRC-NEXT: global_store_dwordx4 v44, v[12:15], s[0:1] offset:48 sc0 sc1 +; HEURRC-NEXT: global_store_dwordx4 v36, v[12:15], s[0:1] offset:48 sc0 sc1 ; HEURRC-NEXT: s_waitcnt vmcnt(0) -; HEURRC-NEXT: global_store_dwordx4 v44, v[0:3], s[0:1] sc0 sc1 +; HEURRC-NEXT: global_store_dwordx4 v36, v[0:3], s[0:1] sc0 sc1 ; HEURRC-NEXT: s_waitcnt vmcnt(0) -; HEURRC-NEXT: global_store_dwordx4 v44, v[4:7], s[0:1] offset:16 sc0 sc1 +; HEURRC-NEXT: global_store_dwordx4 v36, v[4:7], s[0:1] offset:16 sc0 sc1 ; HEURRC-NEXT: s_waitcnt vmcnt(0) ; HEURRC-NEXT: s_endpgm ; @@ -1649,13 +1645,13 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd(<8 x half> %arg0, ; VGPRRC-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 ; VGPRRC-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 ; VGPRRC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 -; VGPRRC-NEXT: v_mov_b32_e32 v44, 0 +; VGPRRC-NEXT: v_mov_b32_e32 v36, 0 ; VGPRRC-NEXT: s_waitcnt lgkmcnt(0) -; VGPRRC-NEXT: v_mov_b64_e32 v[34:35], s[26:27] -; VGPRRC-NEXT: v_mov_b64_e32 v[32:33], s[24:25] -; VGPRRC-NEXT: v_mov_b64_e32 v[38:39], s[30:31] +; VGPRRC-NEXT: v_mov_b64_e32 v[40:41], s[26:27] +; VGPRRC-NEXT: v_mov_b64_e32 v[38:39], 
s[24:25] +; VGPRRC-NEXT: v_mov_b64_e32 v[44:45], s[30:31] ; VGPRRC-NEXT: v_mov_b64_e32 v[30:31], s[22:23] -; VGPRRC-NEXT: v_mov_b64_e32 v[36:37], s[28:29] +; VGPRRC-NEXT: v_mov_b64_e32 v[42:43], s[28:29] ; VGPRRC-NEXT: v_mov_b64_e32 v[28:29], s[20:21] ; VGPRRC-NEXT: v_mov_b64_e32 v[26:27], s[18:19] ; VGPRRC-NEXT: v_mov_b64_e32 v[24:25], s[16:17] @@ -1663,41 +1659,41 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd(<8 x half> %arg0, ; VGPRRC-NEXT: v_mov_b64_e32 v[20:21], s[12:13] ; VGPRRC-NEXT: v_mov_b64_e32 v[18:19], s[10:11] ; VGPRRC-NEXT: v_mov_b64_e32 v[16:17], s[8:9] -; VGPRRC-NEXT: v_mov_b32_e32 v40, s20 -; VGPRRC-NEXT: v_mov_b32_e32 v41, s21 -; VGPRRC-NEXT: v_mfma_f32_32x32x16_f16 v[0:15], v[32:35], v[36:39], v[16:31] -; VGPRRC-NEXT: v_mov_b32_e32 v42, s22 -; VGPRRC-NEXT: v_mov_b32_e32 v43, s23 -; VGPRRC-NEXT: global_store_dwordx4 v44, v[40:43], s[0:1] offset:48 sc0 sc1 +; VGPRRC-NEXT: v_mov_b32_e32 v32, s20 +; VGPRRC-NEXT: v_mov_b32_e32 v33, s21 +; VGPRRC-NEXT: v_mfma_f32_32x32x16_f16 v[0:15], v[38:41], v[42:45], v[16:31] +; VGPRRC-NEXT: v_mov_b32_e32 v34, s22 +; VGPRRC-NEXT: v_mov_b32_e32 v35, s23 +; VGPRRC-NEXT: global_store_dwordx4 v36, v[32:35], s[0:1] offset:48 sc0 sc1 ; VGPRRC-NEXT: s_waitcnt vmcnt(0) ; VGPRRC-NEXT: s_nop 2 ; VGPRRC-NEXT: v_mov_b32_e32 v16, s16 ; VGPRRC-NEXT: v_mov_b32_e32 v17, s17 ; VGPRRC-NEXT: v_mov_b32_e32 v18, s18 ; VGPRRC-NEXT: v_mov_b32_e32 v19, s19 -; VGPRRC-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] offset:32 sc0 sc1 +; VGPRRC-NEXT: global_store_dwordx4 v36, v[16:19], s[0:1] offset:32 sc0 sc1 ; VGPRRC-NEXT: s_waitcnt vmcnt(0) ; VGPRRC-NEXT: s_nop 0 ; VGPRRC-NEXT: v_mov_b32_e32 v16, s12 ; VGPRRC-NEXT: v_mov_b32_e32 v17, s13 ; VGPRRC-NEXT: v_mov_b32_e32 v18, s14 ; VGPRRC-NEXT: v_mov_b32_e32 v19, s15 -; VGPRRC-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] offset:16 sc0 sc1 +; VGPRRC-NEXT: global_store_dwordx4 v36, v[16:19], s[0:1] offset:16 sc0 sc1 ; VGPRRC-NEXT: s_waitcnt vmcnt(0) ; VGPRRC-NEXT: s_nop 0 ; VGPRRC-NEXT: v_mov_b32_e32 v16, s8 ; VGPRRC-NEXT: v_mov_b32_e32 v17, s9 ; VGPRRC-NEXT: v_mov_b32_e32 v18, s10 ; VGPRRC-NEXT: v_mov_b32_e32 v19, s11 -; VGPRRC-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] sc0 sc1 +; VGPRRC-NEXT: global_store_dwordx4 v36, v[16:19], s[0:1] sc0 sc1 ; VGPRRC-NEXT: s_waitcnt vmcnt(0) -; VGPRRC-NEXT: global_store_dwordx4 v44, v[8:11], s[0:1] offset:32 sc0 sc1 +; VGPRRC-NEXT: global_store_dwordx4 v36, v[8:11], s[0:1] offset:32 sc0 sc1 ; VGPRRC-NEXT: s_waitcnt vmcnt(0) -; VGPRRC-NEXT: global_store_dwordx4 v44, v[12:15], s[0:1] offset:48 sc0 sc1 +; VGPRRC-NEXT: global_store_dwordx4 v36, v[12:15], s[0:1] offset:48 sc0 sc1 ; VGPRRC-NEXT: s_waitcnt vmcnt(0) -; VGPRRC-NEXT: global_store_dwordx4 v44, v[0:3], s[0:1] sc0 sc1 +; VGPRRC-NEXT: global_store_dwordx4 v36, v[0:3], s[0:1] sc0 sc1 ; VGPRRC-NEXT: s_waitcnt vmcnt(0) -; VGPRRC-NEXT: global_store_dwordx4 v44, v[4:7], s[0:1] offset:16 sc0 sc1 +; VGPRRC-NEXT: global_store_dwordx4 v36, v[4:7], s[0:1] offset:16 sc0 sc1 ; VGPRRC-NEXT: s_waitcnt vmcnt(0) ; VGPRRC-NEXT: s_endpgm ; AGPR-LABEL: test_mfma_f32_32x32x16_f16__vgprcd: @@ -1831,13 +1827,13 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd__flags(<8 x half> ; SDAG-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 ; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 ; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 -; SDAG-NEXT: v_mov_b32_e32 v44, 0 +; SDAG-NEXT: v_mov_b32_e32 v36, 0 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b64_e32 v[34:35], s[26:27] -; SDAG-NEXT: v_mov_b64_e32 
v[32:33], s[24:25] -; SDAG-NEXT: v_mov_b64_e32 v[38:39], s[30:31] +; SDAG-NEXT: v_mov_b64_e32 v[40:41], s[26:27] +; SDAG-NEXT: v_mov_b64_e32 v[38:39], s[24:25] +; SDAG-NEXT: v_mov_b64_e32 v[44:45], s[30:31] ; SDAG-NEXT: v_mov_b64_e32 v[30:31], s[22:23] -; SDAG-NEXT: v_mov_b64_e32 v[36:37], s[28:29] +; SDAG-NEXT: v_mov_b64_e32 v[42:43], s[28:29] ; SDAG-NEXT: v_mov_b64_e32 v[28:29], s[20:21] ; SDAG-NEXT: v_mov_b64_e32 v[26:27], s[18:19] ; SDAG-NEXT: v_mov_b64_e32 v[24:25], s[16:17] @@ -1845,41 +1841,41 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd__flags(<8 x half> ; SDAG-NEXT: v_mov_b64_e32 v[20:21], s[12:13] ; SDAG-NEXT: v_mov_b64_e32 v[18:19], s[10:11] ; SDAG-NEXT: v_mov_b64_e32 v[16:17], s[8:9] -; SDAG-NEXT: v_mov_b32_e32 v40, s20 -; SDAG-NEXT: v_mov_b32_e32 v41, s21 -; SDAG-NEXT: v_mfma_f32_32x32x16_f16 v[0:15], v[32:35], v[36:39], v[16:31] cbsz:1 abid:2 blgp:3 -; SDAG-NEXT: v_mov_b32_e32 v42, s22 -; SDAG-NEXT: v_mov_b32_e32 v43, s23 -; SDAG-NEXT: global_store_dwordx4 v44, v[40:43], s[0:1] offset:48 sc0 sc1 +; SDAG-NEXT: v_mov_b32_e32 v32, s20 +; SDAG-NEXT: v_mov_b32_e32 v33, s21 +; SDAG-NEXT: v_mfma_f32_32x32x16_f16 v[0:15], v[38:41], v[42:45], v[16:31] cbsz:1 abid:2 blgp:3 +; SDAG-NEXT: v_mov_b32_e32 v34, s22 +; SDAG-NEXT: v_mov_b32_e32 v35, s23 +; SDAG-NEXT: global_store_dwordx4 v36, v[32:35], s[0:1] offset:48 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_mov_b32_e32 v16, s16 ; SDAG-NEXT: v_mov_b32_e32 v17, s17 ; SDAG-NEXT: v_mov_b32_e32 v18, s18 ; SDAG-NEXT: v_mov_b32_e32 v19, s19 -; SDAG-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] offset:32 sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v36, v[16:19], s[0:1] offset:32 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mov_b32_e32 v16, s12 ; SDAG-NEXT: v_mov_b32_e32 v17, s13 ; SDAG-NEXT: v_mov_b32_e32 v18, s14 ; SDAG-NEXT: v_mov_b32_e32 v19, s15 -; SDAG-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] offset:16 sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v36, v[16:19], s[0:1] offset:16 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mov_b32_e32 v16, s8 ; SDAG-NEXT: v_mov_b32_e32 v17, s9 ; SDAG-NEXT: v_mov_b32_e32 v18, s10 ; SDAG-NEXT: v_mov_b32_e32 v19, s11 -; SDAG-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v36, v[16:19], s[0:1] sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v44, v[8:11], s[0:1] offset:32 sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v36, v[8:11], s[0:1] offset:32 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v44, v[12:15], s[0:1] offset:48 sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v36, v[12:15], s[0:1] offset:48 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v44, v[0:3], s[0:1] sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v36, v[0:3], s[0:1] sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v44, v[4:7], s[0:1] offset:16 sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v36, v[4:7], s[0:1] offset:16 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_endpgm ; @@ -1934,13 +1930,13 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd__flags(<8 x half> ; HEURRC-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 ; HEURRC-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 ; HEURRC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 -; HEURRC-NEXT: v_mov_b32_e32 v44, 0 +; HEURRC-NEXT: v_mov_b32_e32 v36, 0 ; HEURRC-NEXT: s_waitcnt lgkmcnt(0) -; HEURRC-NEXT: v_mov_b64_e32 v[34:35], 
s[26:27] -; HEURRC-NEXT: v_mov_b64_e32 v[32:33], s[24:25] -; HEURRC-NEXT: v_mov_b64_e32 v[38:39], s[30:31] +; HEURRC-NEXT: v_mov_b64_e32 v[40:41], s[26:27] +; HEURRC-NEXT: v_mov_b64_e32 v[38:39], s[24:25] +; HEURRC-NEXT: v_mov_b64_e32 v[44:45], s[30:31] ; HEURRC-NEXT: v_mov_b64_e32 v[30:31], s[22:23] -; HEURRC-NEXT: v_mov_b64_e32 v[36:37], s[28:29] +; HEURRC-NEXT: v_mov_b64_e32 v[42:43], s[28:29] ; HEURRC-NEXT: v_mov_b64_e32 v[28:29], s[20:21] ; HEURRC-NEXT: v_mov_b64_e32 v[26:27], s[18:19] ; HEURRC-NEXT: v_mov_b64_e32 v[24:25], s[16:17] @@ -1948,41 +1944,41 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd__flags(<8 x half> ; HEURRC-NEXT: v_mov_b64_e32 v[20:21], s[12:13] ; HEURRC-NEXT: v_mov_b64_e32 v[18:19], s[10:11] ; HEURRC-NEXT: v_mov_b64_e32 v[16:17], s[8:9] -; HEURRC-NEXT: v_mov_b32_e32 v40, s20 -; HEURRC-NEXT: v_mov_b32_e32 v41, s21 -; HEURRC-NEXT: v_mfma_f32_32x32x16_f16 v[0:15], v[32:35], v[36:39], v[16:31] cbsz:1 abid:2 blgp:3 -; HEURRC-NEXT: v_mov_b32_e32 v42, s22 -; HEURRC-NEXT: v_mov_b32_e32 v43, s23 -; HEURRC-NEXT: global_store_dwordx4 v44, v[40:43], s[0:1] offset:48 sc0 sc1 +; HEURRC-NEXT: v_mov_b32_e32 v32, s20 +; HEURRC-NEXT: v_mov_b32_e32 v33, s21 +; HEURRC-NEXT: v_mfma_f32_32x32x16_f16 v[0:15], v[38:41], v[42:45], v[16:31] cbsz:1 abid:2 blgp:3 +; HEURRC-NEXT: v_mov_b32_e32 v34, s22 +; HEURRC-NEXT: v_mov_b32_e32 v35, s23 +; HEURRC-NEXT: global_store_dwordx4 v36, v[32:35], s[0:1] offset:48 sc0 sc1 ; HEURRC-NEXT: s_waitcnt vmcnt(0) ; HEURRC-NEXT: s_nop 2 ; HEURRC-NEXT: v_mov_b32_e32 v16, s16 ; HEURRC-NEXT: v_mov_b32_e32 v17, s17 ; HEURRC-NEXT: v_mov_b32_e32 v18, s18 ; HEURRC-NEXT: v_mov_b32_e32 v19, s19 -; HEURRC-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] offset:32 sc0 sc1 +; HEURRC-NEXT: global_store_dwordx4 v36, v[16:19], s[0:1] offset:32 sc0 sc1 ; HEURRC-NEXT: s_waitcnt vmcnt(0) ; HEURRC-NEXT: s_nop 0 ; HEURRC-NEXT: v_mov_b32_e32 v16, s12 ; HEURRC-NEXT: v_mov_b32_e32 v17, s13 ; HEURRC-NEXT: v_mov_b32_e32 v18, s14 ; HEURRC-NEXT: v_mov_b32_e32 v19, s15 -; HEURRC-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] offset:16 sc0 sc1 +; HEURRC-NEXT: global_store_dwordx4 v36, v[16:19], s[0:1] offset:16 sc0 sc1 ; HEURRC-NEXT: s_waitcnt vmcnt(0) ; HEURRC-NEXT: s_nop 0 ; HEURRC-NEXT: v_mov_b32_e32 v16, s8 ; HEURRC-NEXT: v_mov_b32_e32 v17, s9 ; HEURRC-NEXT: v_mov_b32_e32 v18, s10 ; HEURRC-NEXT: v_mov_b32_e32 v19, s11 -; HEURRC-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] sc0 sc1 +; HEURRC-NEXT: global_store_dwordx4 v36, v[16:19], s[0:1] sc0 sc1 ; HEURRC-NEXT: s_waitcnt vmcnt(0) -; HEURRC-NEXT: global_store_dwordx4 v44, v[8:11], s[0:1] offset:32 sc0 sc1 +; HEURRC-NEXT: global_store_dwordx4 v36, v[8:11], s[0:1] offset:32 sc0 sc1 ; HEURRC-NEXT: s_waitcnt vmcnt(0) -; HEURRC-NEXT: global_store_dwordx4 v44, v[12:15], s[0:1] offset:48 sc0 sc1 +; HEURRC-NEXT: global_store_dwordx4 v36, v[12:15], s[0:1] offset:48 sc0 sc1 ; HEURRC-NEXT: s_waitcnt vmcnt(0) -; HEURRC-NEXT: global_store_dwordx4 v44, v[0:3], s[0:1] sc0 sc1 +; HEURRC-NEXT: global_store_dwordx4 v36, v[0:3], s[0:1] sc0 sc1 ; HEURRC-NEXT: s_waitcnt vmcnt(0) -; HEURRC-NEXT: global_store_dwordx4 v44, v[4:7], s[0:1] offset:16 sc0 sc1 +; HEURRC-NEXT: global_store_dwordx4 v36, v[4:7], s[0:1] offset:16 sc0 sc1 ; HEURRC-NEXT: s_waitcnt vmcnt(0) ; HEURRC-NEXT: s_endpgm ; @@ -1991,13 +1987,13 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd__flags(<8 x half> ; VGPRRC-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 ; VGPRRC-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 ; VGPRRC-NEXT: s_load_dwordx2 s[0:1], 
s[4:5], 0xa4 -; VGPRRC-NEXT: v_mov_b32_e32 v44, 0 +; VGPRRC-NEXT: v_mov_b32_e32 v36, 0 ; VGPRRC-NEXT: s_waitcnt lgkmcnt(0) -; VGPRRC-NEXT: v_mov_b64_e32 v[34:35], s[26:27] -; VGPRRC-NEXT: v_mov_b64_e32 v[32:33], s[24:25] -; VGPRRC-NEXT: v_mov_b64_e32 v[38:39], s[30:31] +; VGPRRC-NEXT: v_mov_b64_e32 v[40:41], s[26:27] +; VGPRRC-NEXT: v_mov_b64_e32 v[38:39], s[24:25] +; VGPRRC-NEXT: v_mov_b64_e32 v[44:45], s[30:31] ; VGPRRC-NEXT: v_mov_b64_e32 v[30:31], s[22:23] -; VGPRRC-NEXT: v_mov_b64_e32 v[36:37], s[28:29] +; VGPRRC-NEXT: v_mov_b64_e32 v[42:43], s[28:29] ; VGPRRC-NEXT: v_mov_b64_e32 v[28:29], s[20:21] ; VGPRRC-NEXT: v_mov_b64_e32 v[26:27], s[18:19] ; VGPRRC-NEXT: v_mov_b64_e32 v[24:25], s[16:17] @@ -2005,41 +2001,41 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd__flags(<8 x half> ; VGPRRC-NEXT: v_mov_b64_e32 v[20:21], s[12:13] ; VGPRRC-NEXT: v_mov_b64_e32 v[18:19], s[10:11] ; VGPRRC-NEXT: v_mov_b64_e32 v[16:17], s[8:9] -; VGPRRC-NEXT: v_mov_b32_e32 v40, s20 -; VGPRRC-NEXT: v_mov_b32_e32 v41, s21 -; VGPRRC-NEXT: v_mfma_f32_32x32x16_f16 v[0:15], v[32:35], v[36:39], v[16:31] cbsz:1 abid:2 blgp:3 -; VGPRRC-NEXT: v_mov_b32_e32 v42, s22 -; VGPRRC-NEXT: v_mov_b32_e32 v43, s23 -; VGPRRC-NEXT: global_store_dwordx4 v44, v[40:43], s[0:1] offset:48 sc0 sc1 +; VGPRRC-NEXT: v_mov_b32_e32 v32, s20 +; VGPRRC-NEXT: v_mov_b32_e32 v33, s21 +; VGPRRC-NEXT: v_mfma_f32_32x32x16_f16 v[0:15], v[38:41], v[42:45], v[16:31] cbsz:1 abid:2 blgp:3 +; VGPRRC-NEXT: v_mov_b32_e32 v34, s22 +; VGPRRC-NEXT: v_mov_b32_e32 v35, s23 +; VGPRRC-NEXT: global_store_dwordx4 v36, v[32:35], s[0:1] offset:48 sc0 sc1 ; VGPRRC-NEXT: s_waitcnt vmcnt(0) ; VGPRRC-NEXT: s_nop 2 ; VGPRRC-NEXT: v_mov_b32_e32 v16, s16 ; VGPRRC-NEXT: v_mov_b32_e32 v17, s17 ; VGPRRC-NEXT: v_mov_b32_e32 v18, s18 ; VGPRRC-NEXT: v_mov_b32_e32 v19, s19 -; VGPRRC-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] offset:32 sc0 sc1 +; VGPRRC-NEXT: global_store_dwordx4 v36, v[16:19], s[0:1] offset:32 sc0 sc1 ; VGPRRC-NEXT: s_waitcnt vmcnt(0) ; VGPRRC-NEXT: s_nop 0 ; VGPRRC-NEXT: v_mov_b32_e32 v16, s12 ; VGPRRC-NEXT: v_mov_b32_e32 v17, s13 ; VGPRRC-NEXT: v_mov_b32_e32 v18, s14 ; VGPRRC-NEXT: v_mov_b32_e32 v19, s15 -; VGPRRC-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] offset:16 sc0 sc1 +; VGPRRC-NEXT: global_store_dwordx4 v36, v[16:19], s[0:1] offset:16 sc0 sc1 ; VGPRRC-NEXT: s_waitcnt vmcnt(0) ; VGPRRC-NEXT: s_nop 0 ; VGPRRC-NEXT: v_mov_b32_e32 v16, s8 ; VGPRRC-NEXT: v_mov_b32_e32 v17, s9 ; VGPRRC-NEXT: v_mov_b32_e32 v18, s10 ; VGPRRC-NEXT: v_mov_b32_e32 v19, s11 -; VGPRRC-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] sc0 sc1 +; VGPRRC-NEXT: global_store_dwordx4 v36, v[16:19], s[0:1] sc0 sc1 ; VGPRRC-NEXT: s_waitcnt vmcnt(0) -; VGPRRC-NEXT: global_store_dwordx4 v44, v[8:11], s[0:1] offset:32 sc0 sc1 +; VGPRRC-NEXT: global_store_dwordx4 v36, v[8:11], s[0:1] offset:32 sc0 sc1 ; VGPRRC-NEXT: s_waitcnt vmcnt(0) -; VGPRRC-NEXT: global_store_dwordx4 v44, v[12:15], s[0:1] offset:48 sc0 sc1 +; VGPRRC-NEXT: global_store_dwordx4 v36, v[12:15], s[0:1] offset:48 sc0 sc1 ; VGPRRC-NEXT: s_waitcnt vmcnt(0) -; VGPRRC-NEXT: global_store_dwordx4 v44, v[0:3], s[0:1] sc0 sc1 +; VGPRRC-NEXT: global_store_dwordx4 v36, v[0:3], s[0:1] sc0 sc1 ; VGPRRC-NEXT: s_waitcnt vmcnt(0) -; VGPRRC-NEXT: global_store_dwordx4 v44, v[4:7], s[0:1] offset:16 sc0 sc1 +; VGPRRC-NEXT: global_store_dwordx4 v36, v[4:7], s[0:1] offset:16 sc0 sc1 ; VGPRRC-NEXT: s_waitcnt vmcnt(0) ; VGPRRC-NEXT: s_endpgm ; AGPR-LABEL: test_mfma_f32_32x32x16_f16__vgprcd__flags: @@ -5425,18 +5421,18 @@ define 
amdgpu_kernel void @test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd(ptr addrs ; GCN-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 ; GCN-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 -; GCN-NEXT: v_mov_b32_e32 v12, 0 +; GCN-NEXT: v_mov_b32_e32 v4, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_mov_b64_e32 v[0:1], s[8:9] -; GCN-NEXT: v_mov_b64_e32 v[2:3], s[10:11] -; GCN-NEXT: v_mov_b64_e32 v[4:5], s[12:13] -; GCN-NEXT: v_mov_b64_e32 v[10:11], s[2:3] -; GCN-NEXT: v_mov_b64_e32 v[6:7], s[14:15] -; GCN-NEXT: v_mov_b64_e32 v[8:9], s[0:1] +; GCN-NEXT: v_mov_b64_e32 v[6:7], s[8:9] +; GCN-NEXT: v_mov_b64_e32 v[8:9], s[10:11] +; GCN-NEXT: v_mov_b64_e32 v[10:11], s[12:13] +; GCN-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GCN-NEXT: v_mov_b64_e32 v[12:13], s[14:15] +; GCN-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GCN-NEXT: s_nop 1 -; GCN-NEXT: v_mfma_f32_16x16x32_bf16 v[0:3], v[0:3], v[4:7], v[8:11] +; GCN-NEXT: v_mfma_f32_16x16x32_bf16 v[0:3], v[6:9], v[10:13], v[0:3] ; GCN-NEXT: s_nop 7 -; GCN-NEXT: global_store_dwordx4 v12, v[0:3], s[6:7] +; GCN-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] ; GCN-NEXT: s_endpgm ; ; HEURRC-LABEL: test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd: @@ -5444,18 +5440,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd(ptr addrs ; HEURRC-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 ; HEURRC-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 ; HEURRC-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 -; HEURRC-NEXT: v_mov_b32_e32 v12, 0 +; HEURRC-NEXT: v_mov_b32_e32 v4, 0 ; HEURRC-NEXT: s_waitcnt lgkmcnt(0) -; HEURRC-NEXT: v_mov_b64_e32 v[0:1], s[8:9] -; HEURRC-NEXT: v_mov_b64_e32 v[2:3], s[10:11] -; HEURRC-NEXT: v_mov_b64_e32 v[4:5], s[12:13] -; HEURRC-NEXT: v_mov_b64_e32 v[10:11], s[2:3] -; HEURRC-NEXT: v_mov_b64_e32 v[6:7], s[14:15] -; HEURRC-NEXT: v_mov_b64_e32 v[8:9], s[0:1] +; HEURRC-NEXT: v_mov_b64_e32 v[6:7], s[8:9] +; HEURRC-NEXT: v_mov_b64_e32 v[8:9], s[10:11] +; HEURRC-NEXT: v_mov_b64_e32 v[10:11], s[12:13] +; HEURRC-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; HEURRC-NEXT: v_mov_b64_e32 v[12:13], s[14:15] +; HEURRC-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; HEURRC-NEXT: s_nop 1 -; HEURRC-NEXT: v_mfma_f32_16x16x32_bf16 v[0:3], v[0:3], v[4:7], v[8:11] +; HEURRC-NEXT: v_mfma_f32_16x16x32_bf16 v[0:3], v[6:9], v[10:13], v[0:3] ; HEURRC-NEXT: s_nop 7 -; HEURRC-NEXT: global_store_dwordx4 v12, v[0:3], s[6:7] +; HEURRC-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] ; HEURRC-NEXT: s_endpgm ; ; VGPRRC-LABEL: test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd: @@ -5463,18 +5459,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd(ptr addrs ; VGPRRC-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 ; VGPRRC-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 ; VGPRRC-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 -; VGPRRC-NEXT: v_mov_b32_e32 v12, 0 +; VGPRRC-NEXT: v_mov_b32_e32 v4, 0 ; VGPRRC-NEXT: s_waitcnt lgkmcnt(0) -; VGPRRC-NEXT: v_mov_b64_e32 v[0:1], s[8:9] -; VGPRRC-NEXT: v_mov_b64_e32 v[2:3], s[10:11] -; VGPRRC-NEXT: v_mov_b64_e32 v[4:5], s[12:13] -; VGPRRC-NEXT: v_mov_b64_e32 v[10:11], s[2:3] -; VGPRRC-NEXT: v_mov_b64_e32 v[6:7], s[14:15] -; VGPRRC-NEXT: v_mov_b64_e32 v[8:9], s[0:1] +; VGPRRC-NEXT: v_mov_b64_e32 v[6:7], s[8:9] +; VGPRRC-NEXT: v_mov_b64_e32 v[8:9], s[10:11] +; VGPRRC-NEXT: v_mov_b64_e32 v[10:11], s[12:13] +; VGPRRC-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; VGPRRC-NEXT: v_mov_b64_e32 v[12:13], s[14:15] +; VGPRRC-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; VGPRRC-NEXT: s_nop 1 -; VGPRRC-NEXT: v_mfma_f32_16x16x32_bf16 v[0:3], v[0:3], v[4:7], v[8:11] 
+; VGPRRC-NEXT: v_mfma_f32_16x16x32_bf16 v[0:3], v[6:9], v[10:13], v[0:3] ; VGPRRC-NEXT: s_nop 7 -; VGPRRC-NEXT: global_store_dwordx4 v12, v[0:3], s[6:7] +; VGPRRC-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] ; VGPRRC-NEXT: s_endpgm ; AGPR-LABEL: test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd: ; AGPR: ; %bb.0: @@ -5525,18 +5521,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd__flags(pt ; GCN-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 ; GCN-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 -; GCN-NEXT: v_mov_b32_e32 v12, 0 +; GCN-NEXT: v_mov_b32_e32 v4, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_mov_b64_e32 v[0:1], s[8:9] -; GCN-NEXT: v_mov_b64_e32 v[2:3], s[10:11] -; GCN-NEXT: v_mov_b64_e32 v[4:5], s[12:13] -; GCN-NEXT: v_mov_b64_e32 v[10:11], s[2:3] -; GCN-NEXT: v_mov_b64_e32 v[6:7], s[14:15] -; GCN-NEXT: v_mov_b64_e32 v[8:9], s[0:1] +; GCN-NEXT: v_mov_b64_e32 v[6:7], s[8:9] +; GCN-NEXT: v_mov_b64_e32 v[8:9], s[10:11] +; GCN-NEXT: v_mov_b64_e32 v[10:11], s[12:13] +; GCN-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GCN-NEXT: v_mov_b64_e32 v[12:13], s[14:15] +; GCN-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GCN-NEXT: s_nop 1 -; GCN-NEXT: v_mfma_f32_16x16x32_bf16 v[0:3], v[0:3], v[4:7], v[8:11] cbsz:3 abid:2 blgp:1 +; GCN-NEXT: v_mfma_f32_16x16x32_bf16 v[0:3], v[6:9], v[10:13], v[0:3] cbsz:3 abid:2 blgp:1 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: global_store_dwordx4 v12, v[0:3], s[6:7] +; GCN-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] ; GCN-NEXT: s_endpgm ; ; HEURRC-LABEL: test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd__flags: @@ -5544,18 +5540,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd__flags(pt ; HEURRC-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 ; HEURRC-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 ; HEURRC-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 -; HEURRC-NEXT: v_mov_b32_e32 v12, 0 +; HEURRC-NEXT: v_mov_b32_e32 v4, 0 ; HEURRC-NEXT: s_waitcnt lgkmcnt(0) -; HEURRC-NEXT: v_mov_b64_e32 v[0:1], s[8:9] -; HEURRC-NEXT: v_mov_b64_e32 v[2:3], s[10:11] -; HEURRC-NEXT: v_mov_b64_e32 v[4:5], s[12:13] -; HEURRC-NEXT: v_mov_b64_e32 v[10:11], s[2:3] -; HEURRC-NEXT: v_mov_b64_e32 v[6:7], s[14:15] -; HEURRC-NEXT: v_mov_b64_e32 v[8:9], s[0:1] +; HEURRC-NEXT: v_mov_b64_e32 v[6:7], s[8:9] +; HEURRC-NEXT: v_mov_b64_e32 v[8:9], s[10:11] +; HEURRC-NEXT: v_mov_b64_e32 v[10:11], s[12:13] +; HEURRC-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; HEURRC-NEXT: v_mov_b64_e32 v[12:13], s[14:15] +; HEURRC-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; HEURRC-NEXT: s_nop 1 -; HEURRC-NEXT: v_mfma_f32_16x16x32_bf16 v[0:3], v[0:3], v[4:7], v[8:11] cbsz:3 abid:2 blgp:1 +; HEURRC-NEXT: v_mfma_f32_16x16x32_bf16 v[0:3], v[6:9], v[10:13], v[0:3] cbsz:3 abid:2 blgp:1 ; HEURRC-NEXT: s_nop 7 -; HEURRC-NEXT: global_store_dwordx4 v12, v[0:3], s[6:7] +; HEURRC-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] ; HEURRC-NEXT: s_endpgm ; ; VGPRRC-LABEL: test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd__flags: @@ -5563,18 +5559,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd__flags(pt ; VGPRRC-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 ; VGPRRC-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 ; VGPRRC-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 -; VGPRRC-NEXT: v_mov_b32_e32 v12, 0 +; VGPRRC-NEXT: v_mov_b32_e32 v4, 0 ; VGPRRC-NEXT: s_waitcnt lgkmcnt(0) -; VGPRRC-NEXT: v_mov_b64_e32 v[0:1], s[8:9] -; VGPRRC-NEXT: v_mov_b64_e32 v[2:3], s[10:11] -; VGPRRC-NEXT: v_mov_b64_e32 v[4:5], s[12:13] -; VGPRRC-NEXT: v_mov_b64_e32 v[10:11], s[2:3] -; VGPRRC-NEXT: v_mov_b64_e32 
v[6:7], s[14:15] -; VGPRRC-NEXT: v_mov_b64_e32 v[8:9], s[0:1] +; VGPRRC-NEXT: v_mov_b64_e32 v[6:7], s[8:9] +; VGPRRC-NEXT: v_mov_b64_e32 v[8:9], s[10:11] +; VGPRRC-NEXT: v_mov_b64_e32 v[10:11], s[12:13] +; VGPRRC-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; VGPRRC-NEXT: v_mov_b64_e32 v[12:13], s[14:15] +; VGPRRC-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; VGPRRC-NEXT: s_nop 1 -; VGPRRC-NEXT: v_mfma_f32_16x16x32_bf16 v[0:3], v[0:3], v[4:7], v[8:11] cbsz:3 abid:2 blgp:1 +; VGPRRC-NEXT: v_mfma_f32_16x16x32_bf16 v[0:3], v[6:9], v[10:13], v[0:3] cbsz:3 abid:2 blgp:1 ; VGPRRC-NEXT: s_nop 7 -; VGPRRC-NEXT: global_store_dwordx4 v12, v[0:3], s[6:7] +; VGPRRC-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] ; VGPRRC-NEXT: s_endpgm ; AGPR-LABEL: test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd__flags: ; AGPR: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll index 6eb9449..ee11b92 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll @@ -17,24 +17,24 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x64_f16__vgpr(ptr addrspace(1) % ; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 ; SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0 -; SDAG-NEXT: v_mov_b32_e32 v16, 0 +; SDAG-NEXT: v_mov_b32_e32 v4, 0 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: global_load_dwordx4 v[8:11], v0, s[6:7] +; SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[6:7] ; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44 ; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64 -; SDAG-NEXT: v_mov_b64_e32 v[14:15], s[2:3] -; SDAG-NEXT: v_mov_b64_e32 v[12:13], s[0:1] +; SDAG-NEXT: v_mov_b64_e32 v[16:17], s[2:3] +; SDAG-NEXT: v_mov_b64_e32 v[14:15], s[0:1] ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[8:9] -; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[10:11] -; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[12:13] -; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[14:15] -; SDAG-NEXT: v_mov_b32_e32 v17, s16 +; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[8:9] +; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[10:11] +; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[12:13] +; SDAG-NEXT: v_mov_b64_e32 v[12:13], s[14:15] +; SDAG-NEXT: v_mov_b32_e32 v5, s16 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_smfmac_f32_16x16x64_f16 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2 +; SDAG-NEXT: v_smfmac_f32_16x16x64_f16 v[0:3], v[14:17], v[6:13], v5 cbsz:1 abid:2 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7] +; SDAG-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_smfmac_f32_16x16x64_f16__vgpr: @@ -120,30 +120,25 @@ define <4 x float> @test_smfmac_f32_16x16x64_f16__sgpr(<8 x half> inreg %arg0, < ; SDAG-LABEL: test_smfmac_f32_16x16x64_f16__sgpr: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v10, s0 -; SDAG-NEXT: v_mov_b32_e32 v11, s1 -; SDAG-NEXT: v_mov_b32_e32 v12, s2 -; SDAG-NEXT: v_mov_b32_e32 v13, s3 -; SDAG-NEXT: v_mov_b32_e32 v2, s16 -; SDAG-NEXT: v_mov_b32_e32 v3, s17 -; SDAG-NEXT: v_mov_b32_e32 v4, s18 -; SDAG-NEXT: v_mov_b32_e32 v5, s19 -; SDAG-NEXT: v_mov_b32_e32 v6, s20 -; SDAG-NEXT: v_mov_b32_e32 v7, s21 -; SDAG-NEXT: v_mov_b32_e32 v8, s22 -; SDAG-NEXT: v_mov_b32_e32 v9, s23 -; SDAG-NEXT: v_accvgpr_write_b32 a0, s24 -; SDAG-NEXT: v_accvgpr_write_b32 a1, s25 -; SDAG-NEXT: v_accvgpr_write_b32 a2, s26 -; SDAG-NEXT: v_accvgpr_write_b32 a3, s27 -; SDAG-NEXT: v_mov_b32_e32 v0, s28 +; SDAG-NEXT: 
v_mov_b32_e32 v14, s0 +; SDAG-NEXT: v_mov_b32_e32 v15, s1 +; SDAG-NEXT: v_mov_b32_e32 v16, s2 +; SDAG-NEXT: v_mov_b32_e32 v17, s3 +; SDAG-NEXT: v_mov_b32_e32 v6, s16 +; SDAG-NEXT: v_mov_b32_e32 v7, s17 +; SDAG-NEXT: v_mov_b32_e32 v8, s18 +; SDAG-NEXT: v_mov_b32_e32 v9, s19 +; SDAG-NEXT: v_mov_b32_e32 v10, s20 +; SDAG-NEXT: v_mov_b32_e32 v11, s21 +; SDAG-NEXT: v_mov_b32_e32 v12, s22 +; SDAG-NEXT: v_mov_b32_e32 v13, s23 +; SDAG-NEXT: v_mov_b32_e32 v0, s24 +; SDAG-NEXT: v_mov_b32_e32 v1, s25 +; SDAG-NEXT: v_mov_b32_e32 v2, s26 +; SDAG-NEXT: v_mov_b32_e32 v3, s27 +; SDAG-NEXT: v_mov_b32_e32 v4, s28 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_smfmac_f32_16x16x64_f16 a[0:3], v[10:13], v[2:9], v0 -; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 -; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 -; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 -; SDAG-NEXT: v_accvgpr_read_b32 v3, a3 +; SDAG-NEXT: v_smfmac_f32_16x16x64_f16 v[0:3], v[14:17], v[6:13], v4 ; SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_smfmac_f32_16x16x64_f16__sgpr: @@ -187,17 +182,17 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x32_f16__vgpr(ptr addrspace(1) % ; SDAG-NEXT: global_load_dwordx4 v[0:3], v16, s[6:7] ; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44 ; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64 -; SDAG-NEXT: v_mov_b64_e32 v[26:27], s[2:3] -; SDAG-NEXT: v_mov_b64_e32 v[24:25], s[0:1] +; SDAG-NEXT: v_mov_b64_e32 v[28:29], s[2:3] +; SDAG-NEXT: v_mov_b64_e32 v[26:27], s[0:1] ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b64_e32 v[22:23], s[14:15] -; SDAG-NEXT: v_mov_b64_e32 v[20:21], s[12:13] -; SDAG-NEXT: v_mov_b64_e32 v[18:19], s[10:11] -; SDAG-NEXT: v_mov_b64_e32 v[16:17], s[8:9] -; SDAG-NEXT: v_mov_b32_e32 v28, s16 +; SDAG-NEXT: v_mov_b64_e32 v[24:25], s[14:15] +; SDAG-NEXT: v_mov_b64_e32 v[22:23], s[12:13] +; SDAG-NEXT: v_mov_b64_e32 v[20:21], s[10:11] +; SDAG-NEXT: v_mov_b64_e32 v[18:19], s[8:9] +; SDAG-NEXT: v_mov_b32_e32 v16, s16 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_smfmac_f32_32x32x32_f16 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2 +; SDAG-NEXT: v_smfmac_f32_32x32x32_f16 v[0:15], v[26:29], v[18:25], v16 cbsz:1 abid:2 ; SDAG-NEXT: v_mov_b32_e32 v16, 0 ; SDAG-NEXT: s_nop 10 ; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7] offset:32 @@ -436,53 +431,37 @@ define <16 x float> @test_smfmac_f32_32x32x32_f16__sgpr(<8 x half> inreg %arg0, ; SDAG-LABEL: test_smfmac_f32_32x32x32_f16__sgpr: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v36, s0 -; SDAG-NEXT: v_mov_b32_e32 v37, s1 -; SDAG-NEXT: v_mov_b32_e32 v38, s2 -; SDAG-NEXT: v_mov_b32_e32 v39, s3 -; SDAG-NEXT: v_mov_b32_e32 v13, s25 -; SDAG-NEXT: v_mov_b32_e32 v14, s26 -; SDAG-NEXT: v_mov_b32_e32 v15, s27 -; SDAG-NEXT: v_mov_b32_e32 v16, s28 -; SDAG-NEXT: v_mov_b32_e32 v17, s29 -; SDAG-NEXT: v_mov_b32_e32 v28, s16 -; SDAG-NEXT: v_mov_b32_e32 v29, s17 -; SDAG-NEXT: v_mov_b32_e32 v30, s18 -; SDAG-NEXT: v_mov_b32_e32 v31, s19 -; SDAG-NEXT: v_mov_b32_e32 v32, s20 -; SDAG-NEXT: v_mov_b32_e32 v33, s21 -; SDAG-NEXT: v_mov_b32_e32 v34, s22 -; SDAG-NEXT: v_mov_b32_e32 v35, s23 -; SDAG-NEXT: v_mov_b32_e32 v12, s24 -; SDAG-NEXT: v_mov_b32_e32 v18, v0 -; SDAG-NEXT: v_mov_b32_e32 v19, v1 -; SDAG-NEXT: v_mov_b32_e32 v20, v2 -; SDAG-NEXT: v_mov_b32_e32 v21, v3 -; SDAG-NEXT: v_mov_b32_e32 v22, v4 -; SDAG-NEXT: v_mov_b32_e32 v23, v5 -; SDAG-NEXT: v_mov_b32_e32 v24, v6 -; SDAG-NEXT: v_mov_b32_e32 v25, v7 -; SDAG-NEXT: v_mov_b32_e32 v26, v8 -; SDAG-NEXT: v_mov_b32_e32 v27, v9 +; SDAG-NEXT: 
v_mov_b32_e32 v26, s0 +; SDAG-NEXT: v_mov_b32_e32 v27, s1 +; SDAG-NEXT: v_mov_b32_e32 v28, s2 +; SDAG-NEXT: v_mov_b32_e32 v29, s3 +; SDAG-NEXT: v_mov_b32_e32 v16, v10 +; SDAG-NEXT: v_mov_b32_e32 v15, v9 +; SDAG-NEXT: v_mov_b32_e32 v14, v8 +; SDAG-NEXT: v_mov_b32_e32 v13, v7 +; SDAG-NEXT: v_mov_b32_e32 v12, v6 +; SDAG-NEXT: v_mov_b32_e32 v11, v5 +; SDAG-NEXT: v_mov_b32_e32 v10, v4 +; SDAG-NEXT: v_mov_b32_e32 v9, v3 +; SDAG-NEXT: v_mov_b32_e32 v8, v2 +; SDAG-NEXT: v_mov_b32_e32 v7, v1 +; SDAG-NEXT: v_mov_b32_e32 v6, v0 +; SDAG-NEXT: v_mov_b32_e32 v0, s24 +; SDAG-NEXT: v_mov_b32_e32 v1, s25 +; SDAG-NEXT: v_mov_b32_e32 v2, s26 +; SDAG-NEXT: v_mov_b32_e32 v3, s27 +; SDAG-NEXT: v_mov_b32_e32 v4, s28 +; SDAG-NEXT: v_mov_b32_e32 v5, s29 +; SDAG-NEXT: v_mov_b32_e32 v18, s16 +; SDAG-NEXT: v_mov_b32_e32 v19, s17 +; SDAG-NEXT: v_mov_b32_e32 v20, s18 +; SDAG-NEXT: v_mov_b32_e32 v21, s19 +; SDAG-NEXT: v_mov_b32_e32 v22, s20 +; SDAG-NEXT: v_mov_b32_e32 v23, s21 +; SDAG-NEXT: v_mov_b32_e32 v24, s22 +; SDAG-NEXT: v_mov_b32_e32 v25, s23 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_smfmac_f32_32x32x32_f16 v[12:27], v[36:39], v[28:35], v10 -; SDAG-NEXT: s_nop 11 -; SDAG-NEXT: v_mov_b32_e32 v0, v12 -; SDAG-NEXT: v_mov_b32_e32 v1, v13 -; SDAG-NEXT: v_mov_b32_e32 v2, v14 -; SDAG-NEXT: v_mov_b32_e32 v3, v15 -; SDAG-NEXT: v_mov_b32_e32 v4, v16 -; SDAG-NEXT: v_mov_b32_e32 v5, v17 -; SDAG-NEXT: v_mov_b32_e32 v6, v18 -; SDAG-NEXT: v_mov_b32_e32 v7, v19 -; SDAG-NEXT: v_mov_b32_e32 v8, v20 -; SDAG-NEXT: v_mov_b32_e32 v9, v21 -; SDAG-NEXT: v_mov_b32_e32 v10, v22 -; SDAG-NEXT: v_mov_b32_e32 v11, v23 -; SDAG-NEXT: v_mov_b32_e32 v12, v24 -; SDAG-NEXT: v_mov_b32_e32 v13, v25 -; SDAG-NEXT: v_mov_b32_e32 v14, v26 -; SDAG-NEXT: v_mov_b32_e32 v15, v27 +; SDAG-NEXT: v_smfmac_f32_32x32x32_f16 v[0:15], v[26:29], v[18:25], v16 ; SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_smfmac_f32_32x32x32_f16__sgpr: @@ -541,24 +520,24 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x64_bf16__vgpr(ptr addrspace(1) ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 ; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 4, v0 -; GCN-NEXT: v_mov_b32_e32 v16, 0 +; GCN-NEXT: v_mov_b32_e32 v4, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: global_load_dwordx4 v[8:11], v0, s[6:7] +; GCN-NEXT: global_load_dwordx4 v[0:3], v0, s[6:7] ; GCN-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44 ; GCN-NEXT: s_load_dword s16, s[4:5], 0x64 -; GCN-NEXT: v_mov_b64_e32 v[14:15], s[2:3] -; GCN-NEXT: v_mov_b64_e32 v[12:13], s[0:1] +; GCN-NEXT: v_mov_b64_e32 v[16:17], s[2:3] +; GCN-NEXT: v_mov_b64_e32 v[14:15], s[0:1] ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_mov_b64_e32 v[0:1], s[8:9] -; GCN-NEXT: v_mov_b64_e32 v[2:3], s[10:11] -; GCN-NEXT: v_mov_b64_e32 v[4:5], s[12:13] -; GCN-NEXT: v_mov_b64_e32 v[6:7], s[14:15] -; GCN-NEXT: v_mov_b32_e32 v17, s16 +; GCN-NEXT: v_mov_b64_e32 v[6:7], s[8:9] +; GCN-NEXT: v_mov_b64_e32 v[8:9], s[10:11] +; GCN-NEXT: v_mov_b64_e32 v[10:11], s[12:13] +; GCN-NEXT: v_mov_b64_e32 v[12:13], s[14:15] +; GCN-NEXT: v_mov_b32_e32 v5, s16 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 -; GCN-NEXT: v_smfmac_f32_16x16x64_bf16 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2 +; GCN-NEXT: v_smfmac_f32_16x16x64_bf16 v[0:3], v[14:17], v[6:13], v5 cbsz:1 abid:2 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7] +; GCN-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] ; GCN-NEXT: s_endpgm bb: %id = call i32 @llvm.amdgcn.workitem.id.x() @@ -618,30 +597,25 @@ define <4 x float> @test_smfmac_f32_16x16x64_bf16__sgpr(<8 x 
bfloat> inreg %arg0 ; GCN-LABEL: test_smfmac_f32_16x16x64_bf16__sgpr: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v10, s0 -; GCN-NEXT: v_mov_b32_e32 v11, s1 -; GCN-NEXT: v_mov_b32_e32 v12, s2 -; GCN-NEXT: v_mov_b32_e32 v13, s3 -; GCN-NEXT: v_mov_b32_e32 v2, s16 -; GCN-NEXT: v_mov_b32_e32 v3, s17 -; GCN-NEXT: v_mov_b32_e32 v4, s18 -; GCN-NEXT: v_mov_b32_e32 v5, s19 -; GCN-NEXT: v_mov_b32_e32 v6, s20 -; GCN-NEXT: v_mov_b32_e32 v7, s21 -; GCN-NEXT: v_mov_b32_e32 v8, s22 -; GCN-NEXT: v_mov_b32_e32 v9, s23 -; GCN-NEXT: v_accvgpr_write_b32 a0, s24 -; GCN-NEXT: v_accvgpr_write_b32 a1, s25 -; GCN-NEXT: v_accvgpr_write_b32 a2, s26 -; GCN-NEXT: v_accvgpr_write_b32 a3, s27 -; GCN-NEXT: v_mov_b32_e32 v0, s28 +; GCN-NEXT: v_mov_b32_e32 v14, s0 +; GCN-NEXT: v_mov_b32_e32 v15, s1 +; GCN-NEXT: v_mov_b32_e32 v16, s2 +; GCN-NEXT: v_mov_b32_e32 v17, s3 +; GCN-NEXT: v_mov_b32_e32 v6, s16 +; GCN-NEXT: v_mov_b32_e32 v7, s17 +; GCN-NEXT: v_mov_b32_e32 v8, s18 +; GCN-NEXT: v_mov_b32_e32 v9, s19 +; GCN-NEXT: v_mov_b32_e32 v10, s20 +; GCN-NEXT: v_mov_b32_e32 v11, s21 +; GCN-NEXT: v_mov_b32_e32 v12, s22 +; GCN-NEXT: v_mov_b32_e32 v13, s23 +; GCN-NEXT: v_mov_b32_e32 v0, s24 +; GCN-NEXT: v_mov_b32_e32 v1, s25 +; GCN-NEXT: v_mov_b32_e32 v2, s26 +; GCN-NEXT: v_mov_b32_e32 v3, s27 +; GCN-NEXT: v_mov_b32_e32 v4, s28 ; GCN-NEXT: s_nop 1 -; GCN-NEXT: v_smfmac_f32_16x16x64_bf16 a[0:3], v[10:13], v[2:9], v0 -; GCN-NEXT: s_nop 7 -; GCN-NEXT: v_accvgpr_read_b32 v0, a0 -; GCN-NEXT: v_accvgpr_read_b32 v1, a1 -; GCN-NEXT: v_accvgpr_read_b32 v2, a2 -; GCN-NEXT: v_accvgpr_read_b32 v3, a3 +; GCN-NEXT: v_smfmac_f32_16x16x64_bf16 v[0:3], v[14:17], v[6:13], v4 ; GCN-NEXT: s_setpc_b64 s[30:31] %result = call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x64.bf16(<8 x bfloat> %arg0, <16 x bfloat> %arg1, <4 x float> %arg2, i32 %arg3, i32 immarg 0, i32 immarg 0) ret <4 x float> %result @@ -667,17 +641,17 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x32_bf16__vgpr(ptr addrspace(1) ; GCN-NEXT: global_load_dwordx4 v[0:3], v16, s[6:7] ; GCN-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44 ; GCN-NEXT: s_load_dword s16, s[4:5], 0x64 -; GCN-NEXT: v_mov_b64_e32 v[26:27], s[2:3] -; GCN-NEXT: v_mov_b64_e32 v[24:25], s[0:1] +; GCN-NEXT: v_mov_b64_e32 v[28:29], s[2:3] +; GCN-NEXT: v_mov_b64_e32 v[26:27], s[0:1] ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_mov_b64_e32 v[22:23], s[14:15] -; GCN-NEXT: v_mov_b64_e32 v[20:21], s[12:13] -; GCN-NEXT: v_mov_b64_e32 v[18:19], s[10:11] -; GCN-NEXT: v_mov_b64_e32 v[16:17], s[8:9] -; GCN-NEXT: v_mov_b32_e32 v28, s16 +; GCN-NEXT: v_mov_b64_e32 v[24:25], s[14:15] +; GCN-NEXT: v_mov_b64_e32 v[22:23], s[12:13] +; GCN-NEXT: v_mov_b64_e32 v[20:21], s[10:11] +; GCN-NEXT: v_mov_b64_e32 v[18:19], s[8:9] +; GCN-NEXT: v_mov_b32_e32 v16, s16 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 -; GCN-NEXT: v_smfmac_f32_32x32x32_bf16 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2 +; GCN-NEXT: v_smfmac_f32_32x32x32_bf16 v[0:15], v[26:29], v[18:25], v16 cbsz:1 abid:2 ; GCN-NEXT: v_mov_b32_e32 v16, 0 ; GCN-NEXT: s_nop 10 ; GCN-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7] offset:32 @@ -779,53 +753,37 @@ define <16 x float> @test_smfmac_f32_32x32x32_bf16__sgpr(<8 x bfloat> inreg %arg ; GCN-LABEL: test_smfmac_f32_32x32x32_bf16__sgpr: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v36, s0 -; GCN-NEXT: v_mov_b32_e32 v37, s1 -; GCN-NEXT: v_mov_b32_e32 v38, s2 -; GCN-NEXT: v_mov_b32_e32 v39, s3 -; GCN-NEXT: v_mov_b32_e32 v13, s25 -; 
GCN-NEXT: v_mov_b32_e32 v14, s26 -; GCN-NEXT: v_mov_b32_e32 v15, s27 -; GCN-NEXT: v_mov_b32_e32 v16, s28 -; GCN-NEXT: v_mov_b32_e32 v17, s29 -; GCN-NEXT: v_mov_b32_e32 v28, s16 -; GCN-NEXT: v_mov_b32_e32 v29, s17 -; GCN-NEXT: v_mov_b32_e32 v30, s18 -; GCN-NEXT: v_mov_b32_e32 v31, s19 -; GCN-NEXT: v_mov_b32_e32 v32, s20 -; GCN-NEXT: v_mov_b32_e32 v33, s21 -; GCN-NEXT: v_mov_b32_e32 v34, s22 -; GCN-NEXT: v_mov_b32_e32 v35, s23 -; GCN-NEXT: v_mov_b32_e32 v12, s24 -; GCN-NEXT: v_mov_b32_e32 v18, v0 -; GCN-NEXT: v_mov_b32_e32 v19, v1 -; GCN-NEXT: v_mov_b32_e32 v20, v2 -; GCN-NEXT: v_mov_b32_e32 v21, v3 -; GCN-NEXT: v_mov_b32_e32 v22, v4 -; GCN-NEXT: v_mov_b32_e32 v23, v5 -; GCN-NEXT: v_mov_b32_e32 v24, v6 -; GCN-NEXT: v_mov_b32_e32 v25, v7 -; GCN-NEXT: v_mov_b32_e32 v26, v8 -; GCN-NEXT: v_mov_b32_e32 v27, v9 +; GCN-NEXT: v_mov_b32_e32 v26, s0 +; GCN-NEXT: v_mov_b32_e32 v27, s1 +; GCN-NEXT: v_mov_b32_e32 v28, s2 +; GCN-NEXT: v_mov_b32_e32 v29, s3 +; GCN-NEXT: v_mov_b32_e32 v16, v10 +; GCN-NEXT: v_mov_b32_e32 v15, v9 +; GCN-NEXT: v_mov_b32_e32 v14, v8 +; GCN-NEXT: v_mov_b32_e32 v13, v7 +; GCN-NEXT: v_mov_b32_e32 v12, v6 +; GCN-NEXT: v_mov_b32_e32 v11, v5 +; GCN-NEXT: v_mov_b32_e32 v10, v4 +; GCN-NEXT: v_mov_b32_e32 v9, v3 +; GCN-NEXT: v_mov_b32_e32 v8, v2 +; GCN-NEXT: v_mov_b32_e32 v7, v1 +; GCN-NEXT: v_mov_b32_e32 v6, v0 +; GCN-NEXT: v_mov_b32_e32 v0, s24 +; GCN-NEXT: v_mov_b32_e32 v1, s25 +; GCN-NEXT: v_mov_b32_e32 v2, s26 +; GCN-NEXT: v_mov_b32_e32 v3, s27 +; GCN-NEXT: v_mov_b32_e32 v4, s28 +; GCN-NEXT: v_mov_b32_e32 v5, s29 +; GCN-NEXT: v_mov_b32_e32 v18, s16 +; GCN-NEXT: v_mov_b32_e32 v19, s17 +; GCN-NEXT: v_mov_b32_e32 v20, s18 +; GCN-NEXT: v_mov_b32_e32 v21, s19 +; GCN-NEXT: v_mov_b32_e32 v22, s20 +; GCN-NEXT: v_mov_b32_e32 v23, s21 +; GCN-NEXT: v_mov_b32_e32 v24, s22 +; GCN-NEXT: v_mov_b32_e32 v25, s23 ; GCN-NEXT: s_nop 1 -; GCN-NEXT: v_smfmac_f32_32x32x32_bf16 v[12:27], v[36:39], v[28:35], v10 -; GCN-NEXT: s_nop 11 -; GCN-NEXT: v_mov_b32_e32 v0, v12 -; GCN-NEXT: v_mov_b32_e32 v1, v13 -; GCN-NEXT: v_mov_b32_e32 v2, v14 -; GCN-NEXT: v_mov_b32_e32 v3, v15 -; GCN-NEXT: v_mov_b32_e32 v4, v16 -; GCN-NEXT: v_mov_b32_e32 v5, v17 -; GCN-NEXT: v_mov_b32_e32 v6, v18 -; GCN-NEXT: v_mov_b32_e32 v7, v19 -; GCN-NEXT: v_mov_b32_e32 v8, v20 -; GCN-NEXT: v_mov_b32_e32 v9, v21 -; GCN-NEXT: v_mov_b32_e32 v10, v22 -; GCN-NEXT: v_mov_b32_e32 v11, v23 -; GCN-NEXT: v_mov_b32_e32 v12, v24 -; GCN-NEXT: v_mov_b32_e32 v13, v25 -; GCN-NEXT: v_mov_b32_e32 v14, v26 -; GCN-NEXT: v_mov_b32_e32 v15, v27 +; GCN-NEXT: v_smfmac_f32_32x32x32_bf16 v[0:15], v[26:29], v[18:25], v16 ; GCN-NEXT: s_setpc_b64 s[30:31] %result = call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.bf16(<8 x bfloat> %arg0, <16 x bfloat> %arg1, <16 x float> %arg2, i32 %arg3, i32 immarg 0, i32 immarg 0) ret <16 x float> %result @@ -953,30 +911,25 @@ define <4 x i32> @test_smfmac_i32_16x16x128_i8__sgpr(<4 x i32> inreg %arg0, <8 x ; SDAG-LABEL: test_smfmac_i32_16x16x128_i8__sgpr: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v10, s0 -; SDAG-NEXT: v_mov_b32_e32 v11, s1 -; SDAG-NEXT: v_mov_b32_e32 v12, s2 -; SDAG-NEXT: v_mov_b32_e32 v13, s3 -; SDAG-NEXT: v_mov_b32_e32 v2, s16 -; SDAG-NEXT: v_mov_b32_e32 v3, s17 -; SDAG-NEXT: v_mov_b32_e32 v4, s18 -; SDAG-NEXT: v_mov_b32_e32 v5, s19 -; SDAG-NEXT: v_mov_b32_e32 v6, s20 -; SDAG-NEXT: v_mov_b32_e32 v7, s21 -; SDAG-NEXT: v_mov_b32_e32 v8, s22 -; SDAG-NEXT: v_mov_b32_e32 v9, s23 -; SDAG-NEXT: v_accvgpr_write_b32 a0, s24 -; SDAG-NEXT: v_accvgpr_write_b32 a1, s25 
-; SDAG-NEXT: v_accvgpr_write_b32 a2, s26 -; SDAG-NEXT: v_accvgpr_write_b32 a3, s27 -; SDAG-NEXT: v_mov_b32_e32 v0, s28 +; SDAG-NEXT: v_mov_b32_e32 v14, s0 +; SDAG-NEXT: v_mov_b32_e32 v15, s1 +; SDAG-NEXT: v_mov_b32_e32 v16, s2 +; SDAG-NEXT: v_mov_b32_e32 v17, s3 +; SDAG-NEXT: v_mov_b32_e32 v6, s16 +; SDAG-NEXT: v_mov_b32_e32 v7, s17 +; SDAG-NEXT: v_mov_b32_e32 v8, s18 +; SDAG-NEXT: v_mov_b32_e32 v9, s19 +; SDAG-NEXT: v_mov_b32_e32 v10, s20 +; SDAG-NEXT: v_mov_b32_e32 v11, s21 +; SDAG-NEXT: v_mov_b32_e32 v12, s22 +; SDAG-NEXT: v_mov_b32_e32 v13, s23 +; SDAG-NEXT: v_mov_b32_e32 v0, s24 +; SDAG-NEXT: v_mov_b32_e32 v1, s25 +; SDAG-NEXT: v_mov_b32_e32 v2, s26 +; SDAG-NEXT: v_mov_b32_e32 v3, s27 +; SDAG-NEXT: v_mov_b32_e32 v4, s28 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_smfmac_i32_16x16x128_i8 a[0:3], v[10:13], v[2:9], v0 -; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 -; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 -; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 -; SDAG-NEXT: v_accvgpr_read_b32 v3, a3 +; SDAG-NEXT: v_smfmac_i32_16x16x128_i8 v[0:3], v[14:17], v[6:13], v4 ; SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_smfmac_i32_16x16x128_i8__sgpr: @@ -1275,53 +1228,37 @@ define <16 x i32> @test_smfmac_i32_32x32x64_i8__sgpr(<4 x i32> inreg %arg0, <8 x ; SDAG-LABEL: test_smfmac_i32_32x32x64_i8__sgpr: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v36, s0 -; SDAG-NEXT: v_mov_b32_e32 v37, s1 -; SDAG-NEXT: v_mov_b32_e32 v38, s2 -; SDAG-NEXT: v_mov_b32_e32 v39, s3 -; SDAG-NEXT: v_mov_b32_e32 v13, s25 -; SDAG-NEXT: v_mov_b32_e32 v14, s26 -; SDAG-NEXT: v_mov_b32_e32 v15, s27 -; SDAG-NEXT: v_mov_b32_e32 v16, s28 -; SDAG-NEXT: v_mov_b32_e32 v17, s29 -; SDAG-NEXT: v_mov_b32_e32 v28, s16 -; SDAG-NEXT: v_mov_b32_e32 v29, s17 -; SDAG-NEXT: v_mov_b32_e32 v30, s18 -; SDAG-NEXT: v_mov_b32_e32 v31, s19 -; SDAG-NEXT: v_mov_b32_e32 v32, s20 -; SDAG-NEXT: v_mov_b32_e32 v33, s21 -; SDAG-NEXT: v_mov_b32_e32 v34, s22 -; SDAG-NEXT: v_mov_b32_e32 v35, s23 -; SDAG-NEXT: v_mov_b32_e32 v12, s24 -; SDAG-NEXT: v_mov_b32_e32 v18, v0 -; SDAG-NEXT: v_mov_b32_e32 v19, v1 -; SDAG-NEXT: v_mov_b32_e32 v20, v2 -; SDAG-NEXT: v_mov_b32_e32 v21, v3 -; SDAG-NEXT: v_mov_b32_e32 v22, v4 -; SDAG-NEXT: v_mov_b32_e32 v23, v5 -; SDAG-NEXT: v_mov_b32_e32 v24, v6 -; SDAG-NEXT: v_mov_b32_e32 v25, v7 -; SDAG-NEXT: v_mov_b32_e32 v26, v8 -; SDAG-NEXT: v_mov_b32_e32 v27, v9 +; SDAG-NEXT: v_mov_b32_e32 v26, s0 +; SDAG-NEXT: v_mov_b32_e32 v27, s1 +; SDAG-NEXT: v_mov_b32_e32 v28, s2 +; SDAG-NEXT: v_mov_b32_e32 v29, s3 +; SDAG-NEXT: v_mov_b32_e32 v16, v10 +; SDAG-NEXT: v_mov_b32_e32 v15, v9 +; SDAG-NEXT: v_mov_b32_e32 v14, v8 +; SDAG-NEXT: v_mov_b32_e32 v13, v7 +; SDAG-NEXT: v_mov_b32_e32 v12, v6 +; SDAG-NEXT: v_mov_b32_e32 v11, v5 +; SDAG-NEXT: v_mov_b32_e32 v10, v4 +; SDAG-NEXT: v_mov_b32_e32 v9, v3 +; SDAG-NEXT: v_mov_b32_e32 v8, v2 +; SDAG-NEXT: v_mov_b32_e32 v7, v1 +; SDAG-NEXT: v_mov_b32_e32 v6, v0 +; SDAG-NEXT: v_mov_b32_e32 v0, s24 +; SDAG-NEXT: v_mov_b32_e32 v1, s25 +; SDAG-NEXT: v_mov_b32_e32 v2, s26 +; SDAG-NEXT: v_mov_b32_e32 v3, s27 +; SDAG-NEXT: v_mov_b32_e32 v4, s28 +; SDAG-NEXT: v_mov_b32_e32 v5, s29 +; SDAG-NEXT: v_mov_b32_e32 v18, s16 +; SDAG-NEXT: v_mov_b32_e32 v19, s17 +; SDAG-NEXT: v_mov_b32_e32 v20, s18 +; SDAG-NEXT: v_mov_b32_e32 v21, s19 +; SDAG-NEXT: v_mov_b32_e32 v22, s20 +; SDAG-NEXT: v_mov_b32_e32 v23, s21 +; SDAG-NEXT: v_mov_b32_e32 v24, s22 +; SDAG-NEXT: v_mov_b32_e32 v25, s23 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_smfmac_i32_32x32x64_i8 v[12:27], v[36:39], v[28:35], v10 
-; SDAG-NEXT: s_nop 11 -; SDAG-NEXT: v_mov_b32_e32 v0, v12 -; SDAG-NEXT: v_mov_b32_e32 v1, v13 -; SDAG-NEXT: v_mov_b32_e32 v2, v14 -; SDAG-NEXT: v_mov_b32_e32 v3, v15 -; SDAG-NEXT: v_mov_b32_e32 v4, v16 -; SDAG-NEXT: v_mov_b32_e32 v5, v17 -; SDAG-NEXT: v_mov_b32_e32 v6, v18 -; SDAG-NEXT: v_mov_b32_e32 v7, v19 -; SDAG-NEXT: v_mov_b32_e32 v8, v20 -; SDAG-NEXT: v_mov_b32_e32 v9, v21 -; SDAG-NEXT: v_mov_b32_e32 v10, v22 -; SDAG-NEXT: v_mov_b32_e32 v11, v23 -; SDAG-NEXT: v_mov_b32_e32 v12, v24 -; SDAG-NEXT: v_mov_b32_e32 v13, v25 -; SDAG-NEXT: v_mov_b32_e32 v14, v26 -; SDAG-NEXT: v_mov_b32_e32 v15, v27 +; SDAG-NEXT: v_smfmac_i32_32x32x64_i8 v[0:15], v[26:29], v[18:25], v16 ; SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_smfmac_i32_32x32x64_i8__sgpr: @@ -1489,30 +1426,25 @@ define <4 x float> @test_smfmac_f32_16x16x128_bf8_bf8__sgpr(<4 x i32> inreg %arg ; SDAG-LABEL: test_smfmac_f32_16x16x128_bf8_bf8__sgpr: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v10, s0 -; SDAG-NEXT: v_mov_b32_e32 v11, s1 -; SDAG-NEXT: v_mov_b32_e32 v12, s2 -; SDAG-NEXT: v_mov_b32_e32 v13, s3 -; SDAG-NEXT: v_mov_b32_e32 v2, s16 -; SDAG-NEXT: v_mov_b32_e32 v3, s17 -; SDAG-NEXT: v_mov_b32_e32 v4, s18 -; SDAG-NEXT: v_mov_b32_e32 v5, s19 -; SDAG-NEXT: v_mov_b32_e32 v6, s20 -; SDAG-NEXT: v_mov_b32_e32 v7, s21 -; SDAG-NEXT: v_mov_b32_e32 v8, s22 -; SDAG-NEXT: v_mov_b32_e32 v9, s23 -; SDAG-NEXT: v_accvgpr_write_b32 a0, s24 -; SDAG-NEXT: v_accvgpr_write_b32 a1, s25 -; SDAG-NEXT: v_accvgpr_write_b32 a2, s26 -; SDAG-NEXT: v_accvgpr_write_b32 a3, s27 -; SDAG-NEXT: v_mov_b32_e32 v0, s28 +; SDAG-NEXT: v_mov_b32_e32 v14, s0 +; SDAG-NEXT: v_mov_b32_e32 v15, s1 +; SDAG-NEXT: v_mov_b32_e32 v16, s2 +; SDAG-NEXT: v_mov_b32_e32 v17, s3 +; SDAG-NEXT: v_mov_b32_e32 v6, s16 +; SDAG-NEXT: v_mov_b32_e32 v7, s17 +; SDAG-NEXT: v_mov_b32_e32 v8, s18 +; SDAG-NEXT: v_mov_b32_e32 v9, s19 +; SDAG-NEXT: v_mov_b32_e32 v10, s20 +; SDAG-NEXT: v_mov_b32_e32 v11, s21 +; SDAG-NEXT: v_mov_b32_e32 v12, s22 +; SDAG-NEXT: v_mov_b32_e32 v13, s23 +; SDAG-NEXT: v_mov_b32_e32 v0, s24 +; SDAG-NEXT: v_mov_b32_e32 v1, s25 +; SDAG-NEXT: v_mov_b32_e32 v2, s26 +; SDAG-NEXT: v_mov_b32_e32 v3, s27 +; SDAG-NEXT: v_mov_b32_e32 v4, s28 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_smfmac_f32_16x16x128_bf8_bf8 a[0:3], v[10:13], v[2:9], v0 -; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 -; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 -; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 -; SDAG-NEXT: v_accvgpr_read_b32 v3, a3 +; SDAG-NEXT: v_smfmac_f32_16x16x128_bf8_bf8 v[0:3], v[14:17], v[6:13], v4 ; SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_smfmac_f32_16x16x128_bf8_bf8__sgpr: @@ -1658,30 +1590,25 @@ define <4 x float> @test_smfmac_f32_16x16x128_bf8_fp8__sgpr(<4 x i32> inreg %arg ; SDAG-LABEL: test_smfmac_f32_16x16x128_bf8_fp8__sgpr: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v10, s0 -; SDAG-NEXT: v_mov_b32_e32 v11, s1 -; SDAG-NEXT: v_mov_b32_e32 v12, s2 -; SDAG-NEXT: v_mov_b32_e32 v13, s3 -; SDAG-NEXT: v_mov_b32_e32 v2, s16 -; SDAG-NEXT: v_mov_b32_e32 v3, s17 -; SDAG-NEXT: v_mov_b32_e32 v4, s18 -; SDAG-NEXT: v_mov_b32_e32 v5, s19 -; SDAG-NEXT: v_mov_b32_e32 v6, s20 -; SDAG-NEXT: v_mov_b32_e32 v7, s21 -; SDAG-NEXT: v_mov_b32_e32 v8, s22 -; SDAG-NEXT: v_mov_b32_e32 v9, s23 -; SDAG-NEXT: v_accvgpr_write_b32 a0, s24 -; SDAG-NEXT: v_accvgpr_write_b32 a1, s25 -; SDAG-NEXT: v_accvgpr_write_b32 a2, s26 -; SDAG-NEXT: v_accvgpr_write_b32 a3, s27 -; SDAG-NEXT: v_mov_b32_e32 v0, s28 
+; SDAG-NEXT: v_mov_b32_e32 v14, s0 +; SDAG-NEXT: v_mov_b32_e32 v15, s1 +; SDAG-NEXT: v_mov_b32_e32 v16, s2 +; SDAG-NEXT: v_mov_b32_e32 v17, s3 +; SDAG-NEXT: v_mov_b32_e32 v6, s16 +; SDAG-NEXT: v_mov_b32_e32 v7, s17 +; SDAG-NEXT: v_mov_b32_e32 v8, s18 +; SDAG-NEXT: v_mov_b32_e32 v9, s19 +; SDAG-NEXT: v_mov_b32_e32 v10, s20 +; SDAG-NEXT: v_mov_b32_e32 v11, s21 +; SDAG-NEXT: v_mov_b32_e32 v12, s22 +; SDAG-NEXT: v_mov_b32_e32 v13, s23 +; SDAG-NEXT: v_mov_b32_e32 v0, s24 +; SDAG-NEXT: v_mov_b32_e32 v1, s25 +; SDAG-NEXT: v_mov_b32_e32 v2, s26 +; SDAG-NEXT: v_mov_b32_e32 v3, s27 +; SDAG-NEXT: v_mov_b32_e32 v4, s28 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_smfmac_f32_16x16x128_bf8_fp8 a[0:3], v[10:13], v[2:9], v0 -; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 -; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 -; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 -; SDAG-NEXT: v_accvgpr_read_b32 v3, a3 +; SDAG-NEXT: v_smfmac_f32_16x16x128_bf8_fp8 v[0:3], v[14:17], v[6:13], v4 ; SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_smfmac_f32_16x16x128_bf8_fp8__sgpr: @@ -1827,30 +1754,25 @@ define <4 x float> @test_smfmac_f32_16x16x128_fp8_bf8__sgpr(<4 x i32> inreg %arg ; SDAG-LABEL: test_smfmac_f32_16x16x128_fp8_bf8__sgpr: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v10, s0 -; SDAG-NEXT: v_mov_b32_e32 v11, s1 -; SDAG-NEXT: v_mov_b32_e32 v12, s2 -; SDAG-NEXT: v_mov_b32_e32 v13, s3 -; SDAG-NEXT: v_mov_b32_e32 v2, s16 -; SDAG-NEXT: v_mov_b32_e32 v3, s17 -; SDAG-NEXT: v_mov_b32_e32 v4, s18 -; SDAG-NEXT: v_mov_b32_e32 v5, s19 -; SDAG-NEXT: v_mov_b32_e32 v6, s20 -; SDAG-NEXT: v_mov_b32_e32 v7, s21 -; SDAG-NEXT: v_mov_b32_e32 v8, s22 -; SDAG-NEXT: v_mov_b32_e32 v9, s23 -; SDAG-NEXT: v_accvgpr_write_b32 a0, s24 -; SDAG-NEXT: v_accvgpr_write_b32 a1, s25 -; SDAG-NEXT: v_accvgpr_write_b32 a2, s26 -; SDAG-NEXT: v_accvgpr_write_b32 a3, s27 -; SDAG-NEXT: v_mov_b32_e32 v0, s28 +; SDAG-NEXT: v_mov_b32_e32 v14, s0 +; SDAG-NEXT: v_mov_b32_e32 v15, s1 +; SDAG-NEXT: v_mov_b32_e32 v16, s2 +; SDAG-NEXT: v_mov_b32_e32 v17, s3 +; SDAG-NEXT: v_mov_b32_e32 v6, s16 +; SDAG-NEXT: v_mov_b32_e32 v7, s17 +; SDAG-NEXT: v_mov_b32_e32 v8, s18 +; SDAG-NEXT: v_mov_b32_e32 v9, s19 +; SDAG-NEXT: v_mov_b32_e32 v10, s20 +; SDAG-NEXT: v_mov_b32_e32 v11, s21 +; SDAG-NEXT: v_mov_b32_e32 v12, s22 +; SDAG-NEXT: v_mov_b32_e32 v13, s23 +; SDAG-NEXT: v_mov_b32_e32 v0, s24 +; SDAG-NEXT: v_mov_b32_e32 v1, s25 +; SDAG-NEXT: v_mov_b32_e32 v2, s26 +; SDAG-NEXT: v_mov_b32_e32 v3, s27 +; SDAG-NEXT: v_mov_b32_e32 v4, s28 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_smfmac_f32_16x16x128_fp8_bf8 a[0:3], v[10:13], v[2:9], v0 -; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 -; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 -; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 -; SDAG-NEXT: v_accvgpr_read_b32 v3, a3 +; SDAG-NEXT: v_smfmac_f32_16x16x128_fp8_bf8 v[0:3], v[14:17], v[6:13], v4 ; SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_smfmac_f32_16x16x128_fp8_bf8__sgpr: @@ -1996,30 +1918,25 @@ define <4 x float> @test_smfmac_f32_16x16x128_fp8_fp8__sgpr(<4 x i32> inreg %arg ; SDAG-LABEL: test_smfmac_f32_16x16x128_fp8_fp8__sgpr: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v10, s0 -; SDAG-NEXT: v_mov_b32_e32 v11, s1 -; SDAG-NEXT: v_mov_b32_e32 v12, s2 -; SDAG-NEXT: v_mov_b32_e32 v13, s3 -; SDAG-NEXT: v_mov_b32_e32 v2, s16 -; SDAG-NEXT: v_mov_b32_e32 v3, s17 -; SDAG-NEXT: v_mov_b32_e32 v4, s18 -; SDAG-NEXT: v_mov_b32_e32 v5, s19 -; SDAG-NEXT: v_mov_b32_e32 v6, s20 -; SDAG-NEXT: 
v_mov_b32_e32 v7, s21 -; SDAG-NEXT: v_mov_b32_e32 v8, s22 -; SDAG-NEXT: v_mov_b32_e32 v9, s23 -; SDAG-NEXT: v_accvgpr_write_b32 a0, s24 -; SDAG-NEXT: v_accvgpr_write_b32 a1, s25 -; SDAG-NEXT: v_accvgpr_write_b32 a2, s26 -; SDAG-NEXT: v_accvgpr_write_b32 a3, s27 -; SDAG-NEXT: v_mov_b32_e32 v0, s28 +; SDAG-NEXT: v_mov_b32_e32 v14, s0 +; SDAG-NEXT: v_mov_b32_e32 v15, s1 +; SDAG-NEXT: v_mov_b32_e32 v16, s2 +; SDAG-NEXT: v_mov_b32_e32 v17, s3 +; SDAG-NEXT: v_mov_b32_e32 v6, s16 +; SDAG-NEXT: v_mov_b32_e32 v7, s17 +; SDAG-NEXT: v_mov_b32_e32 v8, s18 +; SDAG-NEXT: v_mov_b32_e32 v9, s19 +; SDAG-NEXT: v_mov_b32_e32 v10, s20 +; SDAG-NEXT: v_mov_b32_e32 v11, s21 +; SDAG-NEXT: v_mov_b32_e32 v12, s22 +; SDAG-NEXT: v_mov_b32_e32 v13, s23 +; SDAG-NEXT: v_mov_b32_e32 v0, s24 +; SDAG-NEXT: v_mov_b32_e32 v1, s25 +; SDAG-NEXT: v_mov_b32_e32 v2, s26 +; SDAG-NEXT: v_mov_b32_e32 v3, s27 +; SDAG-NEXT: v_mov_b32_e32 v4, s28 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_smfmac_f32_16x16x128_fp8_fp8 a[0:3], v[10:13], v[2:9], v0 -; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 -; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 -; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 -; SDAG-NEXT: v_accvgpr_read_b32 v3, a3 +; SDAG-NEXT: v_smfmac_f32_16x16x128_fp8_fp8 v[0:3], v[14:17], v[6:13], v4 ; SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_smfmac_f32_16x16x128_fp8_fp8__sgpr: @@ -2318,53 +2235,37 @@ define <16 x float> @test_smfmac_f32_32x32x64_bf8_bf8__sgpr(<4 x i32> inreg %arg ; SDAG-LABEL: test_smfmac_f32_32x32x64_bf8_bf8__sgpr: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v36, s0 -; SDAG-NEXT: v_mov_b32_e32 v37, s1 -; SDAG-NEXT: v_mov_b32_e32 v38, s2 -; SDAG-NEXT: v_mov_b32_e32 v39, s3 -; SDAG-NEXT: v_mov_b32_e32 v13, s25 -; SDAG-NEXT: v_mov_b32_e32 v14, s26 -; SDAG-NEXT: v_mov_b32_e32 v15, s27 -; SDAG-NEXT: v_mov_b32_e32 v16, s28 -; SDAG-NEXT: v_mov_b32_e32 v17, s29 -; SDAG-NEXT: v_mov_b32_e32 v28, s16 -; SDAG-NEXT: v_mov_b32_e32 v29, s17 -; SDAG-NEXT: v_mov_b32_e32 v30, s18 -; SDAG-NEXT: v_mov_b32_e32 v31, s19 -; SDAG-NEXT: v_mov_b32_e32 v32, s20 -; SDAG-NEXT: v_mov_b32_e32 v33, s21 -; SDAG-NEXT: v_mov_b32_e32 v34, s22 -; SDAG-NEXT: v_mov_b32_e32 v35, s23 -; SDAG-NEXT: v_mov_b32_e32 v12, s24 -; SDAG-NEXT: v_mov_b32_e32 v18, v0 -; SDAG-NEXT: v_mov_b32_e32 v19, v1 -; SDAG-NEXT: v_mov_b32_e32 v20, v2 -; SDAG-NEXT: v_mov_b32_e32 v21, v3 -; SDAG-NEXT: v_mov_b32_e32 v22, v4 -; SDAG-NEXT: v_mov_b32_e32 v23, v5 -; SDAG-NEXT: v_mov_b32_e32 v24, v6 -; SDAG-NEXT: v_mov_b32_e32 v25, v7 -; SDAG-NEXT: v_mov_b32_e32 v26, v8 -; SDAG-NEXT: v_mov_b32_e32 v27, v9 +; SDAG-NEXT: v_mov_b32_e32 v26, s0 +; SDAG-NEXT: v_mov_b32_e32 v27, s1 +; SDAG-NEXT: v_mov_b32_e32 v28, s2 +; SDAG-NEXT: v_mov_b32_e32 v29, s3 +; SDAG-NEXT: v_mov_b32_e32 v16, v10 +; SDAG-NEXT: v_mov_b32_e32 v15, v9 +; SDAG-NEXT: v_mov_b32_e32 v14, v8 +; SDAG-NEXT: v_mov_b32_e32 v13, v7 +; SDAG-NEXT: v_mov_b32_e32 v12, v6 +; SDAG-NEXT: v_mov_b32_e32 v11, v5 +; SDAG-NEXT: v_mov_b32_e32 v10, v4 +; SDAG-NEXT: v_mov_b32_e32 v9, v3 +; SDAG-NEXT: v_mov_b32_e32 v8, v2 +; SDAG-NEXT: v_mov_b32_e32 v7, v1 +; SDAG-NEXT: v_mov_b32_e32 v6, v0 +; SDAG-NEXT: v_mov_b32_e32 v0, s24 +; SDAG-NEXT: v_mov_b32_e32 v1, s25 +; SDAG-NEXT: v_mov_b32_e32 v2, s26 +; SDAG-NEXT: v_mov_b32_e32 v3, s27 +; SDAG-NEXT: v_mov_b32_e32 v4, s28 +; SDAG-NEXT: v_mov_b32_e32 v5, s29 +; SDAG-NEXT: v_mov_b32_e32 v18, s16 +; SDAG-NEXT: v_mov_b32_e32 v19, s17 +; SDAG-NEXT: v_mov_b32_e32 v20, s18 +; SDAG-NEXT: v_mov_b32_e32 v21, s19 +; SDAG-NEXT: v_mov_b32_e32 v22, s20 +; 
SDAG-NEXT: v_mov_b32_e32 v23, s21 +; SDAG-NEXT: v_mov_b32_e32 v24, s22 +; SDAG-NEXT: v_mov_b32_e32 v25, s23 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_bf8 v[12:27], v[36:39], v[28:35], v10 -; SDAG-NEXT: s_nop 11 -; SDAG-NEXT: v_mov_b32_e32 v0, v12 -; SDAG-NEXT: v_mov_b32_e32 v1, v13 -; SDAG-NEXT: v_mov_b32_e32 v2, v14 -; SDAG-NEXT: v_mov_b32_e32 v3, v15 -; SDAG-NEXT: v_mov_b32_e32 v4, v16 -; SDAG-NEXT: v_mov_b32_e32 v5, v17 -; SDAG-NEXT: v_mov_b32_e32 v6, v18 -; SDAG-NEXT: v_mov_b32_e32 v7, v19 -; SDAG-NEXT: v_mov_b32_e32 v8, v20 -; SDAG-NEXT: v_mov_b32_e32 v9, v21 -; SDAG-NEXT: v_mov_b32_e32 v10, v22 -; SDAG-NEXT: v_mov_b32_e32 v11, v23 -; SDAG-NEXT: v_mov_b32_e32 v12, v24 -; SDAG-NEXT: v_mov_b32_e32 v13, v25 -; SDAG-NEXT: v_mov_b32_e32 v14, v26 -; SDAG-NEXT: v_mov_b32_e32 v15, v27 +; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_bf8 v[0:15], v[26:29], v[18:25], v16 ; SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_smfmac_f32_32x32x64_bf8_bf8__sgpr: @@ -2685,53 +2586,37 @@ define <16 x float> @test_smfmac_f32_32x32x64_bf8_fp8__sgpr(<4 x i32> inreg %arg ; SDAG-LABEL: test_smfmac_f32_32x32x64_bf8_fp8__sgpr: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v36, s0 -; SDAG-NEXT: v_mov_b32_e32 v37, s1 -; SDAG-NEXT: v_mov_b32_e32 v38, s2 -; SDAG-NEXT: v_mov_b32_e32 v39, s3 -; SDAG-NEXT: v_mov_b32_e32 v13, s25 -; SDAG-NEXT: v_mov_b32_e32 v14, s26 -; SDAG-NEXT: v_mov_b32_e32 v15, s27 -; SDAG-NEXT: v_mov_b32_e32 v16, s28 -; SDAG-NEXT: v_mov_b32_e32 v17, s29 -; SDAG-NEXT: v_mov_b32_e32 v28, s16 -; SDAG-NEXT: v_mov_b32_e32 v29, s17 -; SDAG-NEXT: v_mov_b32_e32 v30, s18 -; SDAG-NEXT: v_mov_b32_e32 v31, s19 -; SDAG-NEXT: v_mov_b32_e32 v32, s20 -; SDAG-NEXT: v_mov_b32_e32 v33, s21 -; SDAG-NEXT: v_mov_b32_e32 v34, s22 -; SDAG-NEXT: v_mov_b32_e32 v35, s23 -; SDAG-NEXT: v_mov_b32_e32 v12, s24 -; SDAG-NEXT: v_mov_b32_e32 v18, v0 -; SDAG-NEXT: v_mov_b32_e32 v19, v1 -; SDAG-NEXT: v_mov_b32_e32 v20, v2 -; SDAG-NEXT: v_mov_b32_e32 v21, v3 -; SDAG-NEXT: v_mov_b32_e32 v22, v4 -; SDAG-NEXT: v_mov_b32_e32 v23, v5 -; SDAG-NEXT: v_mov_b32_e32 v24, v6 -; SDAG-NEXT: v_mov_b32_e32 v25, v7 -; SDAG-NEXT: v_mov_b32_e32 v26, v8 -; SDAG-NEXT: v_mov_b32_e32 v27, v9 +; SDAG-NEXT: v_mov_b32_e32 v26, s0 +; SDAG-NEXT: v_mov_b32_e32 v27, s1 +; SDAG-NEXT: v_mov_b32_e32 v28, s2 +; SDAG-NEXT: v_mov_b32_e32 v29, s3 +; SDAG-NEXT: v_mov_b32_e32 v16, v10 +; SDAG-NEXT: v_mov_b32_e32 v15, v9 +; SDAG-NEXT: v_mov_b32_e32 v14, v8 +; SDAG-NEXT: v_mov_b32_e32 v13, v7 +; SDAG-NEXT: v_mov_b32_e32 v12, v6 +; SDAG-NEXT: v_mov_b32_e32 v11, v5 +; SDAG-NEXT: v_mov_b32_e32 v10, v4 +; SDAG-NEXT: v_mov_b32_e32 v9, v3 +; SDAG-NEXT: v_mov_b32_e32 v8, v2 +; SDAG-NEXT: v_mov_b32_e32 v7, v1 +; SDAG-NEXT: v_mov_b32_e32 v6, v0 +; SDAG-NEXT: v_mov_b32_e32 v0, s24 +; SDAG-NEXT: v_mov_b32_e32 v1, s25 +; SDAG-NEXT: v_mov_b32_e32 v2, s26 +; SDAG-NEXT: v_mov_b32_e32 v3, s27 +; SDAG-NEXT: v_mov_b32_e32 v4, s28 +; SDAG-NEXT: v_mov_b32_e32 v5, s29 +; SDAG-NEXT: v_mov_b32_e32 v18, s16 +; SDAG-NEXT: v_mov_b32_e32 v19, s17 +; SDAG-NEXT: v_mov_b32_e32 v20, s18 +; SDAG-NEXT: v_mov_b32_e32 v21, s19 +; SDAG-NEXT: v_mov_b32_e32 v22, s20 +; SDAG-NEXT: v_mov_b32_e32 v23, s21 +; SDAG-NEXT: v_mov_b32_e32 v24, s22 +; SDAG-NEXT: v_mov_b32_e32 v25, s23 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_fp8 v[12:27], v[36:39], v[28:35], v10 -; SDAG-NEXT: s_nop 11 -; SDAG-NEXT: v_mov_b32_e32 v0, v12 -; SDAG-NEXT: v_mov_b32_e32 v1, v13 -; SDAG-NEXT: v_mov_b32_e32 v2, v14 -; SDAG-NEXT: v_mov_b32_e32 v3, v15 -; 
SDAG-NEXT: v_mov_b32_e32 v4, v16 -; SDAG-NEXT: v_mov_b32_e32 v5, v17 -; SDAG-NEXT: v_mov_b32_e32 v6, v18 -; SDAG-NEXT: v_mov_b32_e32 v7, v19 -; SDAG-NEXT: v_mov_b32_e32 v8, v20 -; SDAG-NEXT: v_mov_b32_e32 v9, v21 -; SDAG-NEXT: v_mov_b32_e32 v10, v22 -; SDAG-NEXT: v_mov_b32_e32 v11, v23 -; SDAG-NEXT: v_mov_b32_e32 v12, v24 -; SDAG-NEXT: v_mov_b32_e32 v13, v25 -; SDAG-NEXT: v_mov_b32_e32 v14, v26 -; SDAG-NEXT: v_mov_b32_e32 v15, v27 +; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_fp8 v[0:15], v[26:29], v[18:25], v16 ; SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_smfmac_f32_32x32x64_bf8_fp8__sgpr: @@ -3052,53 +2937,37 @@ define <16 x float> @test_smfmac_f32_32x32x64_fp8_bf8__sgpr(<4 x i32> inreg %arg ; SDAG-LABEL: test_smfmac_f32_32x32x64_fp8_bf8__sgpr: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v36, s0 -; SDAG-NEXT: v_mov_b32_e32 v37, s1 -; SDAG-NEXT: v_mov_b32_e32 v38, s2 -; SDAG-NEXT: v_mov_b32_e32 v39, s3 -; SDAG-NEXT: v_mov_b32_e32 v13, s25 -; SDAG-NEXT: v_mov_b32_e32 v14, s26 -; SDAG-NEXT: v_mov_b32_e32 v15, s27 -; SDAG-NEXT: v_mov_b32_e32 v16, s28 -; SDAG-NEXT: v_mov_b32_e32 v17, s29 -; SDAG-NEXT: v_mov_b32_e32 v28, s16 -; SDAG-NEXT: v_mov_b32_e32 v29, s17 -; SDAG-NEXT: v_mov_b32_e32 v30, s18 -; SDAG-NEXT: v_mov_b32_e32 v31, s19 -; SDAG-NEXT: v_mov_b32_e32 v32, s20 -; SDAG-NEXT: v_mov_b32_e32 v33, s21 -; SDAG-NEXT: v_mov_b32_e32 v34, s22 -; SDAG-NEXT: v_mov_b32_e32 v35, s23 -; SDAG-NEXT: v_mov_b32_e32 v12, s24 -; SDAG-NEXT: v_mov_b32_e32 v18, v0 -; SDAG-NEXT: v_mov_b32_e32 v19, v1 -; SDAG-NEXT: v_mov_b32_e32 v20, v2 -; SDAG-NEXT: v_mov_b32_e32 v21, v3 -; SDAG-NEXT: v_mov_b32_e32 v22, v4 -; SDAG-NEXT: v_mov_b32_e32 v23, v5 -; SDAG-NEXT: v_mov_b32_e32 v24, v6 -; SDAG-NEXT: v_mov_b32_e32 v25, v7 -; SDAG-NEXT: v_mov_b32_e32 v26, v8 -; SDAG-NEXT: v_mov_b32_e32 v27, v9 +; SDAG-NEXT: v_mov_b32_e32 v26, s0 +; SDAG-NEXT: v_mov_b32_e32 v27, s1 +; SDAG-NEXT: v_mov_b32_e32 v28, s2 +; SDAG-NEXT: v_mov_b32_e32 v29, s3 +; SDAG-NEXT: v_mov_b32_e32 v16, v10 +; SDAG-NEXT: v_mov_b32_e32 v15, v9 +; SDAG-NEXT: v_mov_b32_e32 v14, v8 +; SDAG-NEXT: v_mov_b32_e32 v13, v7 +; SDAG-NEXT: v_mov_b32_e32 v12, v6 +; SDAG-NEXT: v_mov_b32_e32 v11, v5 +; SDAG-NEXT: v_mov_b32_e32 v10, v4 +; SDAG-NEXT: v_mov_b32_e32 v9, v3 +; SDAG-NEXT: v_mov_b32_e32 v8, v2 +; SDAG-NEXT: v_mov_b32_e32 v7, v1 +; SDAG-NEXT: v_mov_b32_e32 v6, v0 +; SDAG-NEXT: v_mov_b32_e32 v0, s24 +; SDAG-NEXT: v_mov_b32_e32 v1, s25 +; SDAG-NEXT: v_mov_b32_e32 v2, s26 +; SDAG-NEXT: v_mov_b32_e32 v3, s27 +; SDAG-NEXT: v_mov_b32_e32 v4, s28 +; SDAG-NEXT: v_mov_b32_e32 v5, s29 +; SDAG-NEXT: v_mov_b32_e32 v18, s16 +; SDAG-NEXT: v_mov_b32_e32 v19, s17 +; SDAG-NEXT: v_mov_b32_e32 v20, s18 +; SDAG-NEXT: v_mov_b32_e32 v21, s19 +; SDAG-NEXT: v_mov_b32_e32 v22, s20 +; SDAG-NEXT: v_mov_b32_e32 v23, s21 +; SDAG-NEXT: v_mov_b32_e32 v24, s22 +; SDAG-NEXT: v_mov_b32_e32 v25, s23 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_bf8 v[12:27], v[36:39], v[28:35], v10 -; SDAG-NEXT: s_nop 11 -; SDAG-NEXT: v_mov_b32_e32 v0, v12 -; SDAG-NEXT: v_mov_b32_e32 v1, v13 -; SDAG-NEXT: v_mov_b32_e32 v2, v14 -; SDAG-NEXT: v_mov_b32_e32 v3, v15 -; SDAG-NEXT: v_mov_b32_e32 v4, v16 -; SDAG-NEXT: v_mov_b32_e32 v5, v17 -; SDAG-NEXT: v_mov_b32_e32 v6, v18 -; SDAG-NEXT: v_mov_b32_e32 v7, v19 -; SDAG-NEXT: v_mov_b32_e32 v8, v20 -; SDAG-NEXT: v_mov_b32_e32 v9, v21 -; SDAG-NEXT: v_mov_b32_e32 v10, v22 -; SDAG-NEXT: v_mov_b32_e32 v11, v23 -; SDAG-NEXT: v_mov_b32_e32 v12, v24 -; SDAG-NEXT: v_mov_b32_e32 v13, v25 -; SDAG-NEXT: 
v_mov_b32_e32 v14, v26 -; SDAG-NEXT: v_mov_b32_e32 v15, v27 +; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_bf8 v[0:15], v[26:29], v[18:25], v16 ; SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_smfmac_f32_32x32x64_fp8_bf8__sgpr: @@ -3419,53 +3288,37 @@ define <16 x float> @test_smfmac_f32_32x32x64_fp8_fp8__sgpr(<4 x i32> inreg %arg ; SDAG-LABEL: test_smfmac_f32_32x32x64_fp8_fp8__sgpr: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v36, s0 -; SDAG-NEXT: v_mov_b32_e32 v37, s1 -; SDAG-NEXT: v_mov_b32_e32 v38, s2 -; SDAG-NEXT: v_mov_b32_e32 v39, s3 -; SDAG-NEXT: v_mov_b32_e32 v13, s25 -; SDAG-NEXT: v_mov_b32_e32 v14, s26 -; SDAG-NEXT: v_mov_b32_e32 v15, s27 -; SDAG-NEXT: v_mov_b32_e32 v16, s28 -; SDAG-NEXT: v_mov_b32_e32 v17, s29 -; SDAG-NEXT: v_mov_b32_e32 v28, s16 -; SDAG-NEXT: v_mov_b32_e32 v29, s17 -; SDAG-NEXT: v_mov_b32_e32 v30, s18 -; SDAG-NEXT: v_mov_b32_e32 v31, s19 -; SDAG-NEXT: v_mov_b32_e32 v32, s20 -; SDAG-NEXT: v_mov_b32_e32 v33, s21 -; SDAG-NEXT: v_mov_b32_e32 v34, s22 -; SDAG-NEXT: v_mov_b32_e32 v35, s23 -; SDAG-NEXT: v_mov_b32_e32 v12, s24 -; SDAG-NEXT: v_mov_b32_e32 v18, v0 -; SDAG-NEXT: v_mov_b32_e32 v19, v1 -; SDAG-NEXT: v_mov_b32_e32 v20, v2 -; SDAG-NEXT: v_mov_b32_e32 v21, v3 -; SDAG-NEXT: v_mov_b32_e32 v22, v4 -; SDAG-NEXT: v_mov_b32_e32 v23, v5 -; SDAG-NEXT: v_mov_b32_e32 v24, v6 -; SDAG-NEXT: v_mov_b32_e32 v25, v7 -; SDAG-NEXT: v_mov_b32_e32 v26, v8 -; SDAG-NEXT: v_mov_b32_e32 v27, v9 +; SDAG-NEXT: v_mov_b32_e32 v26, s0 +; SDAG-NEXT: v_mov_b32_e32 v27, s1 +; SDAG-NEXT: v_mov_b32_e32 v28, s2 +; SDAG-NEXT: v_mov_b32_e32 v29, s3 +; SDAG-NEXT: v_mov_b32_e32 v16, v10 +; SDAG-NEXT: v_mov_b32_e32 v15, v9 +; SDAG-NEXT: v_mov_b32_e32 v14, v8 +; SDAG-NEXT: v_mov_b32_e32 v13, v7 +; SDAG-NEXT: v_mov_b32_e32 v12, v6 +; SDAG-NEXT: v_mov_b32_e32 v11, v5 +; SDAG-NEXT: v_mov_b32_e32 v10, v4 +; SDAG-NEXT: v_mov_b32_e32 v9, v3 +; SDAG-NEXT: v_mov_b32_e32 v8, v2 +; SDAG-NEXT: v_mov_b32_e32 v7, v1 +; SDAG-NEXT: v_mov_b32_e32 v6, v0 +; SDAG-NEXT: v_mov_b32_e32 v0, s24 +; SDAG-NEXT: v_mov_b32_e32 v1, s25 +; SDAG-NEXT: v_mov_b32_e32 v2, s26 +; SDAG-NEXT: v_mov_b32_e32 v3, s27 +; SDAG-NEXT: v_mov_b32_e32 v4, s28 +; SDAG-NEXT: v_mov_b32_e32 v5, s29 +; SDAG-NEXT: v_mov_b32_e32 v18, s16 +; SDAG-NEXT: v_mov_b32_e32 v19, s17 +; SDAG-NEXT: v_mov_b32_e32 v20, s18 +; SDAG-NEXT: v_mov_b32_e32 v21, s19 +; SDAG-NEXT: v_mov_b32_e32 v22, s20 +; SDAG-NEXT: v_mov_b32_e32 v23, s21 +; SDAG-NEXT: v_mov_b32_e32 v24, s22 +; SDAG-NEXT: v_mov_b32_e32 v25, s23 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_fp8 v[12:27], v[36:39], v[28:35], v10 -; SDAG-NEXT: s_nop 11 -; SDAG-NEXT: v_mov_b32_e32 v0, v12 -; SDAG-NEXT: v_mov_b32_e32 v1, v13 -; SDAG-NEXT: v_mov_b32_e32 v2, v14 -; SDAG-NEXT: v_mov_b32_e32 v3, v15 -; SDAG-NEXT: v_mov_b32_e32 v4, v16 -; SDAG-NEXT: v_mov_b32_e32 v5, v17 -; SDAG-NEXT: v_mov_b32_e32 v6, v18 -; SDAG-NEXT: v_mov_b32_e32 v7, v19 -; SDAG-NEXT: v_mov_b32_e32 v8, v20 -; SDAG-NEXT: v_mov_b32_e32 v9, v21 -; SDAG-NEXT: v_mov_b32_e32 v10, v22 -; SDAG-NEXT: v_mov_b32_e32 v11, v23 -; SDAG-NEXT: v_mov_b32_e32 v12, v24 -; SDAG-NEXT: v_mov_b32_e32 v13, v25 -; SDAG-NEXT: v_mov_b32_e32 v14, v26 -; SDAG-NEXT: v_mov_b32_e32 v15, v27 +; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_fp8 v[0:15], v[26:29], v[18:25], v16 ; SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_smfmac_f32_32x32x64_fp8_fp8__sgpr: diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll index b9e9893..9a23788 100644 
--- a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll +++ b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll @@ -369,7 +369,7 @@ define amdgpu_kernel void @illegal_mfma_after_rewrite() #1 { ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_mov_b32 s0, 0 ; CHECK-NEXT: s_mov_b32 s1, s0 -; CHECK-NEXT: v_mov_b64_e32 v[8:9], s[0:1] +; CHECK-NEXT: v_mov_b64_e32 v[28:29], s[0:1] ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; def s[0:3] ; CHECK-NEXT: ;;#ASMEND @@ -378,73 +378,66 @@ define amdgpu_kernel void @illegal_mfma_after_rewrite() #1 { ; CHECK-NEXT: v_mov_b64_e32 v[4:5], s[0:1] ; CHECK-NEXT: s_mov_b32 s0, 0x3c003c00 ; CHECK-NEXT: s_mov_b32 s1, s0 -; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[0:3], v[8:9], v[8:9], v[4:7] -; CHECK-NEXT: v_mov_b64_e32 v[12:13], s[0:1] +; CHECK-NEXT: v_mov_b64_e32 v[30:31], s[0:1] ; CHECK-NEXT: s_mov_b32 s0, 0x7e007e00 ; CHECK-NEXT: s_mov_b32 s1, s0 -; CHECK-NEXT: v_mov_b64_e32 v[10:11], s[0:1] -; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[14:17], v[8:9], v[12:13], v[4:7] -; CHECK-NEXT: s_nop 1 -; CHECK-NEXT: v_accvgpr_write_b32 a0, v0 -; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[18:21], v[8:9], v[10:11], v[4:7] -; CHECK-NEXT: v_accvgpr_write_b32 a1, v1 -; CHECK-NEXT: v_accvgpr_write_b32 a2, v2 -; CHECK-NEXT: v_accvgpr_write_b32 a3, v3 +; CHECK-NEXT: v_accvgpr_write_b32 a0, s0 +; CHECK-NEXT: v_accvgpr_write_b32 a1, s1 +; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[0:3], v[28:29], v[28:29], v[4:7] +; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[8:11], v[28:29], v[30:31], v[4:7] +; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[12:15], v[28:29], a[0:1], v[4:7] +; CHECK-NEXT: s_nop 2 ; CHECK-NEXT: v_mov_b32_e32 v4, 0x7fc00000 ; CHECK-NEXT: v_mov_b32_e32 v5, v4 ; CHECK-NEXT: v_mov_b32_e32 v6, v4 ; CHECK-NEXT: v_mov_b32_e32 v7, v4 -; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[14:17], v[8:9], v[8:9], v[14:17] +; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[8:11], v[28:29], v[28:29], v[8:11] ; CHECK-NEXT: s_nop 0 -; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[22:25], v[8:9], v[8:9], v[4:7] +; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[16:19], v[28:29], v[28:29], v[4:7] ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; def v[4:7] ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_nop 0 -; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[0:3], v[8:9], v[12:13], v[4:7] -; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[26:29], v[8:9], v[8:9], v[4:7] -; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[0:3], v[8:9], v[8:9], v[0:3] -; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[22:25], v[8:9], v[8:9], v[22:25] -; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[4:7], v[8:9], v[8:9], v[26:29] +; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[16:19], v[28:29], v[28:29], v[16:19] +; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[24:27], v[28:29], v[30:31], v[4:7] ; CHECK-NEXT: s_nop 5 -; CHECK-NEXT: v_cvt_f16_f32_e32 v23, v14 -; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[14:17], v[8:9], v[8:9], v[18:21] -; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[0:3], v[12:13], v[8:9], v[0:3] -; CHECK-NEXT: s_nop 1 -; CHECK-NEXT: v_accvgpr_read_b32 v19, a3 -; CHECK-NEXT: v_accvgpr_read_b32 v18, a2 -; CHECK-NEXT: v_mov_b64_e32 v[20:21], 0 -; CHECK-NEXT: s_nop 0 -; CHECK-NEXT: v_accvgpr_read_b32 v17, a1 -; CHECK-NEXT: v_accvgpr_read_b32 v16, a0 -; CHECK-NEXT: v_cvt_f16_f32_e32 v15, v22 -; CHECK-NEXT: v_cvt_f16_f32_e32 v14, v14 -; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[16:19], v[8:9], v[8:9], v[16:19] -; CHECK-NEXT: v_cvt_f16_f32_e32 v12, v0 -; CHECK-NEXT: global_store_short v[20:21], v23, off +; CHECK-NEXT: v_cvt_f16_f32_e32 v17, v8 +; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[8:11], v[28:29], v[28:29], v[12:15] +; CHECK-NEXT: s_nop 2 +; CHECK-NEXT: 
v_mov_b64_e32 v[12:13], 0 +; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[0:3], v[28:29], v[28:29], v[0:3] +; CHECK-NEXT: global_store_short v[12:13], v17, off ; CHECK-NEXT: buffer_wbl2 sc0 sc1 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: buffer_inv sc0 sc1 -; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[0:3], v[10:11], v[8:9], v[4:7] -; CHECK-NEXT: global_store_short v[20:21], v15, off +; CHECK-NEXT: v_cvt_f16_f32_e32 v9, v16 +; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[20:23], v[28:29], v[28:29], v[4:7] +; CHECK-NEXT: global_store_short v[12:13], v9, off +; CHECK-NEXT: v_cvt_f16_f32_e32 v1, v8 +; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[8:11], v[28:29], v[28:29], v[24:27] ; CHECK-NEXT: buffer_wbl2 sc0 sc1 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: buffer_inv sc0 sc1 -; CHECK-NEXT: global_store_short v[20:21], v14, off -; CHECK-NEXT: v_cvt_f16_f32_e32 v14, v16 +; CHECK-NEXT: v_cvt_f16_f32_e32 v14, v0 +; CHECK-NEXT: global_store_short v[12:13], v1, off +; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[4:7], v[28:29], v[28:29], v[20:23] ; CHECK-NEXT: buffer_wbl2 sc0 sc1 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: buffer_inv sc0 sc1 -; CHECK-NEXT: global_store_short v[20:21], v14, off -; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0 +; CHECK-NEXT: global_store_short v[12:13], v14, off ; CHECK-NEXT: buffer_wbl2 sc0 sc1 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: buffer_inv sc0 sc1 -; CHECK-NEXT: global_store_short v[20:21], v12, off +; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[0:3], v[30:31], v[28:29], v[8:11] +; CHECK-NEXT: s_nop 6 +; CHECK-NEXT: v_cvt_f16_f32_e32 v8, v0 +; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[0:3], a[0:1], v[28:29], v[4:7] +; CHECK-NEXT: global_store_short v[12:13], v8, off ; CHECK-NEXT: buffer_wbl2 sc0 sc1 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: buffer_inv sc0 sc1 -; CHECK-NEXT: global_store_short v[20:21], v0, off +; CHECK-NEXT: s_nop 2 +; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0 +; CHECK-NEXT: global_store_short v[12:13], v0, off ; CHECK-NEXT: s_endpgm entry: %k0 = call <4 x float> asm sideeffect "; def $0", "=s"() diff --git a/llvm/test/CodeGen/AMDGPU/smfmac_alloc_failure_no_agpr_O0.ll b/llvm/test/CodeGen/AMDGPU/smfmac_alloc_failure_no_agpr_O0.ll new file mode 100644 index 0000000..ba0fdc68 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/smfmac_alloc_failure_no_agpr_O0.ll @@ -0,0 +1,119 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx950 -amdgpu-mfma-vgpr-form=0 < %s | FileCheck %s +; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx950 -amdgpu-mfma-vgpr-form=1 < %s | FileCheck %s + +declare <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.f16(<8 x half>, <16 x half>, <16 x float>, i32, i32 immarg, i32 immarg) + +define amdgpu_kernel void @test_smfmac_f32_32x32x32_f16__vgpr(ptr addrspace(1) %arg, <8 x half> %a, <16 x half> %b, i32 %idx) #0 { +; CHECK-LABEL: test_smfmac_f32_32x32x32_f16__vgpr: +; CHECK: ; %bb.0: ; %bb +; CHECK-NEXT: s_mov_b64 s[2:3], s[4:5] +; CHECK-NEXT: v_mov_b32_e32 v1, v0 +; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24 +; CHECK-NEXT: s_load_dwordx4 s[12:15], s[2:3], 0x34 +; CHECK-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x44 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_load_dword s2, s[2:3], 0x64 +; CHECK-NEXT: s_mov_b32 s3, 0x3ff +; CHECK-NEXT: v_and_b32_e64 v1, v1, s3 +; CHECK-NEXT: s_mov_b32 s3, 6 +; CHECK-NEXT: v_lshlrev_b32_e64 v8, s3, v1 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: global_load_dwordx4 v[4:7], v8, s[0:1] offset:48 +; CHECK-NEXT: s_waitcnt 
vmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v1, v7 +; CHECK-NEXT: v_mov_b32_e32 v2, v6 +; CHECK-NEXT: v_mov_b32_e32 v3, v5 +; CHECK-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5_vgpr6_vgpr7 killed $exec +; CHECK-NEXT: global_load_dwordx4 v[10:13], v8, s[0:1] offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v5, v13 +; CHECK-NEXT: v_mov_b32_e32 v6, v12 +; CHECK-NEXT: v_mov_b32_e32 v7, v11 +; CHECK-NEXT: v_mov_b32_e32 v24, v10 +; CHECK-NEXT: global_load_dwordx4 v[10:13], v8, s[0:1] offset:16 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v25, v13 +; CHECK-NEXT: v_mov_b32_e32 v26, v12 +; CHECK-NEXT: v_mov_b32_e32 v27, v11 +; CHECK-NEXT: v_mov_b32_e32 v28, v10 +; CHECK-NEXT: global_load_dwordx4 v[8:11], v8, s[0:1] +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v29, v11 +; CHECK-NEXT: v_mov_b32_e32 v30, v10 +; CHECK-NEXT: v_mov_b32_e32 v31, v9 +; CHECK-NEXT: ; kill: def $vgpr8 killed $vgpr8 killed $vgpr8_vgpr9_vgpr10_vgpr11 killed $exec +; CHECK-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 killed $exec +; CHECK-NEXT: v_mov_b32_e32 v9, v31 +; CHECK-NEXT: v_mov_b32_e32 v10, v30 +; CHECK-NEXT: v_mov_b32_e32 v11, v29 +; CHECK-NEXT: v_mov_b32_e32 v12, v28 +; CHECK-NEXT: v_mov_b32_e32 v13, v27 +; CHECK-NEXT: v_mov_b32_e32 v14, v26 +; CHECK-NEXT: v_mov_b32_e32 v15, v25 +; CHECK-NEXT: v_mov_b32_e32 v16, v24 +; CHECK-NEXT: v_mov_b32_e32 v17, v7 +; CHECK-NEXT: v_mov_b32_e32 v18, v6 +; CHECK-NEXT: v_mov_b32_e32 v19, v5 +; CHECK-NEXT: v_mov_b32_e32 v20, v4 +; CHECK-NEXT: v_mov_b32_e32 v21, v3 +; CHECK-NEXT: v_mov_b32_e32 v22, v2 +; CHECK-NEXT: v_mov_b32_e32 v23, v1 +; CHECK-NEXT: v_mov_b64_e32 v[2:3], s[12:13] +; CHECK-NEXT: v_mov_b64_e32 v[4:5], s[14:15] +; CHECK-NEXT: v_mov_b64_e32 v[30:31], s[10:11] +; CHECK-NEXT: v_mov_b64_e32 v[28:29], s[8:9] +; CHECK-NEXT: v_mov_b64_e32 v[26:27], s[6:7] +; CHECK-NEXT: v_mov_b64_e32 v[24:25], s[4:5] +; CHECK-NEXT: v_mov_b32_e32 v1, s2 +; CHECK-NEXT: s_nop 1 +; CHECK-NEXT: v_smfmac_f32_32x32x32_f16 v[8:23], v[2:5], v[24:31], v1 cbsz:1 abid:2 +; CHECK-NEXT: s_nop 11 +; CHECK-NEXT: v_mov_b32_e32 v1, v23 +; CHECK-NEXT: v_mov_b32_e32 v6, v22 +; CHECK-NEXT: v_mov_b32_e32 v7, v21 +; CHECK-NEXT: v_mov_b32_e32 v2, v20 +; CHECK-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec +; CHECK-NEXT: v_mov_b32_e32 v3, v7 +; CHECK-NEXT: v_mov_b32_e32 v4, v6 +; CHECK-NEXT: v_mov_b32_e32 v5, v1 +; CHECK-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:48 +; CHECK-NEXT: v_mov_b32_e32 v1, v19 +; CHECK-NEXT: v_mov_b32_e32 v6, v18 +; CHECK-NEXT: v_mov_b32_e32 v7, v17 +; CHECK-NEXT: v_mov_b32_e32 v2, v16 +; CHECK-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec +; CHECK-NEXT: v_mov_b32_e32 v3, v7 +; CHECK-NEXT: v_mov_b32_e32 v4, v6 +; CHECK-NEXT: v_mov_b32_e32 v5, v1 +; CHECK-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:32 +; CHECK-NEXT: v_mov_b32_e32 v1, v15 +; CHECK-NEXT: v_mov_b32_e32 v6, v14 +; CHECK-NEXT: v_mov_b32_e32 v7, v13 +; CHECK-NEXT: v_mov_b32_e32 v2, v12 +; CHECK-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec +; CHECK-NEXT: v_mov_b32_e32 v3, v7 +; CHECK-NEXT: v_mov_b32_e32 v4, v6 +; CHECK-NEXT: v_mov_b32_e32 v5, v1 +; CHECK-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:16 +; CHECK-NEXT: v_mov_b32_e32 v1, v11 +; CHECK-NEXT: v_mov_b32_e32 v6, v10 +; CHECK-NEXT: v_mov_b32_e32 v7, v9 +; CHECK-NEXT: 
v_mov_b32_e32 v2, v8 +; CHECK-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec +; CHECK-NEXT: v_mov_b32_e32 v3, v7 +; CHECK-NEXT: v_mov_b32_e32 v4, v6 +; CHECK-NEXT: v_mov_b32_e32 v5, v1 +; CHECK-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] +; CHECK-NEXT: s_endpgm +bb: + %id = call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr <16 x float>, ptr addrspace(1) %arg, i32 %id + %in.1 = load <16 x float>, ptr addrspace(1) %gep + %mai.1 = tail call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.f16(<8 x half> %a, <16 x half> %b, <16 x float> %in.1, i32 %idx, i32 1, i32 2) + store <16 x float> %mai.1, ptr addrspace(1) %arg + ret void +} + +attributes #0 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-agpr-alloc"="0,0" } diff --git a/llvm/test/CodeGen/RISCV/float-imm.ll b/llvm/test/CodeGen/RISCV/float-imm.ll index e4e3454..610c72b 100644 --- a/llvm/test/CodeGen/RISCV/float-imm.ll +++ b/llvm/test/CodeGen/RISCV/float-imm.ll @@ -4,11 +4,10 @@ ; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \ ; RUN: -target-abi=lp64f | FileCheck %s ; RUN: llc -mtriple=riscv32 -mattr=+zfinx -verify-machineinstrs < %s \ -; RUN: -target-abi=ilp32 | FileCheck --check-prefixes=CHECKZFINX,RV32ZFINX %s +; RUN: -target-abi=ilp32 | FileCheck --check-prefixes=CHECKZFINX %s ; RUN: llc -mtriple=riscv64 -mattr=+zfinx -verify-machineinstrs < %s \ -; RUN: -target-abi=lp64 | FileCheck --check-prefixes=CHECKZFINX,RV64ZFINX %s +; RUN: -target-abi=lp64 | FileCheck --check-prefixes=CHECKZFINX %s -; TODO: constant pool shouldn't be necessary for RV64IF. define float @float_imm() nounwind { ; CHECK-LABEL: float_imm: ; CHECK: # %bb.0: @@ -69,6 +68,3 @@ define float @float_negative_zero(ptr %pf) nounwind { ; CHECKZFINX-NEXT: ret ret float -0.0 } -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; RV32ZFINX: {{.*}} -; RV64ZFINX: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/half-imm.ll b/llvm/test/CodeGen/RISCV/half-imm.ll index 1dc0da8c..ec1a7a4 100644 --- a/llvm/test/CodeGen/RISCV/half-imm.ll +++ b/llvm/test/CodeGen/RISCV/half-imm.ll @@ -5,22 +5,21 @@ ; RUN: -target-abi lp64f < %s | FileCheck %s ; RUN: llc -mtriple=riscv32 -mattr=+zhinx -verify-machineinstrs \ ; RUN: -target-abi ilp32 < %s \ -; RUN: | FileCheck -check-prefix=RV32IZHINX %s +; RUN: | FileCheck -check-prefixes=CHECKIZHINX %s ; RUN: llc -mtriple=riscv64 -mattr=+zhinx -verify-machineinstrs \ ; RUN: -target-abi lp64 < %s \ -; RUN: | FileCheck -check-prefix=RV64IZHINX %s +; RUN: | FileCheck -check-prefixes=CHECKIZHINX %s ; RUN: llc -mtriple=riscv32 -mattr=+zfhmin -verify-machineinstrs \ ; RUN: -target-abi ilp32f < %s | FileCheck -check-prefixes=CHECKIZFHMIN %s ; RUN: llc -mtriple=riscv64 -mattr=+zfhmin -verify-machineinstrs \ ; RUN: -target-abi lp64f < %s | FileCheck -check-prefixes=CHECKIZFHMIN %s ; RUN: llc -mtriple=riscv32 -mattr=+zhinxmin -verify-machineinstrs \ ; RUN: -target-abi ilp32 < %s \ -; RUN: | FileCheck -check-prefixes=CHECKIZHINXMIN,RV32IZHINXMIN %s +; RUN: | FileCheck -check-prefixes=CHECKIZHINXMIN %s ; RUN: llc -mtriple=riscv64 -mattr=+zhinxmin -verify-machineinstrs \ ; RUN: -target-abi lp64 < %s \ -; RUN: | FileCheck -check-prefixes=CHECKIZHINXMIN,RV64IZHINXMIN %s +; RUN: | FileCheck -check-prefixes=CHECKIZHINXMIN %s -; TODO: constant pool shouldn't be necessary for RV32IZfh and RV64IZfh define half @half_imm() nounwind { ; CHECK-LABEL: half_imm: ; CHECK: # %bb.0: @@ -29,19 +28,12 @@ define half @half_imm() nounwind { ; CHECK-NEXT: fmv.h.x fa0, a0 ; CHECK-NEXT: ret ; -; RV32IZHINX-LABEL: half_imm: -; RV32IZHINX: # %bb.0: -; RV32IZHINX-NEXT: lui a0, 4 -; RV32IZHINX-NEXT: addi a0, a0, 512 -; RV32IZHINX-NEXT: # kill: def $x10_h killed $x10_h killed $x10 -; RV32IZHINX-NEXT: ret -; -; RV64IZHINX-LABEL: half_imm: -; RV64IZHINX: # %bb.0: -; RV64IZHINX-NEXT: lui a0, 4 -; RV64IZHINX-NEXT: addi a0, a0, 512 -; RV64IZHINX-NEXT: # kill: def $x10_h killed $x10_h killed $x10 -; RV64IZHINX-NEXT: ret +; CHECKIZHINX-LABEL: half_imm: +; CHECKIZHINX: # %bb.0: +; CHECKIZHINX-NEXT: lui a0, 4 +; CHECKIZHINX-NEXT: addi a0, a0, 512 +; CHECKIZHINX-NEXT: # kill: def $x10_h killed $x10_h killed $x10 +; CHECKIZHINX-NEXT: ret ; ; CHECKIZFHMIN-LABEL: half_imm: ; CHECKIZFHMIN: # %bb.0: @@ -68,19 +60,12 @@ define half @half_imm_op(half %a) nounwind { ; CHECK-NEXT: fadd.h fa0, fa0, fa5 ; CHECK-NEXT: ret ; -; RV32IZHINX-LABEL: half_imm_op: -; RV32IZHINX: # %bb.0: -; RV32IZHINX-NEXT: li a1, 15 -; RV32IZHINX-NEXT: slli a1, a1, 10 -; RV32IZHINX-NEXT: fadd.h a0, a0, a1 -; RV32IZHINX-NEXT: ret -; -; RV64IZHINX-LABEL: half_imm_op: -; RV64IZHINX: # %bb.0: -; RV64IZHINX-NEXT: li a1, 15 -; RV64IZHINX-NEXT: slli a1, a1, 10 -; RV64IZHINX-NEXT: fadd.h a0, a0, a1 -; RV64IZHINX-NEXT: ret +; CHECKIZHINX-LABEL: half_imm_op: +; CHECKIZHINX: # %bb.0: +; CHECKIZHINX-NEXT: li a1, 15 +; CHECKIZHINX-NEXT: slli a1, a1, 10 +; CHECKIZHINX-NEXT: fadd.h a0, a0, a1 +; CHECKIZHINX-NEXT: ret ; ; CHECKIZFHMIN-LABEL: half_imm_op: ; CHECKIZFHMIN: # %bb.0: @@ -108,15 +93,10 @@ define half @half_positive_zero(ptr %pf) nounwind { ; CHECK-NEXT: fmv.h.x fa0, zero ; CHECK-NEXT: ret ; -; RV32IZHINX-LABEL: half_positive_zero: -; RV32IZHINX: # %bb.0: -; RV32IZHINX-NEXT: li a0, 0 -; RV32IZHINX-NEXT: ret -; -; RV64IZHINX-LABEL: half_positive_zero: -; RV64IZHINX: # %bb.0: -; RV64IZHINX-NEXT: li a0, 0 -; RV64IZHINX-NEXT: ret +; CHECKIZHINX-LABEL: 
half_positive_zero: +; CHECKIZHINX: # %bb.0: +; CHECKIZHINX-NEXT: li a0, 0 +; CHECKIZHINX-NEXT: ret ; ; CHECKIZFHMIN-LABEL: half_positive_zero: ; CHECKIZFHMIN: # %bb.0: @@ -137,15 +117,10 @@ define half @half_negative_zero(ptr %pf) nounwind { ; CHECK-NEXT: fmv.h.x fa0, a0 ; CHECK-NEXT: ret ; -; RV32IZHINX-LABEL: half_negative_zero: -; RV32IZHINX: # %bb.0: -; RV32IZHINX-NEXT: lui a0, 1048568 -; RV32IZHINX-NEXT: ret -; -; RV64IZHINX-LABEL: half_negative_zero: -; RV64IZHINX: # %bb.0: -; RV64IZHINX-NEXT: lui a0, 1048568 -; RV64IZHINX-NEXT: ret +; CHECKIZHINX-LABEL: half_negative_zero: +; CHECKIZHINX: # %bb.0: +; CHECKIZHINX-NEXT: lui a0, 1048568 +; CHECKIZHINX-NEXT: ret ; ; CHECKIZFHMIN-LABEL: half_negative_zero: ; CHECKIZFHMIN: # %bb.0: @@ -159,6 +134,3 @@ define half @half_negative_zero(ptr %pf) nounwind { ; CHECKIZHINXMIN-NEXT: ret ret half -0.0 } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; RV32IZHINXMIN: {{.*}} -; RV64IZHINXMIN: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll index c028d25..7fd7626 100644 --- a/llvm/test/CodeGen/RISCV/rv64zba.ll +++ b/llvm/test/CodeGen/RISCV/rv64zba.ll @@ -409,15 +409,11 @@ define i64 @sh3adduw_2(i64 %0, i64 %1) { ; ; RV64ZBA-LABEL: sh3adduw_2: ; RV64ZBA: # %bb.0: -; RV64ZBA-NEXT: slli a0, a0, 3 -; RV64ZBA-NEXT: srli a0, a0, 3 ; RV64ZBA-NEXT: sh3add.uw a0, a0, a1 ; RV64ZBA-NEXT: ret ; ; RV64XANDESPERF-LABEL: sh3adduw_2: ; RV64XANDESPERF: # %bb.0: -; RV64XANDESPERF-NEXT: slli a0, a0, 3 -; RV64XANDESPERF-NEXT: srli a0, a0, 3 ; RV64XANDESPERF-NEXT: nds.lea.d.ze a0, a1, a0 ; RV64XANDESPERF-NEXT: ret %3 = shl i64 %0, 3 @@ -436,15 +432,11 @@ define i64 @sh3adduw_3(i64 %0, i64 %1) { ; ; RV64ZBA-LABEL: sh3adduw_3: ; RV64ZBA: # %bb.0: -; RV64ZBA-NEXT: slli a0, a0, 3 -; RV64ZBA-NEXT: srli a0, a0, 3 ; RV64ZBA-NEXT: sh3add.uw a0, a0, a1 ; RV64ZBA-NEXT: ret ; ; RV64XANDESPERF-LABEL: sh3adduw_3: ; RV64XANDESPERF: # %bb.0: -; RV64XANDESPERF-NEXT: slli a0, a0, 3 -; RV64XANDESPERF-NEXT: srli a0, a0, 3 ; RV64XANDESPERF-NEXT: nds.lea.d.ze a0, a1, a0 ; RV64XANDESPERF-NEXT: ret %3 = shl i64 %0, 3 @@ -2681,7 +2673,7 @@ define i64 @srliw_3_sh3add(ptr %0, i32 signext %1) { ; RV64ZBA-LABEL: srliw_3_sh3add: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: srliw a1, a1, 3 -; RV64ZBA-NEXT: sh3add.uw a0, a1, a0 +; RV64ZBA-NEXT: sh3add a0, a1, a0 ; RV64ZBA-NEXT: ld a0, 0(a0) ; RV64ZBA-NEXT: ret ; diff --git a/llvm/test/DebugInfo/dwarf-complex-int.ll b/llvm/test/DebugInfo/dwarf-complex-int.ll new file mode 100644 index 0000000..effd0ec --- /dev/null +++ b/llvm/test/DebugInfo/dwarf-complex-int.ll @@ -0,0 +1,59 @@ +; REQUIRES: object-emission +; RUN: %llc_dwarf %s -filetype=obj -o - | llvm-dwarfdump - | FileCheck %s + +;; https://github.com/llvm/llvm-project/issues/140362 +;; Don't assert when emitting a complex integer type in DWARF. 
+ +;; C source: +;; int g; +;; +;; void foo(_Complex short c) { __builtin_memmove(&g, (char *)&c, 2); } +;; +;; void bar() { foo(0); } + +; CHECK: DW_AT_type ([[complex:0x[0-9a-f]+]] "complex") + +; CHECK: [[complex]]: DW_TAG_base_type +; CHECK-NEXT: DW_AT_name ("complex") +; CHECK-NEXT: DW_AT_encoding (0x80) +; CHECK-NEXT: DW_AT_byte_size (0x04) + +@g = dso_local local_unnamed_addr global i32 0, align 4, !dbg !0 + +define dso_local void @bar() local_unnamed_addr !dbg !18 { +entry: + #dbg_value(i32 0, !21, !DIExpression(), !27) + store i16 0, ptr @g, align 4, !dbg !29 + ret void, !dbg !30 +} + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!10, !11} +!llvm.ident = !{!17} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "g", scope: !2, file: !8, line: 1, type: !9, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 22.0.0git", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, retainedTypes: !4, globals: !7, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "/app/example.cpp", directory: "/app") +!4 = !{!5} +!5 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !6, size: 64) +!6 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) +!7 = !{!0} +!8 = !DIFile(filename: "example.cpp", directory: "/app") +!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!10 = !{i32 7, !"Dwarf Version", i32 5} +!11 = !{i32 2, !"Debug Info Version", i32 3} +!17 = !{!"clang version 22.0.0git"} +!18 = distinct !DISubprogram(name: "bar", linkageName: "bar()", scope: !8, file: !8, line: 5, type: !19, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, keyInstructions: true) +!19 = !DISubroutineType(types: !20) +!20 = !{null} +!21 = !DILocalVariable(name: "c", arg: 1, scope: !22, file: !8, line: 3, type: !25) +!22 = distinct !DISubprogram(name: "foo", linkageName: "_ZL3fooCs", scope: !8, file: !8, line: 3, type: !23, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !26, keyInstructions: true) +!23 = !DISubroutineType(types: !24) +!24 = !{null, !25} +!25 = !DIBasicType(name: "complex", size: 32, encoding: 128) +!26 = !{!21} +!27 = !DILocation(line: 0, scope: !22, inlinedAt: !28) +!28 = distinct !DILocation(line: 5, column: 14, scope: !18) +!29 = !DILocation(line: 3, column: 37, scope: !22, inlinedAt: !28, atomGroup: 1, atomRank: 1) +!30 = !DILocation(line: 5, column: 22, scope: !18, atomGroup: 1, atomRank: 1) diff --git a/llvm/test/Instrumentation/AddressSanitizer/RISCV/asan-rvv-intrinsics.ll b/llvm/test/Instrumentation/AddressSanitizer/RISCV/asan-rvv-intrinsics.ll index 919f16b..4b50094 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/RISCV/asan-rvv-intrinsics.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/RISCV/asan-rvv-intrinsics.ll @@ -180,7 +180,29 @@ define <vscale x 1 x i32> @test_vlseg2_nxv1i32(ptr %base, i64 %vl) sanitize_addr ; CHECK-LABEL: @test_vlseg2_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP24:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr [[BASE:%.*]], i64 [[VL:%.*]], i64 
5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <vscale x 1 x i64>, ptr [[BASE:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP8]], i64 8) +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[TMP24:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr [[BASE]], i64 [[VL]], i64 5) ; CHECK-NEXT: [[TMP25:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[TMP24]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP25]] ; @@ -194,7 +216,29 @@ define <vscale x 1 x i32> @test_vlseg2_mask_nxv1i32(ptr %base, i64 %vl, <vscale ; CHECK-LABEL: @test_vlseg2_mask_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP24:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr [[BASE:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <vscale x 1 x i64>, ptr [[BASE:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP8]], i64 8) +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[TMP24:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr [[BASE]], <vscale x 1 x 
i1> [[MASK]], i64 [[VL]], i64 1, i64 5) ; CHECK-NEXT: [[TMP25:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[TMP24]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP25]] ; @@ -212,7 +256,29 @@ define <vscale x 1 x i32> @test_vlseg3_nxv1i32(ptr %base, i64 %vl) sanitize_addr ; CHECK-LABEL: @test_vlseg3_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP36:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) poison, ptr [[BASE:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <vscale x 1 x i96>, ptr [[BASE:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP8]], i64 12) +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[TMP36:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) poison, ptr [[BASE]], i64 [[VL]], i64 5) ; CHECK-NEXT: [[TMP37:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[TMP36]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP37]] ; @@ -226,7 +292,29 @@ define <vscale x 1 x i32> @test_vlseg3_mask_nxv1i32(ptr %base, i64 %vl, <vscale ; CHECK-LABEL: @test_vlseg3_mask_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP36:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) poison, ptr [[BASE:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]] +; CHECK: 6: +; CHECK-NEXT: 
[[TMP7:%.*]] = getelementptr <vscale x 1 x i96>, ptr [[BASE:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP8]], i64 12) +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[TMP36:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) poison, ptr [[BASE]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5) ; CHECK-NEXT: [[TMP37:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[TMP36]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP37]] ; @@ -244,7 +332,29 @@ define <vscale x 1 x i32> @test_vlseg4_nxv1i32(ptr %base, i64 %vl) sanitize_addr ; CHECK-LABEL: @test_vlseg4_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP48:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) poison, ptr [[BASE:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <vscale x 1 x i128>, ptr [[BASE:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP8]], i64 16) +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[TMP48:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) poison, ptr [[BASE]], i64 [[VL]], i64 5) ; CHECK-NEXT: [[TMP49:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[TMP48]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP49]] ; @@ -258,7 +368,29 @@ define <vscale x 1 x i32> @test_vlseg4_mask_nxv1i32(ptr %base, i64 %vl, <vscale ; CHECK-LABEL: @test_vlseg4_mask_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP48:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) 
@llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) poison, ptr [[BASE:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <vscale x 1 x i128>, ptr [[BASE:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP8]], i64 16) +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[TMP48:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) poison, ptr [[BASE]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5) ; CHECK-NEXT: [[TMP49:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[TMP48]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP49]] ; @@ -276,7 +408,29 @@ define <vscale x 1 x i32> @test_vlseg5_nxv1i32(ptr %base, i64 %vl) sanitize_addr ; CHECK-LABEL: @test_vlseg5_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP60:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) poison, ptr [[BASE:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <vscale x 1 x i160>, ptr [[BASE:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP8]], i64 20) +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[TMP60:%.*]] = tail call 
target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) poison, ptr [[BASE]], i64 [[VL]], i64 5) ; CHECK-NEXT: [[TMP61:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[TMP60]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP61]] ; @@ -290,7 +444,29 @@ define <vscale x 1 x i32> @test_vlseg5_mask_nxv1i32(ptr %base, i64 %vl, <vscale ; CHECK-LABEL: @test_vlseg5_mask_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP60:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) poison, ptr [[BASE:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <vscale x 1 x i160>, ptr [[BASE:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP8]], i64 20) +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[TMP60:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) poison, ptr [[BASE]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5) ; CHECK-NEXT: [[TMP61:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[TMP60]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP61]] ; @@ -308,7 +484,29 @@ define <vscale x 1 x i32> @test_vlseg6_nxv1i32(ptr %base, i64 %vl) sanitize_addr ; CHECK-LABEL: @test_vlseg6_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP72:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) poison, ptr [[BASE:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], 
[[TMP9:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <vscale x 1 x i192>, ptr [[BASE:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP8]], i64 24) +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[TMP72:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) poison, ptr [[BASE]], i64 [[VL]], i64 5) ; CHECK-NEXT: [[TMP73:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[TMP72]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP73]] ; @@ -322,7 +520,29 @@ define <vscale x 1 x i32> @test_vlseg6_mask_nxv1i32(ptr %base, i64 %vl, <vscale ; CHECK-LABEL: @test_vlseg6_mask_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP72:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) poison, ptr [[BASE:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <vscale x 1 x i192>, ptr [[BASE:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP8]], i64 24) +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[TMP72:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) poison, ptr [[BASE]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5) ; CHECK-NEXT: [[TMP73:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[TMP72]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP73]] ; @@ -340,7 +560,29 @@ define <vscale x 1 x i32> @test_vlseg7_nxv1i32(ptr %base, i64 %vl) sanitize_addr ; CHECK-LABEL: 
@test_vlseg7_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP84:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) poison, ptr [[BASE:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <vscale x 1 x i224>, ptr [[BASE:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP8]], i64 28) +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[TMP84:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) poison, ptr [[BASE]], i64 [[VL]], i64 5) ; CHECK-NEXT: [[TMP85:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[TMP84]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP85]] ; @@ -354,7 +596,29 @@ define <vscale x 1 x i32> @test_vlseg7_mask_nxv1i32(ptr %base, i64 %vl, <vscale ; CHECK-LABEL: @test_vlseg7_mask_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP84:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) poison, ptr [[BASE:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <vscale x 1 x i224>, ptr [[BASE:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP8]], i64 28) +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 
[[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[TMP84:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) poison, ptr [[BASE]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5) ; CHECK-NEXT: [[TMP85:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[TMP84]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP85]] ; @@ -372,7 +636,29 @@ define <vscale x 1 x i32> @test_vlseg8_nxv1i32(ptr %base, i64 %vl) sanitize_addr ; CHECK-LABEL: @test_vlseg8_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP96:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) poison, ptr [[BASE:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <vscale x 1 x i256>, ptr [[BASE:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP8]], i64 32) +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[TMP96:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) poison, ptr [[BASE]], i64 [[VL]], i64 5) ; CHECK-NEXT: [[TMP97:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[TMP96]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP97]] ; @@ -386,7 +672,29 @@ define <vscale x 1 x i32> @test_vlseg8_mask_nxv1i32(ptr %base, i64 %vl, <vscale ; CHECK-LABEL: @test_vlseg8_mask_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP96:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) poison, ptr [[BASE:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = 
call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <vscale x 1 x i256>, ptr [[BASE:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP8]], i64 32) +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[TMP96:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) poison, ptr [[BASE]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5) ; CHECK-NEXT: [[TMP97:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[TMP96]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP97]] ; @@ -404,7 +712,29 @@ define void @test_vsseg2_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x i8>, ; CHECK-LABEL: @test_vsseg2_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsseg2.triscv.vector.tuple_nxv4i8_2t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <vscale x 1 x i64>, ptr [[BASE:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP8]], i64 8) +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: tail call void @llvm.riscv.vsseg2.triscv.vector.tuple_nxv4i8_2t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[VAL:%.*]], ptr [[BASE]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -416,7 +746,29 @@ define void @test_vsseg2_mask_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x ; CHECK-LABEL: @test_vsseg2_mask_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void 
@llvm.riscv.vsseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <vscale x 1 x i64>, ptr [[BASE:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP8]], i64 8) +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: tail call void @llvm.riscv.vsseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -432,7 +784,29 @@ define void @test_vsseg3_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x i8>, ; CHECK-LABEL: @test_vsseg3_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsseg3.triscv.vector.tuple_nxv4i8_3t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <vscale x 1 x i96>, ptr [[BASE:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP8]], i64 12) +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: tail call void @llvm.riscv.vsseg3.triscv.vector.tuple_nxv4i8_3t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[VAL:%.*]], ptr [[BASE]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -444,7 +818,29 @@ define void @test_vsseg3_mask_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x ; CHECK-LABEL: @test_vsseg3_mask_nxv1i32( ; CHECK-NEXT: 
entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <vscale x 1 x i96>, ptr [[BASE:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP8]], i64 12) +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: tail call void @llvm.riscv.vsseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -460,7 +856,29 @@ define void @test_vsseg4_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x i8>, ; CHECK-LABEL: @test_vsseg4_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsseg4.triscv.vector.tuple_nxv4i8_4t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <vscale x 1 x i128>, ptr [[BASE:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP8]], i64 16) +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: tail call void @llvm.riscv.vsseg4.triscv.vector.tuple_nxv4i8_4t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[VAL:%.*]], ptr [[BASE]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -472,7 +890,29 @@ define void 
@test_vsseg4_mask_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x ; CHECK-LABEL: @test_vsseg4_mask_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <vscale x 1 x i128>, ptr [[BASE:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP8]], i64 16) +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: tail call void @llvm.riscv.vsseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -488,7 +928,29 @@ define void @test_vsseg5_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x i8>, ; CHECK-LABEL: @test_vsseg5_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsseg5.triscv.vector.tuple_nxv4i8_5t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <vscale x 1 x i160>, ptr [[BASE:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP8]], i64 20) +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: tail call void @llvm.riscv.vsseg5.triscv.vector.tuple_nxv4i8_5t.p0.i64(target("riscv.vector.tuple", <vscale 
x 4 x i8>, 5) [[VAL:%.*]], ptr [[BASE]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -500,7 +962,29 @@ define void @test_vsseg5_mask_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x ; CHECK-LABEL: @test_vsseg5_mask_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <vscale x 1 x i160>, ptr [[BASE:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP8]], i64 20) +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: tail call void @llvm.riscv.vsseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -516,7 +1000,29 @@ define void @test_vsseg6_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x i8>, ; CHECK-LABEL: @test_vsseg6_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsseg6.triscv.vector.tuple_nxv4i8_6t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <vscale x 1 x i192>, ptr [[BASE:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP8]], i64 24) +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 
10: +; CHECK-NEXT: tail call void @llvm.riscv.vsseg6.triscv.vector.tuple_nxv4i8_6t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[VAL:%.*]], ptr [[BASE]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -528,7 +1034,29 @@ define void @test_vsseg6_mask_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x ; CHECK-LABEL: @test_vsseg6_mask_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <vscale x 1 x i192>, ptr [[BASE:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP8]], i64 24) +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: tail call void @llvm.riscv.vsseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -544,7 +1072,29 @@ define void @test_vsseg7_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x i8>, ; CHECK-LABEL: @test_vsseg7_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsseg7.triscv.vector.tuple_nxv4i8_7t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <vscale x 1 x i224>, ptr [[BASE:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP8]], i64 28) +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 
[[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: tail call void @llvm.riscv.vsseg7.triscv.vector.tuple_nxv4i8_7t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[VAL:%.*]], ptr [[BASE]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -556,7 +1106,29 @@ define void @test_vsseg7_mask_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x ; CHECK-LABEL: @test_vsseg7_mask_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <vscale x 1 x i224>, ptr [[BASE:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP8]], i64 28) +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: tail call void @llvm.riscv.vsseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -572,7 +1144,29 @@ define void @test_vsseg8_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x i8>, ; CHECK-LABEL: @test_vsseg8_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsseg8.triscv.vector.tuple_nxv4i8_8t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <vscale x 1 x i256>, ptr [[BASE:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP8]], i64 32) +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: 
[[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: tail call void @llvm.riscv.vsseg8.triscv.vector.tuple_nxv4i8_8t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[VAL:%.*]], ptr [[BASE]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -584,7 +1178,29 @@ define void @test_vsseg8_mask_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x ; CHECK-LABEL: @test_vsseg8_mask_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <vscale x 1 x i256>, ptr [[BASE:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP8]], i64 32) +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: tail call void @llvm.riscv.vsseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -792,7 +1408,30 @@ define <vscale x 1 x i32> @test_vlsseg2_nxv1i32(ptr %base, i64 %offset, i64 %vl) ; CHECK-LABEL: @test_vlsseg2_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP24:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlsseg2.triscv.vector.tuple_nxv4i8_2t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]] +; 
CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP9]], i64 8) +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[TMP24:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlsseg2.triscv.vector.tuple_nxv4i8_2t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr [[BASE]], i64 [[OFFSET]], i64 [[VL]], i64 5) ; CHECK-NEXT: [[TMP25:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[TMP24]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP25]] ; @@ -806,7 +1445,30 @@ define <vscale x 1 x i32> @test_vlsseg2_mask_nxv1i32(ptr %base, i64 %offset, i64 ; CHECK-LABEL: @test_vlsseg2_mask_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP24:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlsseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP9]], i64 8) +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[TMP24:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlsseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr [[BASE]], i64 [[OFFSET]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5) ; CHECK-NEXT: [[TMP25:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[TMP24]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP25]] ; @@ -824,7 +1486,30 @@ define <vscale x 1 x i32> @test_vlsseg3_nxv1i32(ptr %base, i64 %offset, i64 %vl) ; CHECK-LABEL: @test_vlsseg3_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, 
align 8 -; CHECK-NEXT: [[TMP36:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlsseg3.triscv.vector.tuple_nxv4i8_3t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) poison, ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP9]], i64 12) +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[TMP36:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlsseg3.triscv.vector.tuple_nxv4i8_3t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) poison, ptr [[BASE]], i64 [[OFFSET]], i64 [[VL]], i64 5) ; CHECK-NEXT: [[TMP37:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[TMP36]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP37]] ; @@ -838,7 +1523,30 @@ define <vscale x 1 x i32> @test_vlsseg3_mask_nxv1i32(ptr %base, i64 %offset, i64 ; CHECK-LABEL: @test_vlsseg3_mask_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP36:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlsseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) poison, ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP9]], i64 12) +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] 
+; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[TMP36:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlsseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) poison, ptr [[BASE]], i64 [[OFFSET]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5) ; CHECK-NEXT: [[TMP37:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[TMP36]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP37]] ; @@ -856,7 +1564,30 @@ define <vscale x 1 x i32> @test_vlsseg4_nxv1i32(ptr %base, i64 %offset, i64 %vl) ; CHECK-LABEL: @test_vlsseg4_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP48:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlsseg4.triscv.vector.tuple_nxv4i8_4t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) poison, ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP9]], i64 16) +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[TMP48:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlsseg4.triscv.vector.tuple_nxv4i8_4t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) poison, ptr [[BASE]], i64 [[OFFSET]], i64 [[VL]], i64 5) ; CHECK-NEXT: [[TMP49:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[TMP48]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP49]] ; @@ -870,7 +1601,30 @@ define <vscale x 1 x i32> @test_vlsseg4_mask_nxv1i32(ptr %base, i64 %offset, i64 ; CHECK-LABEL: @test_vlsseg4_mask_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP48:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlsseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) poison, ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 
[[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP9]], i64 16) +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[TMP48:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlsseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) poison, ptr [[BASE]], i64 [[OFFSET]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5) ; CHECK-NEXT: [[TMP49:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[TMP48]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP49]] ; @@ -888,7 +1642,30 @@ define <vscale x 1 x i32> @test_vlsseg5_nxv1i32(ptr %base, i64 %offset, i64 %vl) ; CHECK-LABEL: @test_vlsseg5_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP60:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlsseg5.triscv.vector.tuple_nxv4i8_5t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) poison, ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP9]], i64 20) +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[TMP60:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlsseg5.triscv.vector.tuple_nxv4i8_5t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) poison, 
ptr [[BASE]], i64 [[OFFSET]], i64 [[VL]], i64 5) ; CHECK-NEXT: [[TMP61:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[TMP60]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP61]] ; @@ -902,7 +1679,30 @@ define <vscale x 1 x i32> @test_vlsseg5_mask_nxv1i32(ptr %base, i64 %offset, i64 ; CHECK-LABEL: @test_vlsseg5_mask_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP60:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlsseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) poison, ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP9]], i64 20) +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[TMP60:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlsseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) poison, ptr [[BASE]], i64 [[OFFSET]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5) ; CHECK-NEXT: [[TMP61:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[TMP60]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP61]] ; @@ -920,7 +1720,30 @@ define <vscale x 1 x i32> @test_vlsseg6_nxv1i32(ptr %base, i64 %offset, i64 %vl) ; CHECK-LABEL: @test_vlsseg6_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP72:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlsseg6.triscv.vector.tuple_nxv4i8_6t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) poison, ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ] +; CHECK-NEXT: 
[[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP9]], i64 24) +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[TMP72:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlsseg6.triscv.vector.tuple_nxv4i8_6t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) poison, ptr [[BASE]], i64 [[OFFSET]], i64 [[VL]], i64 5) ; CHECK-NEXT: [[TMP73:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[TMP72]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP73]] ; @@ -934,7 +1757,30 @@ define <vscale x 1 x i32> @test_vlsseg6_mask_nxv1i32(ptr %base, i64 %offset, i64 ; CHECK-LABEL: @test_vlsseg6_mask_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP72:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlsseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) poison, ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP9]], i64 24) +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[TMP72:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlsseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) poison, ptr [[BASE]], i64 [[OFFSET]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5) ; CHECK-NEXT: [[TMP73:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[TMP72]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP73]] ; @@ -952,7 +1798,30 @@ 
define <vscale x 1 x i32> @test_vlsseg7_nxv1i32(ptr %base, i64 %offset, i64 %vl) ; CHECK-LABEL: @test_vlsseg7_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP84:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlsseg7.triscv.vector.tuple_nxv4i8_7t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) poison, ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP9]], i64 28) +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[TMP84:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlsseg7.triscv.vector.tuple_nxv4i8_7t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) poison, ptr [[BASE]], i64 [[OFFSET]], i64 [[VL]], i64 5) ; CHECK-NEXT: [[TMP85:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[TMP84]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP85]] ; @@ -966,7 +1835,30 @@ define <vscale x 1 x i32> @test_vlsseg7_mask_nxv1i32(ptr %base, i64 %offset, i64 ; CHECK-LABEL: @test_vlsseg7_mask_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP84:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlsseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) poison, ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; 
CHECK-NEXT: call void @__asan_loadN(i64 [[TMP9]], i64 28) +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[TMP84:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlsseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) poison, ptr [[BASE]], i64 [[OFFSET]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5) ; CHECK-NEXT: [[TMP85:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[TMP84]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP85]] ; @@ -984,7 +1876,30 @@ define <vscale x 1 x i32> @test_vlsseg8_nxv1i32(ptr %base, i64 %offset, i64 %vl) ; CHECK-LABEL: @test_vlsseg8_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP96:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlsseg8.triscv.vector.tuple_nxv4i8_8t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) poison, ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP9]], i64 32) +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[TMP96:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlsseg8.triscv.vector.tuple_nxv4i8_8t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) poison, ptr [[BASE]], i64 [[OFFSET]], i64 [[VL]], i64 5) ; CHECK-NEXT: [[TMP97:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[TMP96]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP97]] ; @@ -998,7 +1913,30 @@ define <vscale x 1 x i32> @test_vlsseg8_mask_nxv1i32(ptr %base, i64 %offset, i64 ; CHECK-LABEL: @test_vlsseg8_mask_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP96:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) 
@llvm.riscv.vlsseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) poison, ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP9]], i64 32) +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[TMP96:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlsseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) poison, ptr [[BASE]], i64 [[OFFSET]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5) ; CHECK-NEXT: [[TMP97:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[TMP96]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP97]] ; @@ -1016,7 +1954,30 @@ define void @test_vssseg2_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x i8> ; CHECK-LABEL: @test_vssseg2_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vssseg2.triscv.vector.tuple_nxv4i8_2t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP9]], i64 8) +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; 
CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: tail call void @llvm.riscv.vssseg2.triscv.vector.tuple_nxv4i8_2t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[VAL:%.*]], ptr [[BASE]], i64 [[OFFSET]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -1028,7 +1989,30 @@ define void @test_vssseg2_mask_nxv1i32(target("riscv.vector.tuple", <vscale x 4 ; CHECK-LABEL: @test_vssseg2_mask_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vssseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP9]], i64 8) +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: tail call void @llvm.riscv.vssseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[VAL:%.*]], ptr [[BASE]], i64 [[OFFSET]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -1044,7 +2028,30 @@ define void @test_vssseg3_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x i8> ; CHECK-LABEL: @test_vssseg3_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vssseg3.triscv.vector.tuple_nxv4i8_3t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 
[[TMP9]], i64 12) +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: tail call void @llvm.riscv.vssseg3.triscv.vector.tuple_nxv4i8_3t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[VAL:%.*]], ptr [[BASE]], i64 [[OFFSET]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -1056,7 +2063,30 @@ define void @test_vssseg3_mask_nxv1i32(target("riscv.vector.tuple", <vscale x 4 ; CHECK-LABEL: @test_vssseg3_mask_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vssseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP9]], i64 12) +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: tail call void @llvm.riscv.vssseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[VAL:%.*]], ptr [[BASE]], i64 [[OFFSET]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -1072,7 +2102,30 @@ define void @test_vssseg4_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x i8> ; CHECK-LABEL: @test_vssseg4_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vssseg4.triscv.vector.tuple_nxv4i8_4t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], 
label [[TMP6:%.*]], label [[TMP10]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP9]], i64 16) +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: tail call void @llvm.riscv.vssseg4.triscv.vector.tuple_nxv4i8_4t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[VAL:%.*]], ptr [[BASE]], i64 [[OFFSET]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -1084,7 +2137,30 @@ define void @test_vssseg4_mask_nxv1i32(target("riscv.vector.tuple", <vscale x 4 ; CHECK-LABEL: @test_vssseg4_mask_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vssseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP9]], i64 16) +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: tail call void @llvm.riscv.vssseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[VAL:%.*]], ptr [[BASE]], i64 [[OFFSET]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -1100,7 +2176,30 @@ define void @test_vssseg5_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x i8> ; CHECK-LABEL: @test_vssseg5_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vssseg5.triscv.vector.tuple_nxv4i8_5t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 
@llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP9]], i64 20) +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: tail call void @llvm.riscv.vssseg5.triscv.vector.tuple_nxv4i8_5t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[VAL:%.*]], ptr [[BASE]], i64 [[OFFSET]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -1112,7 +2211,30 @@ define void @test_vssseg5_mask_nxv1i32(target("riscv.vector.tuple", <vscale x 4 ; CHECK-LABEL: @test_vssseg5_mask_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vssseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP9]], i64 20) +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: tail call void @llvm.riscv.vssseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[VAL:%.*]], ptr [[BASE]], i64 [[OFFSET]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -1128,7 +2250,30 @@ define void @test_vssseg6_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x i8> ; CHECK-LABEL: @test_vssseg6_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vssseg6.triscv.vector.tuple_nxv4i8_6t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[VAL:%.*]], ptr 
[[BASE:%.*]], i64 [[OFFSET:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP9]], i64 24) +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: tail call void @llvm.riscv.vssseg6.triscv.vector.tuple_nxv4i8_6t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[VAL:%.*]], ptr [[BASE]], i64 [[OFFSET]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -1140,7 +2285,30 @@ define void @test_vssseg6_mask_nxv1i32(target("riscv.vector.tuple", <vscale x 4 ; CHECK-LABEL: @test_vssseg6_mask_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vssseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP9]], i64 24) +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: tail call void @llvm.riscv.vssseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[VAL:%.*]], ptr [[BASE]], i64 [[OFFSET]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -1156,7 +2324,30 @@ define void @test_vssseg7_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x i8> ; 
CHECK-LABEL: @test_vssseg7_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vssseg7.triscv.vector.tuple_nxv4i8_7t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP9]], i64 28) +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: tail call void @llvm.riscv.vssseg7.triscv.vector.tuple_nxv4i8_7t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[VAL:%.*]], ptr [[BASE]], i64 [[OFFSET]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -1168,7 +2359,30 @@ define void @test_vssseg7_mask_nxv1i32(target("riscv.vector.tuple", <vscale x 4 ; CHECK-LABEL: @test_vssseg7_mask_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vssseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP9]], i64 28) +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: tail call void 
@llvm.riscv.vssseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[VAL:%.*]], ptr [[BASE]], i64 [[OFFSET]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -1184,7 +2398,30 @@ define void @test_vssseg8_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x i8> ; CHECK-LABEL: @test_vssseg8_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vssseg8.triscv.vector.tuple_nxv4i8_8t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP9]], i64 32) +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: tail call void @llvm.riscv.vssseg8.triscv.vector.tuple_nxv4i8_8t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[VAL:%.*]], ptr [[BASE]], i64 [[OFFSET]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -1196,7 +2433,30 @@ define void @test_vssseg8_mask_nxv1i32(target("riscv.vector.tuple", <vscale x 4 ; CHECK-LABEL: @test_vssseg8_mask_nxv1i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vssseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP9]], i64 32) +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: 
[[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: tail call void @llvm.riscv.vssseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[VAL:%.*]], ptr [[BASE]], i64 [[OFFSET]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -1687,7 +2947,31 @@ define <vscale x 1 x i32> @test_vloxseg2_nxv1i32_nxv1i16(ptr %base, <vscale x 1 ; CHECK-LABEL: @test_vloxseg2_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP25:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP10]], i64 8) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: [[TMP25:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5) ; CHECK-NEXT: [[TMP26:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[TMP25]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP26]] ; @@ -1701,7 +2985,31 @@ define <vscale x 1 x i32> @test_vloxseg2_mask_nxv1i32_nxv1i16(ptr %base, <vscale ; CHECK-LABEL: @test_vloxseg2_mask_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP25:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5) +; CHECK-NEXT: 
[[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP10]], i64 8) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: [[TMP25:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5) ; CHECK-NEXT: [[TMP26:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[TMP25]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP26]] ; @@ -1719,7 +3027,31 @@ define <vscale x 1 x i32> @test_vloxseg3_nxv1i32_nxv1i16(ptr %base, <vscale x 1 ; CHECK-LABEL: @test_vloxseg3_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP37:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP10]], i64 12) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 
[[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: [[TMP37:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5) ; CHECK-NEXT: [[TMP38:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[TMP37]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP38]] ; @@ -1733,7 +3065,31 @@ define <vscale x 1 x i32> @test_vloxseg3_mask_nxv1i32_nxv1i16(ptr %base, <vscale ; CHECK-LABEL: @test_vloxseg3_mask_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP37:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP10]], i64 12) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: [[TMP37:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5) ; CHECK-NEXT: [[TMP38:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[TMP37]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP38]] ; @@ -1751,7 +3107,31 @@ define <vscale x 1 x i32> @test_vloxseg4_nxv1i32_nxv1i16(ptr %base, <vscale x 1 ; CHECK-LABEL: @test_vloxseg4_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP49:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) 
@llvm.riscv.vloxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP10]], i64 16) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: [[TMP49:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5) ; CHECK-NEXT: [[TMP50:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[TMP49]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP50]] ; @@ -1765,7 +3145,31 @@ define <vscale x 1 x i32> @test_vloxseg4_mask_nxv1i32_nxv1i16(ptr %base, <vscale ; CHECK-LABEL: @test_vloxseg4_mask_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP49:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] 
to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP10]], i64 16) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: [[TMP49:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5) ; CHECK-NEXT: [[TMP50:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[TMP49]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP50]] ; @@ -1783,7 +3187,31 @@ define <vscale x 1 x i32> @test_vloxseg5_nxv1i32_nxv1i16(ptr %base, <vscale x 1 ; CHECK-LABEL: @test_vloxseg5_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP61:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP10]], i64 20) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: [[TMP61:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5) ; CHECK-NEXT: [[TMP62:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[TMP61]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP62]] ; @@ -1797,7 +3225,31 @@ define <vscale x 1 x i32> @test_vloxseg5_mask_nxv1i32_nxv1i16(ptr %base, <vscale ; CHECK-LABEL: @test_vloxseg5_mask_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load 
i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP61:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP10]], i64 20) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: [[TMP61:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5) ; CHECK-NEXT: [[TMP62:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[TMP61]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP62]] ; @@ -1815,7 +3267,31 @@ define <vscale x 1 x i32> @test_vloxseg6_nxv1i32_nxv1i16(ptr %base, <vscale x 1 ; CHECK-LABEL: @test_vloxseg6_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP73:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 
[[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP10]], i64 24) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: [[TMP73:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5) ; CHECK-NEXT: [[TMP74:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[TMP73]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP74]] ; @@ -1829,7 +3305,31 @@ define <vscale x 1 x i32> @test_vloxseg6_mask_nxv1i32_nxv1i16(ptr %base, <vscale ; CHECK-LABEL: @test_vloxseg6_mask_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP73:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP10]], i64 24) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: [[TMP73:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5) ; CHECK-NEXT: [[TMP74:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[TMP73]], i32 1) ; 
CHECK-NEXT: ret <vscale x 1 x i32> [[TMP74]] ; @@ -1847,7 +3347,31 @@ define <vscale x 1 x i32> @test_vloxseg7_nxv1i32_nxv1i16(ptr %base, <vscale x 1 ; CHECK-LABEL: @test_vloxseg7_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP85:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP10]], i64 28) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: [[TMP85:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5) ; CHECK-NEXT: [[TMP86:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[TMP85]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP86]] ; @@ -1861,7 +3385,31 @@ define <vscale x 1 x i32> @test_vloxseg7_mask_nxv1i32_nxv1i16(ptr %base, <vscale ; CHECK-LABEL: @test_vloxseg7_mask_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP85:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: 
.split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP10]], i64 28) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: [[TMP85:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5) ; CHECK-NEXT: [[TMP86:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[TMP85]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP86]] ; @@ -1879,7 +3427,31 @@ define <vscale x 1 x i32> @test_vloxseg8_nxv1i32_nxv1i16(ptr %base, <vscale x 1 ; CHECK-LABEL: @test_vloxseg8_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP97:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP10]], i64 32) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: [[TMP97:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5) ; CHECK-NEXT: [[TMP98:%.*]] = call <vscale x 1 
x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[TMP97]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP98]] ; @@ -1893,7 +3465,31 @@ define <vscale x 1 x i32> @test_vloxseg8_mask_nxv1i32_nxv1i16(ptr %base, <vscale ; CHECK-LABEL: @test_vloxseg8_mask_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP97:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP10]], i64 32) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: [[TMP97:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5) ; CHECK-NEXT: [[TMP98:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[TMP97]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP98]] ; @@ -1911,7 +3507,31 @@ define <vscale x 1 x i32> @test_vluxseg2_nxv1i32_nxv1i16(ptr %base, <vscale x 1 ; CHECK-LABEL: @test_vluxseg2_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP25:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; 
CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP10]], i64 8) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: [[TMP25:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5) ; CHECK-NEXT: [[TMP26:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[TMP25]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP26]] ; @@ -1925,7 +3545,31 @@ define <vscale x 1 x i32> @test_vluxseg2_mask_nxv1i32_nxv1i16(ptr %base, <vscale ; CHECK-LABEL: @test_vluxseg2_mask_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP25:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP10]], i64 8) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: [[TMP25:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) 
@llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5) ; CHECK-NEXT: [[TMP26:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[TMP25]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP26]] ; @@ -1943,7 +3587,31 @@ define <vscale x 1 x i32> @test_vluxseg3_nxv1i32_nxv1i16(ptr %base, <vscale x 1 ; CHECK-LABEL: @test_vluxseg3_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP37:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP10]], i64 12) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: [[TMP37:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5) ; CHECK-NEXT: [[TMP38:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[TMP37]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP38]] ; @@ -1957,7 +3625,31 @@ define <vscale x 1 x i32> @test_vluxseg3_mask_nxv1i32_nxv1i16(ptr %base, <vscale ; CHECK-LABEL: @test_vluxseg3_mask_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP37:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> 
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP10]], i64 12) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: [[TMP37:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5) ; CHECK-NEXT: [[TMP38:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[TMP37]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP38]] ; @@ -1975,7 +3667,31 @@ define <vscale x 1 x i32> @test_vluxseg4_nxv1i32_nxv1i16(ptr %base, <vscale x 1 ; CHECK-LABEL: @test_vluxseg4_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP49:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP10]], i64 16) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label 
[[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: [[TMP49:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5) ; CHECK-NEXT: [[TMP50:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[TMP49]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP50]] ; @@ -1989,7 +3705,31 @@ define <vscale x 1 x i32> @test_vluxseg4_mask_nxv1i32_nxv1i16(ptr %base, <vscale ; CHECK-LABEL: @test_vluxseg4_mask_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP49:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP10]], i64 16) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: [[TMP49:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5) ; CHECK-NEXT: [[TMP50:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[TMP49]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP50]] ; @@ -2007,7 +3747,31 @@ define <vscale x 1 x i32> @test_vluxseg5_nxv1i32_nxv1i16(ptr %base, <vscale x 1 ; CHECK-LABEL: @test_vluxseg5_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP61:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) 
@llvm.riscv.vluxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP10]], i64 20) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: [[TMP61:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5) ; CHECK-NEXT: [[TMP62:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[TMP61]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP62]] ; @@ -2021,7 +3785,31 @@ define <vscale x 1 x i32> @test_vluxseg5_mask_nxv1i32_nxv1i16(ptr %base, <vscale ; CHECK-LABEL: @test_vluxseg5_mask_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP61:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] 
to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP10]], i64 20) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: [[TMP61:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5) ; CHECK-NEXT: [[TMP62:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[TMP61]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP62]] ; @@ -2039,7 +3827,31 @@ define <vscale x 1 x i32> @test_vluxseg6_nxv1i32_nxv1i16(ptr %base, <vscale x 1 ; CHECK-LABEL: @test_vluxseg6_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP73:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP10]], i64 24) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: [[TMP73:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5) ; CHECK-NEXT: [[TMP74:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[TMP73]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP74]] ; @@ -2053,7 +3865,31 @@ define <vscale x 1 x i32> @test_vluxseg6_mask_nxv1i32_nxv1i16(ptr %base, <vscale ; CHECK-LABEL: @test_vluxseg6_mask_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load 
i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP73:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP10]], i64 24) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: [[TMP73:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5) ; CHECK-NEXT: [[TMP74:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[TMP73]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP74]] ; @@ -2071,7 +3907,31 @@ define <vscale x 1 x i32> @test_vluxseg7_nxv1i32_nxv1i16(ptr %base, <vscale x 1 ; CHECK-LABEL: @test_vluxseg7_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP85:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 
[[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP10]], i64 28) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: [[TMP85:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5) ; CHECK-NEXT: [[TMP86:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[TMP85]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP86]] ; @@ -2085,7 +3945,31 @@ define <vscale x 1 x i32> @test_vluxseg7_mask_nxv1i32_nxv1i16(ptr %base, <vscale ; CHECK-LABEL: @test_vluxseg7_mask_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP85:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP10]], i64 28) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: [[TMP85:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5) ; CHECK-NEXT: [[TMP86:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[TMP85]], i32 1) ; 
CHECK-NEXT: ret <vscale x 1 x i32> [[TMP86]] ; @@ -2103,7 +3987,31 @@ define <vscale x 1 x i32> @test_vluxseg8_nxv1i32_nxv1i16(ptr %base, <vscale x 1 ; CHECK-LABEL: @test_vluxseg8_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP97:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP10]], i64 32) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: [[TMP97:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5) ; CHECK-NEXT: [[TMP98:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[TMP97]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP98]] ; @@ -2117,7 +4025,31 @@ define <vscale x 1 x i32> @test_vluxseg8_mask_nxv1i32_nxv1i16(ptr %base, <vscale ; CHECK-LABEL: @test_vluxseg8_mask_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: [[TMP97:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: 
.split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP10]], i64 32) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: [[TMP97:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5) ; CHECK-NEXT: [[TMP98:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[TMP97]], i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP98]] ; @@ -2135,7 +4067,31 @@ define void @test_vsoxseg2_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vscale ; CHECK-LABEL: @test_vsoxseg2_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP10]], i64 8) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -2147,7 +4103,31 @@ define void @test_vsoxseg2_mask_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vs ; CHECK-LABEL: 
@test_vsoxseg2_mask_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP10]], i64 8) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -2163,7 +4143,31 @@ define void @test_vsoxseg3_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vscale ; CHECK-LABEL: @test_vsoxseg3_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP10]], i64 12) +; CHECK-NEXT: br label [[TMP11]] +; 
CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -2175,7 +4179,31 @@ define void @test_vsoxseg3_mask_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vs ; CHECK-LABEL: @test_vsoxseg3_mask_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP10]], i64 12) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -2191,7 +4219,31 @@ define void @test_vsoxseg4_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vscale ; CHECK-LABEL: @test_vsoxseg4_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call 
i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP10]], i64 16) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -2203,7 +4255,31 @@ define void @test_vsoxseg4_mask_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vs ; CHECK-LABEL: @test_vsoxseg4_mask_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP10]], i64 16) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -2219,7 +4295,31 @@ define void @test_vsoxseg5_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vscale ; CHECK-LABEL: @test_vsoxseg5_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: 
[[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP10]], i64 20) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -2231,7 +4331,31 @@ define void @test_vsoxseg5_mask_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vs ; CHECK-LABEL: @test_vsoxseg5_mask_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP10]], i64 20) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp 
eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -2247,7 +4371,31 @@ define void @test_vsoxseg6_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vscale ; CHECK-LABEL: @test_vsoxseg6_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP10]], i64 24) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -2259,7 +4407,31 @@ define void @test_vsoxseg6_mask_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vs ; CHECK-LABEL: @test_vsoxseg6_mask_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) 
+; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP10]], i64 24) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -2275,7 +4447,31 @@ define void @test_vsoxseg7_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vscale ; CHECK-LABEL: @test_vsoxseg7_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP10]], i64 28) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -2287,7 +4483,31 @@ define void @test_vsoxseg7_mask_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vs ; CHECK-LABEL: @test_vsoxseg7_mask_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void 
@llvm.riscv.vsoxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP10]], i64 28) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -2303,7 +4523,31 @@ define void @test_vsoxseg8_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vscale ; CHECK-LABEL: @test_vsoxseg8_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP10]], i64 32) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label 
[[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -2315,7 +4559,31 @@ define void @test_vsoxseg8_mask_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vs ; CHECK-LABEL: @test_vsoxseg8_mask_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP10]], i64 32) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -2331,7 +4599,31 @@ define void @test_vsuxseg2_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vscale ; CHECK-LABEL: @test_vsuxseg2_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: 
[[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP10]], i64 8) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: tail call void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -2343,7 +4635,31 @@ define void @test_vsuxseg2_mask_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vs ; CHECK-LABEL: @test_vsuxseg2_mask_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP10]], i64 8) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -2359,7 +4675,31 @@ define void @test_vsuxseg3_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vscale ; CHECK-LABEL: @test_vsuxseg3_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void 
@llvm.riscv.vsuxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP10]], i64 12) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: tail call void @llvm.riscv.vsuxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -2371,7 +4711,31 @@ define void @test_vsuxseg3_mask_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vs ; CHECK-LABEL: @test_vsuxseg3_mask_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP10]], i64 12) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label 
[[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -2387,7 +4751,31 @@ define void @test_vsuxseg4_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vscale ; CHECK-LABEL: @test_vsuxseg4_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsuxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP10]], i64 16) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: tail call void @llvm.riscv.vsuxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -2399,7 +4787,31 @@ define void @test_vsuxseg4_mask_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vs ; CHECK-LABEL: @test_vsuxseg4_mask_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, 
[[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP10]], i64 16) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -2415,7 +4827,31 @@ define void @test_vsuxseg5_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vscale ; CHECK-LABEL: @test_vsuxseg5_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsuxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP10]], i64 20) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: tail call void @llvm.riscv.vsuxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -2427,7 +4863,31 @@ define void @test_vsuxseg5_mask_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vs ; CHECK-LABEL: @test_vsuxseg5_mask_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[VAL:%.*]], ptr 
[[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP10]], i64 20) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -2443,7 +4903,31 @@ define void @test_vsuxseg6_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vscale ; CHECK-LABEL: @test_vsuxseg6_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsuxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP10]], i64 24) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: tail call void 
@llvm.riscv.vsuxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -2455,7 +4939,31 @@ define void @test_vsuxseg6_mask_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vs ; CHECK-LABEL: @test_vsuxseg6_mask_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP10]], i64 24) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -2471,7 +4979,31 @@ define void @test_vsuxseg7_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vscale ; CHECK-LABEL: @test_vsuxseg7_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsuxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat 
(i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP10]], i64 28) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: tail call void @llvm.riscv.vsuxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -2483,7 +5015,31 @@ define void @test_vsuxseg7_mask_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vs ; CHECK-LABEL: @test_vsuxseg7_mask_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP10]], i64 28) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -2499,7 +5055,31 @@ define void @test_vsuxseg8_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vscale ; CHECK-LABEL: @test_vsuxseg8_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsuxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5) +; 
CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP10]], i64 32) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: tail call void @llvm.riscv.vsuxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: @@ -2511,7 +5091,31 @@ define void @test_vsuxseg8_mask_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vs ; CHECK-LABEL: @test_vsuxseg8_mask_nxv1i32_nxv1i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 -; CHECK-NEXT: tail call void @llvm.riscv.vsoxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5) +; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP10]], i64 32) +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: tail call void 
@llvm.riscv.vsoxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5) ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/abilist_aggregate.ll b/llvm/test/Instrumentation/DataFlowSanitizer/abilist_aggregate.ll index 2cf5771..3cab62b 100644 --- a/llvm/test/Instrumentation/DataFlowSanitizer/abilist_aggregate.ll +++ b/llvm/test/Instrumentation/DataFlowSanitizer/abilist_aggregate.ll @@ -13,7 +13,7 @@ define {i1, i7} @functional({i32, i1} %a, [2 x i7] %b) { define {i1, i7} @call_functional({i32, i1} %a, [2 x i7] %b) { ; CHECK-LABEL: @call_functional.dfsan - ; CHECK-NEXT: %[[#REG:]] = load [2 x i8], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN:2]] + ; CHECK-NEXT: %[[#REG:]] = load [2 x i8], ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN:2]] ; CHECK-NEXT: %[[#REG+1]] = load { i8, i8 }, ptr @__dfsan_arg_tls, align [[ALIGN]] ; CHECK-NEXT: %[[#REG+2]] = extractvalue { i8, i8 } %[[#REG+1]], 0 ; CHECK-NEXT: %[[#REG+3]] = extractvalue { i8, i8 } %[[#REG+1]], 1 @@ -68,7 +68,7 @@ define {i1, i7} @call_uninstrumented({i32, i1} %a, [2 x i7] %b) { define {i1, i7} @call_custom_with_ret({i32, i1} %a, [2 x i7] %b) { ; CHECK: @call_custom_with_ret.dfsan ; CHECK: %labelreturn = alloca i8, align 1 - ; CHECK: [[B:%.*]] = load [2 x i8], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN:2]] + ; CHECK: [[B:%.*]] = load [2 x i8], ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN:2]] ; CHECK: [[A:%.*]] = load { i8, i8 }, ptr @__dfsan_arg_tls, align [[ALIGN]] ; CHECK: [[A0:%.*]] = extractvalue { i8, i8 } [[A]], 0 ; CHECK: [[A1:%.*]] = extractvalue { i8, i8 } [[A]], 1 @@ -89,7 +89,7 @@ define {i1, i7} @call_custom_with_ret({i32, i1} %a, [2 x i7] %b) { define void @call_custom_without_ret({i32, i1} %a, [2 x i7] %b) { ; CHECK: @call_custom_without_ret.dfsan - ; CHECK: [[B:%.*]] = load [2 x i8], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN:2]] + ; CHECK: [[B:%.*]] = load [2 x i8], ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN:2]] ; CHECK: [[A:%.*]] = load { i8, i8 }, ptr @__dfsan_arg_tls, align [[ALIGN]] ; CHECK: [[A0:%.*]] = extractvalue { i8, i8 } [[A]], 0 ; CHECK: [[A1:%.*]] = extractvalue { i8, i8 } [[A]], 1 @@ -105,7 +105,7 @@ define void @call_custom_without_ret({i32, i1} %a, [2 x i7] %b) { define void @call_custom_varg({i32, i1} %a, [2 x i7] %b) { ; CHECK: @call_custom_varg.dfsan - ; CHECK: [[B:%.*]] = load [2 x i8], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN:2]] + ; CHECK: [[B:%.*]] = load [2 x i8], ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN:2]] ; CHECK: %labelva = alloca [1 x i8], align 1 ; CHECK: [[A:%.*]] = load { i8, i8 }, ptr @__dfsan_arg_tls, align [[ALIGN]] ; CHECK: [[A0:%.*]] = extractvalue { i8, i8 } [[A]], 0 @@ -126,7 +126,7 @@ define void @call_custom_varg({i32, i1} %a, [2 x i7] %b) { define {i1, i7} @call_custom_cb({i32, i1} %a, [2 x i7] %b) { ; CHECK: define { i1, i7 } @call_custom_cb.dfsan({ i32, i1 } %a, [2 x i7] %b) { ; CHECK: %labelreturn = alloca i8, align 1 - ; CHECK: [[B:%.*]] = load [2 x i8], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN:2]] + ; CHECK: [[B:%.*]] = 
load [2 x i8], ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN:2]] ; CHECK: [[A:%.*]] = load { i8, i8 }, ptr @__dfsan_arg_tls, align [[ALIGN]] ; CHECK: [[A0:%.*]] = extractvalue { i8, i8 } [[A]], 0 ; CHECK: [[A1:%.*]] = extractvalue { i8, i8 } [[A]], 1 @@ -153,7 +153,7 @@ define {i1, i7} @custom_cb(ptr %cb, {i32, i1} %a, [2 x i7] %b) { define {i1, i7} @cb({i32, i1} %a, [2 x i7] %b) { ; CHECK: define { i1, i7 } @cb.dfsan({ i32, i1 } %a, [2 x i7] %b) - ; CHECK: [[BL:%.*]] = load [2 x i8], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN:2]] + ; CHECK: [[BL:%.*]] = load [2 x i8], ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN:2]] ; CHECK: [[AL:%.*]] = load { i8, i8 }, ptr @__dfsan_arg_tls, align [[ALIGN]] ; CHECK: [[AL1:%.*]] = extractvalue { i8, i8 } [[AL]], 1 ; CHECK: [[BL0:%.*]] = extractvalue [2 x i8] [[BL]], 0 @@ -180,8 +180,8 @@ define ptr @ret_custom() { ; COMM: TODO simplify the expression [[#mul(2,SBYTES) + max(SBYTES,2)]] to ; COMM: [[#mul(3,SBYTES)]], if shadow-tls-alignment is updated to match shadow ; COMM: width bytes. -; CHECK: [[B:%.*]] = load [2 x i8], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align [[ALIGN:2]] -; CHECK: [[A:%.*]] = load { i8, i8 }, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]] +; CHECK: [[B:%.*]] = load [2 x i8], ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align [[ALIGN:2]] +; CHECK: [[A:%.*]] = load { i8, i8 }, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN]] ; CHECK: [[CB:%.*]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN]] ; CHECK: [[A0:%.*]] = extractvalue { i8, i8 } [[A]], 0 ; CHECK: [[A1:%.*]] = extractvalue { i8, i8 } [[A]], 1 @@ -198,7 +198,7 @@ define ptr @ret_custom() { define {i1, i7} @custom_with_ret({i32, i1} %a, [2 x i7] %b) { ; CHECK: define linkonce_odr { i1, i7 } @"dfsw$custom_with_ret"({ i32, i1 } %0, [2 x i7] %1) ; CHECK: %labelreturn = alloca i8, align 1 - ; CHECK: [[B:%.*]] = load [2 x i8], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN:2]] + ; CHECK: [[B:%.*]] = load [2 x i8], ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN:2]] ; CHECK: [[A:%.*]] = load { i8, i8 }, ptr @__dfsan_arg_tls, align [[ALIGN]] ; CHECK: [[A0:%.*]] = extractvalue { i8, i8 } [[A]], 0 ; CHECK: [[A1:%.*]] = extractvalue { i8, i8 } [[A]], 1 @@ -221,7 +221,7 @@ define {i1, i7} @custom_with_ret({i32, i1} %a, [2 x i7] %b) { define void @custom_without_ret({i32, i1} %a, [2 x i7] %b) { ; CHECK: define linkonce_odr void @"dfsw$custom_without_ret"({ i32, i1 } %0, [2 x i7] %1) - ; CHECK: [[B:%.*]] = load [2 x i8], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN:2]] + ; CHECK: [[B:%.*]] = load [2 x i8], ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN:2]] ; CHECK: [[A:%.*]] = load { i8, i8 }, ptr @__dfsan_arg_tls, align [[ALIGN]] ; CHECK: [[A0:%.*]] = extractvalue { i8, i8 } [[A]], 0 ; CHECK: [[A1:%.*]] = extractvalue { i8, i8 } [[A]], 1 diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/arith.ll b/llvm/test/Instrumentation/DataFlowSanitizer/arith.ll index 8c9eb5f..b474383 100644 --- a/llvm/test/Instrumentation/DataFlowSanitizer/arith.ll +++ b/llvm/test/Instrumentation/DataFlowSanitizer/arith.ll @@ -1,73 +1,86 @@ -; RUN: opt < %s -passes=dfsan -S | FileCheck %s +; NOTE: Assertions have been autogenerated by 
utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -passes=dfsan -dfsan-add-global-name-suffix=0 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" define i8 @add(i8 %a, i8 %b) { - ; CHECK: @add.dfsan - ; CHECK-DAG: %[[#ALABEL:]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN:2]] - ; CHECK-DAG: %[[#BLABEL:]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]] - ; CHECK: %[[#UNION:]] = or i8 %[[#ALABEL]], %[[#BLABEL]] - ; CHECK: %c = add i8 %a, %b - ; CHECK: store i8 %[[#UNION]], ptr @__dfsan_retval_tls, align [[ALIGN]] - ; CHECK: ret i8 %c +; CHECK-LABEL: define i8 @add( +; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 +; CHECK-NEXT: [[TMP3:%.*]] = or i8 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[C:%.*]] = add i8 [[A]], [[B]] +; CHECK-NEXT: store i8 [[TMP3]], ptr @__dfsan_retval_tls, align 2 +; CHECK-NEXT: ret i8 [[C]] +; %c = add i8 %a, %b ret i8 %c } define i8 @sub(i8 %a, i8 %b) { - ; CHECK: @sub.dfsan - ; CHECK: load{{.*}}__dfsan_arg_tls - ; CHECK: load{{.*}}__dfsan_arg_tls - ; CHECK: or i8 - ; CHECK: %c = sub i8 %a, %b - ; CHECK: store{{.*}}__dfsan_retval_tls - ; CHECK: ret i8 %c +; CHECK-LABEL: define i8 @sub( +; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 +; CHECK-NEXT: [[TMP3:%.*]] = or i8 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[C:%.*]] = sub i8 [[A]], [[B]] +; CHECK-NEXT: store i8 [[TMP3]], ptr @__dfsan_retval_tls, align 2 +; CHECK-NEXT: ret i8 [[C]] +; %c = sub i8 %a, %b ret i8 %c } define i8 @mul(i8 %a, i8 %b) { - ; CHECK: @mul.dfsan - ; CHECK: load{{.*}}__dfsan_arg_tls - ; CHECK: load{{.*}}__dfsan_arg_tls - ; CHECK: or i8 - ; CHECK: %c = mul i8 %a, %b - ; CHECK: store{{.*}}__dfsan_retval_tls - ; CHECK: ret i8 %c +; CHECK-LABEL: define i8 @mul( +; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 +; CHECK-NEXT: [[TMP3:%.*]] = or i8 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[C:%.*]] = mul i8 [[A]], [[B]] +; CHECK-NEXT: store i8 [[TMP3]], ptr @__dfsan_retval_tls, align 2 +; CHECK-NEXT: ret i8 [[C]] +; %c = mul i8 %a, %b ret i8 %c } define i8 @sdiv(i8 %a, i8 %b) { - ; CHECK: @sdiv.dfsan - ; CHECK: load{{.*}}__dfsan_arg_tls - ; CHECK: load{{.*}}__dfsan_arg_tls - ; CHECK: or i8 - ; CHECK: %c = sdiv i8 %a, %b - ; CHECK: store{{.*}}__dfsan_retval_tls - ; CHECK: ret i8 %c +; CHECK-LABEL: define i8 @sdiv( +; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 +; CHECK-NEXT: [[TMP3:%.*]] = or i8 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[C:%.*]] = sdiv i8 [[A]], [[B]] +; CHECK-NEXT: store i8 [[TMP3]], ptr @__dfsan_retval_tls, align 2 +; CHECK-NEXT: ret i8 [[C]] +; %c = sdiv i8 %a, %b ret i8 %c } define i8 @udiv(i8 %a, i8 %b) { - ; CHECK: @udiv.dfsan - ; CHECK: load{{.*}}__dfsan_arg_tls - ; CHECK: load{{.*}}__dfsan_arg_tls - ; 
CHECK: or i8 - ; CHECK: %c = udiv i8 %a, %b - ; CHECK: store{{.*}}__dfsan_retval_tls - ; CHECK: ret i8 %c +; CHECK-LABEL: define i8 @udiv( +; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 +; CHECK-NEXT: [[TMP3:%.*]] = or i8 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[C:%.*]] = udiv i8 [[A]], [[B]] +; CHECK-NEXT: store i8 [[TMP3]], ptr @__dfsan_retval_tls, align 2 +; CHECK-NEXT: ret i8 [[C]] +; %c = udiv i8 %a, %b ret i8 %c } define double @fneg(double %a) { - ; CHECK: @fneg.dfsan - ; CHECK: load{{.*}}__dfsan_arg_tls - ; CHECK: %c = fneg double %a - ; CHECK: store{{.*}}__dfsan_retval_tls - ; CHECK: ret double %c +; CHECK-LABEL: define double @fneg( +; CHECK-SAME: double [[A:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 +; CHECK-NEXT: [[C:%.*]] = fneg double [[A]] +; CHECK-NEXT: store i8 [[TMP1]], ptr @__dfsan_retval_tls, align 2 +; CHECK-NEXT: ret double [[C]] +; %c = fneg double %a ret double %c } diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/array.ll b/llvm/test/Instrumentation/DataFlowSanitizer/array.ll index 5642edc..14468c1 100644 --- a/llvm/test/Instrumentation/DataFlowSanitizer/array.ll +++ b/llvm/test/Instrumentation/DataFlowSanitizer/array.ll @@ -158,7 +158,7 @@ define i1 @extract_array([4 x i1] %a) { define [4 x i1] @insert_array([4 x i1] %a, i1 %e2) { ; NO_COMBINE_LOAD_PTR: @insert_array.dfsan ; NO_COMBINE_LOAD_PTR: [[EM:%.*]] = load i8, ptr - ; NO_COMBINE_LOAD_PTR-SAME: inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align [[ALIGN:2]] + ; NO_COMBINE_LOAD_PTR-SAME: getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align [[ALIGN:2]] ; NO_COMBINE_LOAD_PTR: [[AM:%.*]] = load [4 x i8], ptr @__dfsan_arg_tls, align [[ALIGN]] ; NO_COMBINE_LOAD_PTR: [[AM1:%.*]] = insertvalue [4 x i8] [[AM]], i8 [[EM]], 0 ; NO_COMBINE_LOAD_PTR: store [4 x i8] [[AM1]], ptr @__dfsan_retval_tls, align [[ALIGN]] diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/dfsan-pass-second-run.ll b/llvm/test/Instrumentation/DataFlowSanitizer/dfsan-pass-second-run.ll index 7da647b..7f49c14 100644 --- a/llvm/test/Instrumentation/DataFlowSanitizer/dfsan-pass-second-run.ll +++ b/llvm/test/Instrumentation/DataFlowSanitizer/dfsan-pass-second-run.ll @@ -5,7 +5,7 @@ target triple = "x86_64-unknown-linux-gnu" define i8 @add(i8 %a, i8 %b) { ; CHECK: @add.dfsan ; CHECK-DAG: %[[#ALABEL:]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN:2]] - ; CHECK-DAG: %[[#BLABEL:]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]] + ; CHECK-DAG: %[[#BLABEL:]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN]] ; CHECK: %[[#UNION:]] = or i8 %[[#ALABEL]], %[[#BLABEL]] ; CHECK: %c = add i8 %a, %b ; CHECK: store i8 %[[#UNION]], ptr @__dfsan_retval_tls, align [[ALIGN]] diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/dont_combine_offset_labels_on_gep.ll b/llvm/test/Instrumentation/DataFlowSanitizer/dont_combine_offset_labels_on_gep.ll index 997681b..7574346 100644 --- a/llvm/test/Instrumentation/DataFlowSanitizer/dont_combine_offset_labels_on_gep.ll +++ b/llvm/test/Instrumentation/DataFlowSanitizer/dont_combine_offset_labels_on_gep.ll @@ -1,19 +1,26 @@ -; RUN: opt < %s -passes=dfsan -dfsan-combine-offset-labels-on-gep=false -S | FileCheck %s -; RUN: opt < %s -passes=dfsan -dfsan-combine-offset-labels-on-gep=false 
-dfsan-track-origins=1 -S | FileCheck %s --check-prefixes=CHECK,CHECK_ORIGIN +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -passes=dfsan -dfsan-combine-offset-labels-on-gep=false -dfsan-add-global-name-suffix=0 -S | FileCheck %s +; RUN: opt < %s -passes=dfsan -dfsan-combine-offset-labels-on-gep=false -dfsan-track-origins=1 -dfsan-add-global-name-suffix=0 -S | FileCheck %s --check-prefix=CHECK_ORIGIN target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -; CHECK: @__dfsan_arg_tls = external thread_local(initialexec) global [[TLS_ARR:\[100 x i64\]]] -; CHECK: @__dfsan_retval_tls = external thread_local(initialexec) global [[TLS_ARR]] define ptr @gepop(ptr %p, i32 %a, i32 %b, i32 %c) { - ; CHECK: @gepop.dfsan - ; CHECK_ORIGIN: %[[#PO:]] = load i32, ptr @__dfsan_arg_origin_tls, align [[ALIGN_O:4]] - ; CHECK: %[[#PS:]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN_S:2]] - ; CHECK: %e = getelementptr [10 x [20 x i32]], ptr %p, i32 %a, i32 %b, i32 %c - ; CHECK: store i8 %[[#PS]], ptr @__dfsan_retval_tls, align [[ALIGN_S]] - ; CHECK_ORIGIN: store i32 %[[#PO]], ptr @__dfsan_retval_origin_tls, align [[ALIGN_O]] - +; CHECK-LABEL: define ptr @gepop( +; CHECK-SAME: ptr [[P:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 +; CHECK-NEXT: [[E:%.*]] = getelementptr [10 x [20 x i32]], ptr [[P]], i32 [[A]], i32 [[B]], i32 [[C]] +; CHECK-NEXT: store i8 [[TMP1]], ptr @__dfsan_retval_tls, align 2 +; CHECK-NEXT: ret ptr [[E]] +; +; CHECK_ORIGIN-LABEL: define ptr @gepop( +; CHECK_ORIGIN-SAME: ptr [[P:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK_ORIGIN-NEXT: [[TMP1:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 +; CHECK_ORIGIN-NEXT: [[TMP2:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 +; CHECK_ORIGIN-NEXT: [[E:%.*]] = getelementptr [10 x [20 x i32]], ptr [[P]], i32 [[A]], i32 [[B]], i32 [[C]] +; CHECK_ORIGIN-NEXT: store i8 [[TMP2]], ptr @__dfsan_retval_tls, align 2 +; CHECK_ORIGIN-NEXT: store i32 [[TMP1]], ptr @__dfsan_retval_origin_tls, align 4 +; CHECK_ORIGIN-NEXT: ret ptr [[E]] +; %e = getelementptr [10 x [20 x i32]], ptr %p, i32 %a, i32 %b, i32 %c ret ptr %e } - diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_abilist.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_abilist.ll index 031fd1c..fbcdb3d 100644 --- a/llvm/test/Instrumentation/DataFlowSanitizer/origin_abilist.ll +++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_abilist.ll @@ -114,7 +114,7 @@ define void @call_custom_without_ret(i32 %a, i32 %b) { ; CHECK: @call_custom_without_ret.dfsan ; CHECK: [[BO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 ; CHECK: [[AO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 - ; CHECK: [[BS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2 + ; CHECK: [[BS:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 ; CHECK: [[AS:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 ; CHECK: call void @__dfso_custom_without_ret(i32 %a, i32 %b, i8 zeroext [[AS]], i8 zeroext [[BS]], i32 zeroext [[AO]], i32 zeroext [[BO]]) ; CHECK-NEXT: ret void @@ -129,7 +129,7 @@ define i32 @call_custom_with_ret(i32 %a, i32 %b) { ; CHECK: [[BO:%.*]] = 
load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 ; CHECK: [[AO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 ; CHECK: %labelreturn = alloca i8, align 1 - ; CHECK: [[BS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2 + ; CHECK: [[BS:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 ; CHECK: [[AS:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 ; CHECK: {{.*}} = call i32 @__dfso_custom_with_ret(i32 %a, i32 %b, i8 zeroext [[AS]], i8 zeroext [[BS]], ptr %labelreturn, i32 zeroext [[AO]], i32 zeroext [[BO]], ptr %originreturn) ; CHECK: [[RS:%.*]] = load i8, ptr %labelreturn, align 1 @@ -147,7 +147,7 @@ define void @call_custom_varg_without_ret(i32 %a, i32 %b) { ; CHECK: [[BO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 ; CHECK: [[AO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 ; CHECK: %labelva = alloca [1 x i8], align 1 - ; CHECK: [[BS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2 + ; CHECK: [[BS:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 ; CHECK: [[AS:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 ; CHECK: [[VS0:%.*]] = getelementptr inbounds nuw [1 x i8], ptr %labelva, i32 0, i32 0 ; CHECK: store i8 [[AS]], ptr [[VS0]], align 1 @@ -170,7 +170,7 @@ define i32 @call_custom_varg_with_ret(i32 %a, i32 %b) { ; CHECK: [[AO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls ; CHECK: %labelreturn = alloca i8, align 1 ; CHECK: %labelva = alloca [1 x i8], align 1 - ; CHECK: [[BS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2 + ; CHECK: [[BS:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 ; CHECK: [[AS:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 ; CHECK: [[VS0:%.*]] = getelementptr inbounds nuw [1 x i8], ptr %labelva, i32 0, i32 0 ; CHECK: store i8 [[BS]], ptr [[VS0]], align 1 @@ -194,7 +194,7 @@ define i32 @call_custom_cb_with_ret(i32 %a, i32 %b) { ; CHECK: [[BO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 ; CHECK: [[AO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 ; CHECK: %labelreturn = alloca i8, align 1 - ; CHECK: [[BS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2 + ; CHECK: [[BS:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 ; CHECK: [[AS:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 ; CHECK: {{.*}} = call i32 @__dfso_custom_cb_with_ret(ptr @cb_with_ret.dfsan, i32 %a, i32 %b, i8 zeroext 0, i8 zeroext [[AS]], i8 zeroext [[BS]], ptr %labelreturn, i32 zeroext 0, i32 zeroext [[AO]], i32 zeroext [[BO]], ptr %originreturn) ; CHECK: [[RS:%.*]] = load i8, ptr %labelreturn, align 1 @@ -210,7 +210,7 @@ define void @call_custom_cb_without_ret(i32 %a, i32 %b) { ; CHECK-LABEL: @call_custom_cb_without_ret.dfsan ; CHECK: [[BO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 ; CHECK: [[AO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 - ; CHECK: [[BS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2 + ; CHECK: [[BS:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 ; CHECK: [[AS:%.*]] = load 
i8, ptr @__dfsan_arg_tls, align 2 ; CHECK: call void @__dfso_custom_cb_without_ret(ptr @cb_without_ret.dfsan, i32 %a, i32 %b, i8 zeroext 0, i8 zeroext [[AS]], i8 zeroext [[BS]], i32 zeroext 0, i32 zeroext [[AO]], i32 zeroext [[BO]]) ; CHECK-NEXT: ret void @@ -228,7 +228,7 @@ define void @call_custom_cb_without_ret(i32 %a, i32 %b) { ; CHECK: define linkonce_odr void @"dfso$custom_without_ret"(i32 %0, i32 %1) ; CHECK: [[BO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 ; CHECK-NEXT: [[AO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 -; CHECK-NEXT: [[BS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2 +; CHECK-NEXT: [[BS:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 ; CHECK-NEXT: [[AS:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 ; CHECK-NEXT: call void @__dfso_custom_without_ret(i32 %0, i32 %1, i8 zeroext [[AS]], i8 zeroext [[BS]], i32 zeroext [[AO]], i32 zeroext [[BO]]) ; CHECK-NEXT: ret void @@ -238,7 +238,7 @@ define void @call_custom_cb_without_ret(i32 %a, i32 %b) { ; CHECK-NEXT: [[BO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 ; CHECK-NEXT: [[AO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 ; CHECK-NEXT: %labelreturn = alloca i8, align 1 -; CHECK-NEXT: [[BS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2 +; CHECK-NEXT: [[BS:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 ; CHECK-NEXT: [[AS:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 ; CHECK-NEXT: [[R:%.*]] = call i32 @__dfso_custom_with_ret(i32 %0, i32 %1, i8 zeroext [[AS]], i8 zeroext [[BS]], ptr %labelreturn, i32 zeroext [[AO]], i32 zeroext [[BO]], ptr %originreturn) ; CHECK-NEXT: [[RS:%.*]] = load i8, ptr %labelreturn, align 1 @@ -261,8 +261,8 @@ define void @call_custom_cb_without_ret(i32 %a, i32 %b) { ; CHECK-NEXT: [[AO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 ; CHECK-NEXT: [[CO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 ; CHECK-NEXT: %labelreturn = alloca i8, align 1 -; CHECK-NEXT: [[BS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align 2 -; CHECK-NEXT: [[AS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2 +; CHECK-NEXT: [[BS:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align 2 +; CHECK-NEXT: [[AS:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 ; CHECK-NEXT: [[CS:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 ; CHECK-NEXT: [[R:%.*]] = call i32 @__dfso_custom_cb_with_ret(ptr %0, i32 %1, i32 %2, i8 zeroext [[CS]], i8 zeroext [[AS]], i8 zeroext [[BS]], ptr %labelreturn, i32 zeroext [[CO]], i32 zeroext [[AO]], i32 zeroext [[BO]], ptr %originreturn) ; CHECK-NEXT: [[RS:%.*]] = load i8, ptr %labelreturn, align 1 @@ -275,8 +275,8 @@ define void @call_custom_cb_without_ret(i32 %a, i32 %b) { ; CHECK: [[BO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 2), align 4 ; CHECK-NEXT: [[AO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 ; CHECK-NEXT: [[CO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 -; CHECK-NEXT: [[BS:%.*]] = load i8, ptr inttoptr 
(i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align 2 -; CHECK-NEXT: [[AS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2 +; CHECK-NEXT: [[BS:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align 2 +; CHECK-NEXT: [[AS:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 ; CHECK-NEXT: [[CS:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 ; CHECK-NEXT: call void @__dfso_custom_cb_without_ret(ptr %0, i32 %1, i32 %2, i8 zeroext [[CS]], i8 zeroext [[AS]], i8 zeroext [[BS]], i32 zeroext [[CO]], i32 zeroext [[AO]], i32 zeroext [[BO]]) ; CHECK-NEXT: ret void diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_cached_shadows.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_cached_shadows.ll index cb9a306e..194a193 100644 --- a/llvm/test/Instrumentation/DataFlowSanitizer/origin_cached_shadows.ll +++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_cached_shadows.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -passes=dfsan -dfsan-track-origins=1 -S | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -passes=dfsan -dfsan-track-origins=1 -dfsan-add-global-name-suffix=0 -S | FileCheck %s ; ; %i13 and %i15 have the same key in shadow cache. They should not reuse the same ; shadow because their blocks do not dominate each other. Origin tracking @@ -7,43 +8,129 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -; CHECK: @__dfsan_arg_tls = external thread_local(initialexec) global [[TLS_ARR:\[100 x i64\]]] define void @cached_shadows(double %arg) { - ; CHECK: @cached_shadows.dfsan - ; CHECK: [[AO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align - ; CHECK: [[AS:%.*]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN:2]] - ; CHECK: [[L1:.+]]: - ; CHECK: {{.*}} = phi i8 - ; CHECK: {{.*}} = phi i32 - ; CHECK: {{.*}} = phi double [ 3.000000e+00 - ; CHECK: [[S_L1:%.*]] = phi i8 [ 0, %[[L0:.*]] ], [ [[S_L7:%.*]], %[[L7:.*]] ] - ; CHECK: [[O_L1:%.*]] = phi i32 [ 0, %[[L0]] ], [ [[O_L7:%.*]], %[[L7]] ] - ; CHECK: [[V_L1:%.*]] = phi double [ 4.000000e+00, %[[L0]] ], [ [[V_L7:%.*]], %[[L7]] ] - ; CHECK: br i1 {{%.+}}, label %[[L2:.*]], label %[[L4:.*]] - ; CHECK: [[L2]]: - ; CHECK: br i1 {{%.+}}, label %[[L3:.+]], label %[[L7]] - ; CHECK: [[L3]]: - ; CHECK: [[S_L3:%.*]] = or i8 - ; CHECK: [[AS_NE_L3:%.*]] = icmp ne i8 [[AS]], 0 - ; CHECK: [[O_L3:%.*]] = select i1 [[AS_NE_L3]], i32 %{{[0-9]+}}, i32 [[O_L1]] - ; CHECK: [[V_L3:%.*]] = fsub double [[V_L1]], %{{.+}} - ; CHECK: br label %[[L7]] - ; CHECK: [[L4]]: - ; CHECK: br i1 %_dfscmp, label %[[L5:.+]], label %[[L6:.+]], - ; CHECK: [[L5]]: - ; CHECK: br label %[[L6]] - ; CHECK: [[L6]]: - ; CHECK: [[S_L6:%.*]] = or i8 - ; CHECK: [[AS_NE_L6:%.*]] = icmp ne i8 [[AS]], 0 - ; CHECK: [[O_L6:%.*]] = select i1 [[AS_NE_L6]], i32 [[AO]], i32 [[O_L1]] - ; CHECK: [[V_L6:%.*]] = fadd double [[V_L1]], %{{.+}} - ; CHECK: br label %[[L7]] - ; CHECK: [[L7]]: - ; CHECK: [[S_L7]] = phi i8 [ [[S_L3]], %[[L3]] ], [ [[S_L1]], %[[L2]] ], [ [[S_L6]], %[[L6]] ] - ; CHECK: [[O_L7]] = phi i32 [ [[O_L3]], %[[L3]] ], [ [[O_L1]], %[[L2]] ], [ [[O_L6]], %[[L6]] ] - ; CHECK: [[V_L7]] = phi double [ [[V_L3]], %[[L3]] ], [ [[V_L1]], %[[L2]] ], [ [[V_L6]], %[[L6]] ] - ; CHECK: br i1 %{{.+}}, label %[[L1]], label %[[L8:.+]] - ; 
CHECK: [[L8]]: +; CHECK-LABEL: define void @cached_shadows( +; CHECK-SAME: double [[ARG:%.*]]) { +; CHECK-NEXT: [[BB:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 +; CHECK-NEXT: [[I:%.*]] = alloca double, align 8 +; CHECK-NEXT: [[I1:%.*]] = alloca double, align 8 +; CHECK-NEXT: [[I2:%.*]] = bitcast ptr [[I]] to ptr +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[I]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: store i64 0, ptr [[TMP4]], align 1 +; CHECK-NEXT: store volatile double 1.000000e+00, ptr [[I]], align 8 +; CHECK-NEXT: [[I3:%.*]] = bitcast ptr [[I1]] to ptr +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[I1]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: store i64 0, ptr [[TMP7]], align 1 +; CHECK-NEXT: store volatile double 2.000000e+00, ptr [[I1]], align 8 +; CHECK-NEXT: br label %[[BB4:.*]] +; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP8:%.*]] = phi i8 [ 0, %[[BB]] ], [ [[TMP76:%.*]], %[[BB16:.*]] ] +; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[TMP77:%.*]], %[[BB16]] ] +; CHECK-NEXT: [[I5:%.*]] = phi double [ 3.000000e+00, %[[BB]] ], [ [[I17:%.*]], %[[BB16]] ] +; CHECK-NEXT: [[TMP10:%.*]] = phi i8 [ 0, %[[BB]] ], [ [[TMP78:%.*]], %[[BB16]] ] +; CHECK-NEXT: [[TMP11:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[TMP79:%.*]], %[[BB16]] ] +; CHECK-NEXT: [[I6:%.*]] = phi double [ 4.000000e+00, %[[BB]] ], [ [[I18:%.*]], %[[BB16]] ] +; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[I1]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 87960930222080 +; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr +; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP13]], 17592186044416 +; CHECK-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to ptr +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 8 +; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 1 +; CHECK-NEXT: [[TMP19:%.*]] = shl i64 [[TMP18]], 32 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP16]], i64 1 +; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 8 +; CHECK-NEXT: [[TMP22:%.*]] = lshr i64 [[TMP18]], 32 +; CHECK-NEXT: [[TMP23:%.*]] = or i64 [[TMP18]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = lshr i64 [[TMP23]], 16 +; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP23]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = lshr i64 [[TMP25]], 8 +; CHECK-NEXT: [[TMP27:%.*]] = or i64 [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i8 +; CHECK-NEXT: [[TMP29:%.*]] = icmp ne i64 [[TMP19]], 0 +; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP17]], i32 [[TMP21]] +; CHECK-NEXT: [[I7:%.*]] = load volatile double, ptr [[I1]], align 8 +; CHECK-NEXT: [[I8:%.*]] = fcmp une double [[I7]], 0.000000e+00 +; CHECK-NEXT: [[TMP31:%.*]] = ptrtoint ptr [[I1]] to i64 +; CHECK-NEXT: [[TMP32:%.*]] = xor i64 [[TMP31]], 87960930222080 +; CHECK-NEXT: [[TMP33:%.*]] = inttoptr i64 [[TMP32]] to ptr +; CHECK-NEXT: [[TMP34:%.*]] = add i64 [[TMP32]], 17592186044416 +; CHECK-NEXT: [[TMP35:%.*]] = inttoptr i64 [[TMP34]] to ptr +; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 8 +; CHECK-NEXT: [[TMP37:%.*]] = load i64, ptr [[TMP33]], align 1 +; CHECK-NEXT: [[TMP38:%.*]] = shl i64 [[TMP37]], 32 +; CHECK-NEXT: [[TMP39:%.*]] = getelementptr i32, ptr [[TMP35]], i64 1 +; CHECK-NEXT: [[TMP40:%.*]] = load i32, ptr 
[[TMP39]], align 8 +; CHECK-NEXT: [[TMP41:%.*]] = lshr i64 [[TMP37]], 32 +; CHECK-NEXT: [[TMP42:%.*]] = or i64 [[TMP37]], [[TMP41]] +; CHECK-NEXT: [[TMP43:%.*]] = lshr i64 [[TMP42]], 16 +; CHECK-NEXT: [[TMP44:%.*]] = or i64 [[TMP42]], [[TMP43]] +; CHECK-NEXT: [[TMP45:%.*]] = lshr i64 [[TMP44]], 8 +; CHECK-NEXT: [[TMP46:%.*]] = or i64 [[TMP44]], [[TMP45]] +; CHECK-NEXT: [[TMP47:%.*]] = trunc i64 [[TMP46]] to i8 +; CHECK-NEXT: [[TMP48:%.*]] = icmp ne i64 [[TMP38]], 0 +; CHECK-NEXT: [[TMP49:%.*]] = select i1 [[TMP48]], i32 [[TMP36]], i32 [[TMP40]] +; CHECK-NEXT: [[I9:%.*]] = load volatile double, ptr [[I1]], align 8 +; CHECK-NEXT: br i1 [[I8]], label %[[BB10:.*]], label %[[BB14:.*]] +; CHECK: [[BB10]]: +; CHECK-NEXT: [[I11:%.*]] = fcmp une double [[I9]], 0.000000e+00 +; CHECK-NEXT: br i1 [[I11]], label %[[BB12:.*]], label %[[BB16]] +; CHECK: [[BB12]]: +; CHECK-NEXT: [[TMP50:%.*]] = or i8 [[TMP10]], [[TMP1]] +; CHECK-NEXT: [[TMP51:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP52:%.*]] = select i1 [[TMP51]], i32 [[TMP0]], i32 [[TMP11]] +; CHECK-NEXT: [[I13:%.*]] = fsub double [[I6]], [[ARG]] +; CHECK-NEXT: br label %[[BB16]] +; CHECK: [[BB14]]: +; CHECK-NEXT: [[TMP53:%.*]] = ptrtoint ptr [[I]] to i64 +; CHECK-NEXT: [[TMP54:%.*]] = xor i64 [[TMP53]], 87960930222080 +; CHECK-NEXT: [[TMP55:%.*]] = inttoptr i64 [[TMP54]] to ptr +; CHECK-NEXT: [[TMP56:%.*]] = add i64 [[TMP54]], 17592186044416 +; CHECK-NEXT: [[TMP57:%.*]] = inttoptr i64 [[TMP56]] to ptr +; CHECK-NEXT: [[TMP58:%.*]] = insertelement <8 x i8> poison, i8 [[TMP47]], i32 0 +; CHECK-NEXT: [[TMP59:%.*]] = insertelement <8 x i8> [[TMP58]], i8 [[TMP47]], i32 1 +; CHECK-NEXT: [[TMP60:%.*]] = insertelement <8 x i8> [[TMP59]], i8 [[TMP47]], i32 2 +; CHECK-NEXT: [[TMP61:%.*]] = insertelement <8 x i8> [[TMP60]], i8 [[TMP47]], i32 3 +; CHECK-NEXT: [[TMP62:%.*]] = insertelement <8 x i8> [[TMP61]], i8 [[TMP47]], i32 4 +; CHECK-NEXT: [[TMP63:%.*]] = insertelement <8 x i8> [[TMP62]], i8 [[TMP47]], i32 5 +; CHECK-NEXT: [[TMP64:%.*]] = insertelement <8 x i8> [[TMP63]], i8 [[TMP47]], i32 6 +; CHECK-NEXT: [[TMP65:%.*]] = insertelement <8 x i8> [[TMP64]], i8 [[TMP47]], i32 7 +; CHECK-NEXT: [[TMP66:%.*]] = getelementptr <8 x i8>, ptr [[TMP55]], i32 0 +; CHECK-NEXT: store <8 x i8> [[TMP65]], ptr [[TMP66]], align 1 +; CHECK-NEXT: [[_DFSCMP:%.*]] = icmp ne i8 [[TMP47]], 0 +; CHECK-NEXT: br i1 [[_DFSCMP]], label %[[BB67:.*]], label %[[BB72:.*]], !prof [[PROF1:![0-9]+]] +; CHECK: [[BB67]]: +; CHECK-NEXT: [[TMP68:%.*]] = call i32 @__dfsan_chain_origin(i32 [[TMP49]]) +; CHECK-NEXT: [[TMP69:%.*]] = zext i32 [[TMP68]] to i64 +; CHECK-NEXT: [[TMP70:%.*]] = shl i64 [[TMP69]], 32 +; CHECK-NEXT: [[TMP71:%.*]] = or i64 [[TMP69]], [[TMP70]] +; CHECK-NEXT: store i64 [[TMP71]], ptr [[TMP57]], align 8 +; CHECK-NEXT: br label %[[BB72]] +; CHECK: [[BB72]]: +; CHECK-NEXT: store volatile double [[I9]], ptr [[I]], align 8 +; CHECK-NEXT: [[TMP73:%.*]] = or i8 [[TMP10]], [[TMP1]] +; CHECK-NEXT: [[TMP74:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP75:%.*]] = select i1 [[TMP74]], i32 [[TMP0]], i32 [[TMP11]] +; CHECK-NEXT: [[I15:%.*]] = fadd double [[I6]], [[ARG]] +; CHECK-NEXT: br label %[[BB16]] +; CHECK: [[BB16]]: +; CHECK-NEXT: [[TMP76]] = phi i8 [ [[TMP10]], %[[BB12]] ], [ [[TMP8]], %[[BB10]] ], [ [[TMP10]], %[[BB72]] ] +; CHECK-NEXT: [[TMP77]] = phi i32 [ [[TMP11]], %[[BB12]] ], [ [[TMP9]], %[[BB10]] ], [ [[TMP11]], %[[BB72]] ] +; CHECK-NEXT: [[I17]] = phi double [ [[I6]], %[[BB12]] ], [ [[I5]], %[[BB10]] ], [ [[I6]], %[[BB72]] ] +; CHECK-NEXT: [[TMP78]] = phi i8 [ 
[[TMP50]], %[[BB12]] ], [ [[TMP10]], %[[BB10]] ], [ [[TMP73]], %[[BB72]] ] +; CHECK-NEXT: [[TMP79]] = phi i32 [ [[TMP52]], %[[BB12]] ], [ [[TMP11]], %[[BB10]] ], [ [[TMP75]], %[[BB72]] ] +; CHECK-NEXT: [[I18]] = phi double [ [[I13]], %[[BB12]] ], [ [[I6]], %[[BB10]] ], [ [[I15]], %[[BB72]] ] +; CHECK-NEXT: [[I19:%.*]] = fcmp olt double [[I17]], 9.900000e+01 +; CHECK-NEXT: br i1 [[I19]], label %[[BB4]], label %[[BB20:.*]] +; CHECK: [[BB20]]: +; CHECK-NEXT: ret void +; bb: %i = alloca double, align 8 %i1 = alloca double, align 8 @@ -83,3 +170,6 @@ bb16: ; preds = %bb14, %bb12, %bb10 bb20: ; preds = %bb16 ret void } +;. +; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575} +;. diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_call.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_call.ll index 5ee9927..9e8d015 100644 --- a/llvm/test/Instrumentation/DataFlowSanitizer/origin_call.ll +++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_call.ll @@ -37,8 +37,8 @@ i1 %a200 define i1 @param_overflow(i1 %a) { ; CHECK: @param_overflow.dfsan ; CHECK: store i32 %1, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 199), align 4 - ; CHECK-NEXT: store i8 %2, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 398) to ptr), align 2 - ; CHECK-NEXT: store i8 %2, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 400) to ptr), align 2 + ; CHECK-NEXT: store i8 %2, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 398), align 2 + ; CHECK-NEXT: store i8 %2, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 400), align 2 ; CHECK-NEXT: %r = call i1 @arg_overflow.dfsan ; CHECK: %_dfsret_o = load i32, ptr @__dfsan_retval_origin_tls, align 4 ; CHECK: store i32 %_dfsret_o, ptr @__dfsan_retval_origin_tls, align 4 diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_load.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_load.ll index 0c84c79..a0c642a 100644 --- a/llvm/test/Instrumentation/DataFlowSanitizer/origin_load.ll +++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_load.ll @@ -93,7 +93,7 @@ define i16 @load16(i1 %i, ptr %p) { ; CHECK-LABEL: @load16.dfsan ; COMBINE_LOAD_PTR-NEXT: %[[#PO:]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 - ; COMBINE_LOAD_PTR-NEXT: %[[#PS:]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]] + ; COMBINE_LOAD_PTR-NEXT: %[[#PS:]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN]] ; CHECK-NEXT: %[[#INTP:]] = ptrtoint ptr %p to i64 ; CHECK-NEXT: %[[#SHADOW_OFFSET:]] = xor i64 %[[#INTP]], [[#MASK]] diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_mem_intrinsic.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_mem_intrinsic.ll index f8adb01..f4f3cb5 100644 --- a/llvm/test/Instrumentation/DataFlowSanitizer/origin_mem_intrinsic.ll +++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_mem_intrinsic.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -passes=dfsan -dfsan-track-origins=1 -S | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -passes=dfsan -dfsan-track-origins=1 -dfsan-add-global-name-suffix=0 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -7,32 
+8,54 @@ declare void @llvm.memmove.p0.p0.i32(ptr, ptr, i32, i1) declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) define void @memcpy(ptr %d, ptr %s, i32 %l) { - ; CHECK: @memcpy.dfsan - ; CHECK: [[L64:%.*]] = zext i32 %l to i64 - ; CHECK: call void @__dfsan_mem_origin_transfer(ptr %d, ptr %s, i64 [[L64]]) - ; CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 1 {{.*}}, ptr align 1 {{.*}}, i32 {{.*}}, i1 false) - ; CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %d, ptr %s, i32 %l, i1 false) - +; CHECK-LABEL: define void @memcpy( +; CHECK-SAME: ptr [[D:%.*]], ptr [[S:%.*]], i32 [[L:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[L]] to i64 +; CHECK-NEXT: call void @__dfsan_mem_origin_transfer(ptr [[D]], ptr [[S]], i64 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[D]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[S]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[L]], 1 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TMP4]], ptr align 1 [[TMP7]], i32 [[TMP8]], i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[D]], ptr [[S]], i32 [[L]], i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memcpy.p0.p0.i32(ptr %d, ptr %s, i32 %l, i1 0) ret void } define void @memmove(ptr %d, ptr %s, i32 %l) { - ; CHECK: @memmove.dfsan - ; CHECK: [[L64:%.*]] = zext i32 %l to i64 - ; CHECK: call void @__dfsan_mem_origin_transfer(ptr %d, ptr %s, i64 [[L64]]) - ; CHECK: call void @llvm.memmove.p0.p0.i32(ptr align 1 {{.*}}, ptr align 1 {{.*}}, i32 {{.*}}, i1 false) - ; CHECK: call void @llvm.memmove.p0.p0.i32(ptr %d, ptr %s, i32 %l, i1 false) - +; CHECK-LABEL: define void @memmove( +; CHECK-SAME: ptr [[D:%.*]], ptr [[S:%.*]], i32 [[L:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[L]] to i64 +; CHECK-NEXT: call void @__dfsan_mem_origin_transfer(ptr [[D]], ptr [[S]], i64 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[D]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[S]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[L]], 1 +; CHECK-NEXT: call void @llvm.memmove.p0.p0.i32(ptr align 1 [[TMP4]], ptr align 1 [[TMP7]], i32 [[TMP8]], i1 false) +; CHECK-NEXT: call void @llvm.memmove.p0.p0.i32(ptr [[D]], ptr [[S]], i32 [[L]], i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memmove.p0.p0.i32(ptr %d, ptr %s, i32 %l, i1 0) ret void } define void @memset(ptr %p, i8 %v) { - ; CHECK: @memset.dfsan - ; CHECK: [[O:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 - ; CHECK: [[S:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN:2]] - ; CHECK: call void @__dfsan_set_label(i8 [[S]], i32 [[O]], ptr %p, i64 1) +; CHECK-LABEL: define void @memset( +; CHECK-SAME: ptr [[P:%.*]], i8 [[V:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 +; CHECK-NEXT: call void 
@__dfsan_set_label(i8 [[TMP2]], i32 [[TMP1]], ptr [[P]], i64 1) +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[P]], i8 [[V]], i64 1, i1 true) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0.i64(ptr %p, i8 %v, i64 1, i1 1) ret void -}
\ No newline at end of file +} diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_other_ops.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_other_ops.ll index 3b10204..f409143 100644 --- a/llvm/test/Instrumentation/DataFlowSanitizer/origin_other_ops.ll +++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_other_ops.ll @@ -1,140 +1,200 @@ -; RUN: opt < %s -passes=dfsan -dfsan-track-origins=1 -S | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -passes=dfsan -dfsan-track-origins=1 -dfsan-add-global-name-suffix=0 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -; CHECK: @__dfsan_arg_tls = external thread_local(initialexec) global [[TLS_ARR:\[100 x i64\]]] -; CHECK: @__dfsan_retval_tls = external thread_local(initialexec) global [[TLS_ARR]] define float @unop(float %f) { - ; CHECK: @unop.dfsan - ; CHECK: [[FO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 - ; CHECK: store i32 [[FO]], ptr @__dfsan_retval_origin_tls, align 4 - +; CHECK-LABEL: define float @unop( +; CHECK-SAME: float [[F:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 +; CHECK-NEXT: [[R:%.*]] = fneg float [[F]] +; CHECK-NEXT: store i8 [[TMP2]], ptr @__dfsan_retval_tls, align 2 +; CHECK-NEXT: store i32 [[TMP1]], ptr @__dfsan_retval_origin_tls, align 4 +; CHECK-NEXT: ret float [[R]] +; %r = fneg float %f ret float %r } define i1 @binop(i1 %a, i1 %b) { - ; CHECK: @binop.dfsan - ; CHECK: [[BO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 - ; CHECK: [[AO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 - ; CHECK: [[BS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2 - ; CHECK: [[NE:%.*]] = icmp ne i8 [[BS]], 0 - ; CHECK: [[MO:%.*]] = select i1 [[NE]], i32 [[BO]], i32 [[AO]] - ; CHECK: store i32 [[MO]], ptr @__dfsan_retval_origin_tls, align 4 - +; CHECK-LABEL: define i1 @binop( +; CHECK-SAME: i1 [[A:%.*]], i1 [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 +; CHECK-NEXT: [[TMP5:%.*]] = or i8 [[TMP4]], [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i32 [[TMP1]], i32 [[TMP2]] +; CHECK-NEXT: [[R:%.*]] = add i1 [[A]], [[B]] +; CHECK-NEXT: store i8 [[TMP5]], ptr @__dfsan_retval_tls, align 2 +; CHECK-NEXT: store i32 [[TMP7]], ptr @__dfsan_retval_origin_tls, align 4 +; CHECK-NEXT: ret i1 [[R]] +; %r = add i1 %a, %b ret i1 %r } define i8 @castop(ptr %p) { - ; CHECK: @castop.dfsan - ; CHECK: [[PO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 - ; CHECK: store i32 [[PO]], ptr @__dfsan_retval_origin_tls, align 4 - +; CHECK-LABEL: define i8 @castop( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr @__dfsan_arg_tls, 
align 2 +; CHECK-NEXT: [[R:%.*]] = ptrtoint ptr [[P]] to i8 +; CHECK-NEXT: store i8 [[TMP2]], ptr @__dfsan_retval_tls, align 2 +; CHECK-NEXT: store i32 [[TMP1]], ptr @__dfsan_retval_origin_tls, align 4 +; CHECK-NEXT: ret i8 [[R]] +; %r = ptrtoint ptr %p to i8 ret i8 %r } define i1 @cmpop(i1 %a, i1 %b) { - ; CHECK: @cmpop.dfsan - ; CHECK: [[BO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 - ; CHECK: [[AO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 - ; CHECK: [[BS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2 - ; CHECK: [[NE:%.*]] = icmp ne i8 [[BS]], 0 - ; CHECK: [[MO:%.*]] = select i1 [[NE]], i32 [[BO]], i32 [[AO]] - ; CHECK: store i32 [[MO]], ptr @__dfsan_retval_origin_tls, align 4 - +; CHECK-LABEL: define i1 @cmpop( +; CHECK-SAME: i1 [[A:%.*]], i1 [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 +; CHECK-NEXT: [[TMP5:%.*]] = or i8 [[TMP4]], [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i32 [[TMP1]], i32 [[TMP2]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i1 [[A]], [[B]] +; CHECK-NEXT: store i8 [[TMP5]], ptr @__dfsan_retval_tls, align 2 +; CHECK-NEXT: store i32 [[TMP7]], ptr @__dfsan_retval_origin_tls, align 4 +; CHECK-NEXT: ret i1 [[R]] +; %r = icmp eq i1 %a, %b ret i1 %r } define ptr @gepop(ptr %p, i32 %a, i32 %b, i32 %c) { - ; CHECK: @gepop.dfsan - ; CHECK: [[CO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 3), align 4 - ; CHECK: [[BO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 2), align 4 - ; CHECK: [[AO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 - ; CHECK: [[PO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 - ; CHECK: [[CS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 6) to ptr), align 2 - ; CHECK: [[BS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align 2 - ; CHECK: [[AS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2 - ; CHECK: [[AS_NE:%.*]] = icmp ne i8 [[AS]], 0 - ; CHECK: [[APO:%.*]] = select i1 [[AS_NE]], i32 [[AO]], i32 [[PO]] - ; CHECK: [[BS_NE:%.*]] = icmp ne i8 [[BS]], 0 - ; CHECK: [[ABPO:%.*]] = select i1 [[BS_NE]], i32 [[BO]], i32 [[APO]] - ; CHECK: [[CS_NE:%.*]] = icmp ne i8 [[CS]], 0 - ; CHECK: [[ABCPO:%.*]] = select i1 [[CS_NE]], i32 [[CO]], i32 [[ABPO]] - ; CHECK: store i32 [[ABCPO]], ptr @__dfsan_retval_origin_tls, align 4 - +; CHECK-LABEL: define ptr @gepop( +; CHECK-SAME: ptr [[P:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 3), align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 2), align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, 
i64 0, i64 1), align 4 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 6), align 2 +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align 2 +; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 +; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 +; CHECK-NEXT: [[TMP9:%.*]] = or i8 [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = or i8 [[TMP9]], [[TMP6]] +; CHECK-NEXT: [[TMP11:%.*]] = or i8 [[TMP10]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i8 [[TMP7]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP3]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = icmp ne i8 [[TMP6]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP2]], i32 [[TMP13]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i8 [[TMP5]], 0 +; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP1]], i32 [[TMP15]] +; CHECK-NEXT: [[E:%.*]] = getelementptr [10 x [20 x i32]], ptr [[P]], i32 [[A]], i32 [[B]], i32 [[C]] +; CHECK-NEXT: store i8 [[TMP11]], ptr @__dfsan_retval_tls, align 2 +; CHECK-NEXT: store i32 [[TMP17]], ptr @__dfsan_retval_origin_tls, align 4 +; CHECK-NEXT: ret ptr [[E]] +; %e = getelementptr [10 x [20 x i32]], ptr %p, i32 %a, i32 %b, i32 %c ret ptr %e } define i32 @eeop(<4 x i32> %a, i32 %b) { - ; CHECK: @eeop.dfsan - ; CHECK: [[BO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 - ; CHECK: [[AO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 - ; CHECK: [[BS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2 - ; CHECK: [[NE:%.*]] = icmp ne i8 [[BS]], 0 - ; CHECK: [[MO:%.*]] = select i1 [[NE]], i32 [[BO]], i32 [[AO]] - ; CHECK: store i32 [[MO]], ptr @__dfsan_retval_origin_tls, align 4 - +; CHECK-LABEL: define i32 @eeop( +; CHECK-SAME: <4 x i32> [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 +; CHECK-NEXT: [[TMP5:%.*]] = or i8 [[TMP4]], [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i32 [[TMP1]], i32 [[TMP2]] +; CHECK-NEXT: [[E:%.*]] = extractelement <4 x i32> [[A]], i32 [[B]] +; CHECK-NEXT: store i8 [[TMP5]], ptr @__dfsan_retval_tls, align 2 +; CHECK-NEXT: store i32 [[TMP7]], ptr @__dfsan_retval_origin_tls, align 4 +; CHECK-NEXT: ret i32 [[E]] +; %e = extractelement <4 x i32> %a, i32 %b ret i32 %e } define <4 x i32> @ieop(<4 x i32> %p, i32 %a, i32 %b) { - ; CHECK: @ieop.dfsan - ; CHECK: [[BO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 2), align 4 - ; CHECK: [[AO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 - ; CHECK: [[PO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 - ; CHECK: [[BS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align 2 - ; CHECK: [[AS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to 
ptr), align 2 - ; CHECK: [[AS_NE:%.*]] = icmp ne i8 [[AS]], 0 - ; CHECK: [[APO:%.*]] = select i1 [[AS_NE]], i32 [[AO]], i32 [[PO]] - ; CHECK: [[BS_NE:%.*]] = icmp ne i8 [[BS]], 0 - ; CHECK: [[ABPO:%.*]] = select i1 [[BS_NE]], i32 [[BO]], i32 [[APO]] - ; CHECK: store i32 [[ABPO]], ptr @__dfsan_retval_origin_tls, align 4 - +; CHECK-LABEL: define <4 x i32> @ieop( +; CHECK-SAME: <4 x i32> [[P:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 2), align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align 2 +; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 +; CHECK-NEXT: [[TMP7:%.*]] = or i8 [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = or i8 [[TMP7]], [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i8 [[TMP5]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP2]], i32 [[TMP3]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP1]], i32 [[TMP10]] +; CHECK-NEXT: [[E:%.*]] = insertelement <4 x i32> [[P]], i32 [[A]], i32 [[B]] +; CHECK-NEXT: store i8 [[TMP8]], ptr @__dfsan_retval_tls, align 2 +; CHECK-NEXT: store i32 [[TMP12]], ptr @__dfsan_retval_origin_tls, align 4 +; CHECK-NEXT: ret <4 x i32> [[E]] +; %e = insertelement <4 x i32> %p, i32 %a, i32 %b ret <4 x i32> %e } define <4 x i32> @svop(<4 x i32> %a, <4 x i32> %b) { - ; CHECK: @svop.dfsan - ; CHECK: [[BO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 - ; CHECK: [[AO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 - ; CHECK: [[BS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2 - ; CHECK: [[NE:%.*]] = icmp ne i8 [[BS]], 0 - ; CHECK: [[MO:%.*]] = select i1 [[NE]], i32 [[BO]], i32 [[AO]] - ; CHECK: store i32 [[MO]], ptr @__dfsan_retval_origin_tls, align 4 - +; CHECK-LABEL: define <4 x i32> @svop( +; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 +; CHECK-NEXT: [[TMP5:%.*]] = or i8 [[TMP4]], [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i32 [[TMP1]], i32 [[TMP2]] +; CHECK-NEXT: [[E:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 4, i32 1, i32 5> +; CHECK-NEXT: store i8 [[TMP5]], ptr @__dfsan_retval_tls, align 2 +; CHECK-NEXT: store i32 [[TMP7]], ptr @__dfsan_retval_origin_tls, align 4 +; CHECK-NEXT: ret <4 x i32> [[E]] +; %e = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5> ret <4 x i32> %e -} +} define i32 @evop({i32, float} %a) { - ; CHECK: @evop.dfsan - ; CHECK: [[AO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 - ; CHECK: store i32 [[AO]], ptr 
@__dfsan_retval_origin_tls, align 4 - +; CHECK-LABEL: define i32 @evop( +; CHECK-SAME: { i32, float } [[A:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load { i8, i8 }, ptr @__dfsan_arg_tls, align 2 +; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i8, i8 } [[TMP2]], 0 +; CHECK-NEXT: [[E:%.*]] = extractvalue { i32, float } [[A]], 0 +; CHECK-NEXT: store i8 [[TMP3]], ptr @__dfsan_retval_tls, align 2 +; CHECK-NEXT: store i32 [[TMP1]], ptr @__dfsan_retval_origin_tls, align 4 +; CHECK-NEXT: ret i32 [[E]] +; %e = extractvalue {i32, float} %a, 0 ret i32 %e } +; COMM: TODO simplify the expression 4 to +; COMM: 6, if shadow-tls-alignment is updated to match shadow define {i32, {float, float}} @ivop({i32, {float, float}} %a, {float, float} %b) { - ; CHECK: @ivop.dfsan - ; CHECK: [[BO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 - ; CHECK: [[AO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 - ; COMM: TODO simplify the expression 4 to - ; COMM: 6, if shadow-tls-alignment is updated to match shadow - ; CHECK: [[BS:%.*]] = load { i8, i8 }, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align 2 - ; CHECK: [[BS0:%.*]] = extractvalue { i8, i8 } [[BS]], 0 - ; CHECK: [[BS1:%.*]] = extractvalue { i8, i8 } [[BS]], 1 - ; CHECK: [[BS01:%.*]] = or i8 [[BS0]], [[BS1]] - ; CHECK: [[NE:%.*]] = icmp ne i8 [[BS01]], 0 - ; CHECK: [[MO:%.*]] = select i1 [[NE]], i32 [[BO]], i32 [[AO]] - ; CHECK: store i32 [[MO]], ptr @__dfsan_retval_origin_tls, align 4 - +; CHECK-LABEL: define { i32, { float, float } } @ivop( +; CHECK-SAME: { i32, { float, float } } [[A:%.*]], { float, float } [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load { i8, i8 }, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align 2 +; CHECK-NEXT: [[TMP4:%.*]] = load { i8, { i8, i8 } }, ptr @__dfsan_arg_tls, align 2 +; CHECK-NEXT: [[TMP5:%.*]] = insertvalue { i8, { i8, i8 } } [[TMP4]], { i8, i8 } [[TMP3]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i8, i8 } [[TMP3]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { i8, i8 } [[TMP3]], 1 +; CHECK-NEXT: [[TMP8:%.*]] = or i8 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i8 [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP1]], i32 [[TMP2]] +; CHECK-NEXT: [[E:%.*]] = insertvalue { i32, { float, float } } [[A]], { float, float } [[B]], 1 +; CHECK-NEXT: store { i8, { i8, i8 } } [[TMP5]], ptr @__dfsan_retval_tls, align 2 +; CHECK-NEXT: store i32 [[TMP10]], ptr @__dfsan_retval_origin_tls, align 4 +; CHECK-NEXT: ret { i32, { float, float } } [[E]] +; %e = insertvalue {i32, {float, float}} %a, {float, float} %b, 1 ret {i32, {float, float}} %e } diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_phi.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_phi.ll index e98dd2b..b69c383 100644 --- a/llvm/test/Instrumentation/DataFlowSanitizer/origin_phi.ll +++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_phi.ll @@ -1,41 +1,50 @@ -; RUN: opt < %s -passes=dfsan -dfsan-track-origins=1 -S | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -passes=dfsan -dfsan-track-origins=1 -dfsan-add-global-name-suffix=0 -S | 
FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -; CHECK: @__dfsan_arg_tls = external thread_local(initialexec) global [[TLS_ARR:\[100 x i64\]]] define i32 @phiop(i32 %a, i32 %b, i1 %c) { - ; CHECK: @phiop.dfsan - ; CHECK: entry: - ; CHECK: [[BO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 - ; CHECK: [[AO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 - ; CHECK: [[BS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN:2]] - ; CHECK: [[AS:%.*]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN]] - ; CHECK: br i1 %c, label %next, label %done - ; CHECK: next: - ; CHECK: br i1 %c, label %T, label %F - ; CHECK: T: - ; CHECK: [[BS_NE:%.*]] = icmp ne i8 [[BS]], 0 - ; CHECK: [[BAO_T:%.*]] = select i1 [[BS_NE]], i32 [[BO]], i32 [[AO]] - ; CHECK: br label %done - ; CHECK: F: - ; CHECK: [[AS_NE:%.*]] = icmp ne i8 [[AS]], 0 - ; CHECK: [[BAO_F:%.*]] = select i1 [[AS_NE]], i32 [[AO]], i32 [[BO]] - ; CHECK: br label %done - ; CHECK: done: - ; CHECK: [[PO:%.*]] = phi i32 [ [[BAO_T]], %T ], [ [[BAO_F]], %F ], [ [[AO]], %entry ] - ; CHECK: store i32 [[PO]], ptr @__dfsan_retval_origin_tls, align 4 - +; CHECK-LABEL: define i32 @phiop( +; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i1 [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 +; CHECK-NEXT: br i1 [[C]], label %[[NEXT:.*]], label %[[DONE:.*]] +; CHECK: [[NEXT]]: +; CHECK-NEXT: br i1 [[C]], label %[[T:.*]], label %[[F:.*]] +; CHECK: [[T]]: +; CHECK-NEXT: [[TMP4:%.*]] = or i8 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i8 [[TMP2]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP0]], i32 [[TMP1]] +; CHECK-NEXT: [[SUM:%.*]] = add i32 [[A]], [[B]] +; CHECK-NEXT: br label %[[DONE]] +; CHECK: [[F]]: +; CHECK-NEXT: [[TMP7:%.*]] = or i8 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP1]], i32 [[TMP0]] +; CHECK-NEXT: [[DIFF:%.*]] = sub i32 [[B]], [[A]] +; CHECK-NEXT: br label %[[DONE]] +; CHECK: [[DONE]]: +; CHECK-NEXT: [[TMP10:%.*]] = phi i8 [ [[TMP4]], %[[T]] ], [ [[TMP7]], %[[F]] ], [ [[TMP3]], %[[ENTRY]] ] +; CHECK-NEXT: [[TMP11:%.*]] = phi i32 [ [[TMP6]], %[[T]] ], [ [[TMP9]], %[[F]] ], [ [[TMP1]], %[[ENTRY]] ] +; CHECK-NEXT: [[R:%.*]] = phi i32 [ [[SUM]], %[[T]] ], [ [[DIFF]], %[[F]] ], [ [[A]], %[[ENTRY]] ] +; CHECK-NEXT: store i8 [[TMP10]], ptr @__dfsan_retval_tls, align 2 +; CHECK-NEXT: store i32 [[TMP11]], ptr @__dfsan_retval_origin_tls, align 4 +; CHECK-NEXT: ret i32 [[R]] +; entry: br i1 %c, label %next, label %done -next: - br i1 %c, label %T, label %F +next: + br i1 %c, label %T, label %F T: - %sum = add i32 %a, %b + %sum = add i32 %a, %b br label %done F: - %diff = sub i32 %b, %a + %diff = sub i32 %b, %a br label %done done: %r = phi i32 [%sum, %T], [%diff, %F], [%a, %entry] ret i32 %r -}
\ No newline at end of file +} diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_select.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_select.ll index 133bf22..2839897 100644 --- a/llvm/test/Instrumentation/DataFlowSanitizer/origin_select.ll +++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_select.ll @@ -48,7 +48,7 @@ define <4 x i8> @select8v(<4 x i1> %c, <4 x i8> %t, <4 x i8> %f) { ; TRACK_CONTROL_FLOW: [[CO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 ; TRACK_CONTROL_FLOW: [[FO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 2), align 4 ; TRACK_CONTROL_FLOW: [[TO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 - ; TRACK_CONTROL_FLOW: [[FS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align 2 + ; TRACK_CONTROL_FLOW: [[FS:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align 2 ; TRACK_CONTROL_FLOW: [[CS:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 ; TRACK_CONTROL_FLOW: [[FS_NE:%.*]] = icmp ne i8 [[FS]], 0 ; TRACK_CONTROL_FLOW: [[FTO:%.*]] = select i1 [[FS_NE]], i32 [[FO]], i32 [[TO]] @@ -59,11 +59,11 @@ define <4 x i8> @select8v(<4 x i1> %c, <4 x i8> %t, <4 x i8> %f) { ; NO_TRACK_CONTROL_FLOW: @select8v.dfsan ; NO_TRACK_CONTROL_FLOW: [[FO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 2), align 4 ; NO_TRACK_CONTROL_FLOW: [[TO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 - ; NO_TRACK_CONTROL_FLOW: [[FS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align 2 + ; NO_TRACK_CONTROL_FLOW: [[FS:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align 2 ; NO_TRACK_CONTROL_FLOW: [[FS_NE:%.*]] = icmp ne i8 [[FS]], 0 ; NO_TRACK_CONTROL_FLOW: [[FTO:%.*]] = select i1 [[FS_NE]], i32 [[FO]], i32 [[TO]] ; NO_TRACK_CONTROL_FLOW: store i32 [[FTO]], ptr @__dfsan_retval_origin_tls, align 4 %a = select <4 x i1> %c, <4 x i8> %t, <4 x i8> %f ret <4 x i8> %a -}
\ No newline at end of file +} diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_store.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_store.ll index 0b0ba40..55b0a01 100644 --- a/llvm/test/Instrumentation/DataFlowSanitizer/origin_store.ll +++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_store.ll @@ -75,7 +75,7 @@ define void @store64_align8(ptr %p, i64 %a) { ; COMBINE_STORE_PTR-NEXT: %[[#PS:]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN]] ; CHECK-NEXT: %[[#AO:]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 - ; CHECK-NEXT: %[[#AS:]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]] + ; CHECK-NEXT: %[[#AS:]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN]] ; COMBINE_STORE_PTR-NEXT: %[[#AS:]] = or i8 %[[#AS]], %[[#PS]] ; COMBINE_STORE_PTR-NEXT: %[[#NE:]] = icmp ne i8 %[[#PS]], 0 @@ -104,7 +104,7 @@ define void @store64_align2(ptr %p, i64 %a) { ; COMBINE_STORE_PTR-NEXT: %[[#PS:]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN]] ; CHECK-NEXT: %[[#AO:]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 - ; CHECK-NEXT: %[[#AS:]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]] + ; CHECK-NEXT: %[[#AS:]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN]] ; COMBINE_STORE_PTR-NEXT: %[[#AS:]] = or i8 %[[#AS]], %[[#PS]] ; COMBINE_STORE_PTR-NEXT: %[[#NE:]] = icmp ne i8 %[[#PS]], 0 @@ -131,7 +131,7 @@ define void @store96_align8(ptr %p, i96 %a) { ; COMBINE_STORE_PTR-NEXT: %[[#PS:]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN]] ; CHECK-NEXT: %[[#AO:]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 - ; CHECK-NEXT: %[[#AS:]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]] + ; CHECK-NEXT: %[[#AS:]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN]] ; COMBINE_STORE_PTR-NEXT: %[[#AS:]] = or i8 %[[#AS]], %[[#PS]] ; COMBINE_STORE_PTR-NEXT: %[[#NE:]] = icmp ne i8 %[[#PS]], 0 diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_store_threshold.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_store_threshold.ll index 3630ebc..8b526f1 100644 --- a/llvm/test/Instrumentation/DataFlowSanitizer/origin_store_threshold.ll +++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_store_threshold.ll @@ -1,16 +1,37 @@ -; RUN: opt < %s -passes=dfsan -dfsan-track-origins=1 -dfsan-instrument-with-call-threshold=0 -S | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -passes=dfsan -dfsan-track-origins=1 -dfsan-instrument-with-call-threshold=0 -dfsan-add-global-name-suffix=0 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" define void @store_threshold(ptr %p, [2 x i64] %a) { - ; CHECK: @store_threshold.dfsan - ; CHECK: [[AO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 - ; CHECK: [[AS:%.*]] = load [2 x i8], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2 - ; CHECK: [[AS0:%.*]] = 
extractvalue [2 x i8] [[AS]], 0 - ; CHECK: [[AS1:%.*]] = extractvalue [2 x i8] [[AS]], 1 - ; CHECK: [[AS01:%.*]] = or i8 [[AS0]], [[AS1]] - ; CHECK: call void @__dfsan_maybe_store_origin(i8 [[AS01]], ptr %p, i64 16, i32 [[AO]]) - ; CHECK: store [2 x i64] %a, ptr %p, align 8 +; CHECK-LABEL: define void @store_threshold( +; CHECK-SAME: ptr [[P:%.*]], [2 x i64] [[A:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load [2 x i8], ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 +; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [2 x i8] [[TMP2]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [2 x i8] [[TMP2]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = or i8 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP7]], 17592186044416 +; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x i8> poison, i8 [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <8 x i8> [[TMP11]], i8 [[TMP5]], i32 1 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x i8> [[TMP12]], i8 [[TMP5]], i32 2 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x i8> [[TMP13]], i8 [[TMP5]], i32 3 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x i8> [[TMP14]], i8 [[TMP5]], i32 4 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x i8> [[TMP15]], i8 [[TMP5]], i32 5 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x i8> [[TMP16]], i8 [[TMP5]], i32 6 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <8 x i8> [[TMP17]], i8 [[TMP5]], i32 7 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr <8 x i8>, ptr [[TMP8]], i32 0 +; CHECK-NEXT: store <8 x i8> [[TMP18]], ptr [[TMP19]], align 1 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr <8 x i8>, ptr [[TMP8]], i32 1 +; CHECK-NEXT: store <8 x i8> [[TMP18]], ptr [[TMP20]], align 1 +; CHECK-NEXT: call void @__dfsan_maybe_store_origin(i8 [[TMP5]], ptr [[P]], i64 16, i32 [[TMP1]]) +; CHECK-NEXT: store [2 x i64] [[A]], ptr [[P]], align 8 +; CHECK-NEXT: ret void +; store [2 x i64] %a, ptr %p ret void diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_track_load.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_track_load.ll index b93d2eb..f967ccf 100644 --- a/llvm/test/Instrumentation/DataFlowSanitizer/origin_track_load.ll +++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_track_load.ll @@ -1,27 +1,26 @@ -; RUN: opt < %s -passes=dfsan -dfsan-track-origins=2 -S | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -passes=dfsan -dfsan-track-origins=2 -dfsan-add-global-name-suffix=0 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" define i64 @load64(ptr %p) { - ; CHECK-LABEL: @load64.dfsan - - ; CHECK-NEXT: %[[#PO:]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 - ; CHECK-NEXT: %[[#PS:]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN:2]] - - ; CHECK-NEXT: %[[#LABEL_ORIGIN:]] = call zeroext i64 @__dfsan_load_label_and_origin(ptr %p, i64 8) - ; CHECK-NEXT: %[[#LABEL_ORIGIN_H32:]] = lshr i64 %[[#LABEL_ORIGIN]], 32 - ; CHECK-NEXT: %[[#LABEL:]] = trunc i64 %[[#LABEL_ORIGIN_H32]] to i8 - ; 
CHECK-NEXT: %[[#ORIGIN:]] = trunc i64 %[[#LABEL_ORIGIN]] to i32 - ; CHECK-NEXT: %[[#ORIGIN_CHAINED:]] = call i32 @__dfsan_chain_origin_if_tainted(i8 %[[#LABEL]], i32 %[[#ORIGIN]]) - - ; CHECK-NEXT: %[[#LABEL:]] = or i8 %[[#LABEL]], %[[#PS]] - ; CHECK-NEXT: %[[#NZ:]] = icmp ne i8 %[[#PS]], 0 - ; CHECK-NEXT: %[[#ORIGIN_SEL:]] = select i1 %[[#NZ]], i32 %[[#PO]], i32 %[[#ORIGIN_CHAINED]] - - ; CHECK-NEXT: %a = load i64, ptr %p - ; CHECK-NEXT: store i8 %[[#LABEL]], ptr @__dfsan_retval_tls, align [[ALIGN]] - ; CHECK-NEXT: store i32 %[[#ORIGIN_SEL]], ptr @__dfsan_retval_origin_tls, align 4 - +; CHECK-LABEL: define i64 @load64( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 +; CHECK-NEXT: [[TMP3:%.*]] = call zeroext i64 @__dfsan_load_label_and_origin(ptr [[P]], i64 8) +; CHECK-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP3]], 32 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i8 +; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @__dfsan_chain_origin_if_tainted(i8 [[TMP5]], i32 [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = or i8 [[TMP5]], [[TMP2]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i8 [[TMP2]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP1]], i32 [[TMP7]] +; CHECK-NEXT: [[A:%.*]] = load i64, ptr [[P]], align 8 +; CHECK-NEXT: store i8 [[TMP8]], ptr @__dfsan_retval_tls, align 2 +; CHECK-NEXT: store i32 [[TMP10]], ptr @__dfsan_retval_origin_tls, align 4 +; CHECK-NEXT: ret i64 [[A]] +; %a = load i64, ptr %p ret i64 %a } diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/phi.ll b/llvm/test/Instrumentation/DataFlowSanitizer/phi.ll index 592d3eb..ecf0d9c8 100644 --- a/llvm/test/Instrumentation/DataFlowSanitizer/phi.ll +++ b/llvm/test/Instrumentation/DataFlowSanitizer/phi.ll @@ -1,26 +1,41 @@ -; RUN: opt < %s -passes=dfsan -S | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -passes=dfsan -dfsan-add-global-name-suffix=0 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" define {i32, i32} @test({i32, i32} %a, i1 %c) { - ; CHECK: %[[#AL:]] = load { i8, i8 }, ptr @__dfsan_arg_tls, align [[ALIGN:2]] - ; CHECK: %[[#AL0:]] = insertvalue { i8, i8 } %[[#AL]], i8 0, 0 - ; CHECK: %[[#AL1:]] = insertvalue { i8, i8 } %[[#AL]], i8 0, 1 - ; CHECK: %[[#PL:]] = phi { i8, i8 } [ %[[#AL0]], %T ], [ %[[#AL1]], %F ] - ; CHECK: store { i8, i8 } %[[#PL]], ptr @__dfsan_retval_tls, align [[ALIGN]] +; CHECK-LABEL: define { i32, i32 } @test( +; CHECK-SAME: { i32, i32 } [[A:%.*]], i1 [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load { i8, i8 }, ptr @__dfsan_arg_tls, align 2 +; CHECK-NEXT: br i1 [[C]], label %[[T:.*]], label %[[F:.*]] +; CHECK: [[T]]: +; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i8, i8 } [[TMP0]], i8 0, 0 +; CHECK-NEXT: [[AT:%.*]] = insertvalue { i32, i32 } [[A]], i32 1, 0 +; CHECK-NEXT: br label %[[DONE:.*]] +; CHECK: [[F]]: +; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { i8, i8 } [[TMP0]], i8 0, 1 +; CHECK-NEXT: [[AF:%.*]] = insertvalue { i32, i32 } [[A]], i32 1, 1 +; CHECK-NEXT: br label %[[DONE]] +; CHECK: [[DONE]]: +; CHECK-NEXT: [[TMP3:%.*]] = phi { i8, i8 } [ [[TMP1]], %[[T]] ], [ [[TMP2]], %[[F]] ] +; CHECK-NEXT: [[B:%.*]] = phi { i32, i32 } [ 
[[AT]], %[[T]] ], [ [[AF]], %[[F]] ] +; CHECK-NEXT: store { i8, i8 } [[TMP3]], ptr @__dfsan_retval_tls, align 2 +; CHECK-NEXT: ret { i32, i32 } [[B]] +; entry: br i1 %c, label %T, label %F - + T: %at = insertvalue {i32, i32} %a, i32 1, 0 br label %done - + F: %af = insertvalue {i32, i32} %a, i32 1, 1 br label %done - + done: %b = phi {i32, i32} [%at, %T], [%af, %F] - ret {i32, i32} %b + ret {i32, i32} %b } diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/select.ll b/llvm/test/Instrumentation/DataFlowSanitizer/select.ll index 5056616..005648b 100644 --- a/llvm/test/Instrumentation/DataFlowSanitizer/select.ll +++ b/llvm/test/Instrumentation/DataFlowSanitizer/select.ll @@ -1,74 +1,81 @@ -; RUN: opt < %s -passes=dfsan -dfsan-track-select-control-flow=true -S | FileCheck %s --check-prefixes=CHECK,TRACK_CF -; RUN: opt < %s -passes=dfsan -dfsan-track-select-control-flow=false -S | FileCheck %s --check-prefixes=CHECK,NO_TRACK_CF +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -passes=dfsan -dfsan-track-select-control-flow=true -dfsan-add-global-name-suffix=0 -S | FileCheck %s --check-prefixes=CHECK,TRACK_CF +; RUN: opt < %s -passes=dfsan -dfsan-track-select-control-flow=false -dfsan-add-global-name-suffix=0 -S | FileCheck %s --check-prefixes=CHECK,NO_TRACK_CF target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -; CHECK: @__dfsan_arg_tls = external thread_local(initialexec) global [[TLS_ARR:\[100 x i64\]]] -; CHECK: @__dfsan_retval_tls = external thread_local(initialexec) global [[TLS_ARR]] define i8 @select8(i1 %c, i8 %t, i8 %f) { - ; TRACK_CF: @select8.dfsan - ; TRACK_CF: %[[#R:]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align [[ALIGN:2]] - ; TRACK_CF: %[[#R+1]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]] - ; TRACK_CF: %[[#R+2]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN]] - ; TRACK_CF: %[[#R+3]] = select i1 %c, i8 %[[#R+1]], i8 %[[#R]] - ; TRACK_CF: %[[#RO:]] = or i8 %[[#R+2]], %[[#R+3]] - ; TRACK_CF: %a = select i1 %c, i8 %t, i8 %f - ; TRACK_CF: store i8 %[[#RO]], ptr @__dfsan_retval_tls, align [[ALIGN]] - ; TRACK_CF: ret i8 %a - - ; NO_TRACK_CF: @select8.dfsan - ; NO_TRACK_CF: %[[#R:]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align [[ALIGN:2]] - ; NO_TRACK_CF: %[[#R+1]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]] - ; NO_TRACK_CF: %[[#R+2]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN]] - ; NO_TRACK_CF: %[[#R+3]] = select i1 %c, i8 %[[#R+1]], i8 %[[#R]] - ; NO_TRACK_CF: %a = select i1 %c, i8 %t, i8 %f - ; NO_TRACK_CF: store i8 %[[#R+3]], ptr @__dfsan_retval_tls, align [[ALIGN]] - ; NO_TRACK_CF: ret i8 %a - +; TRACK_CF-LABEL: define i8 @select8( +; TRACK_CF-SAME: i1 [[C:%.*]], i8 [[T:%.*]], i8 [[F:%.*]]) { +; TRACK_CF-NEXT: [[TMP1:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align 2 +; TRACK_CF-NEXT: [[TMP2:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 +; TRACK_CF-NEXT: [[TMP3:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 +; TRACK_CF-NEXT: [[TMP4:%.*]] = select i1 [[C]], i8 [[TMP2]], i8 [[TMP1]] +; TRACK_CF-NEXT: [[TMP5:%.*]] = or i8 [[TMP3]], [[TMP4]] +; TRACK_CF-NEXT: 
[[A:%.*]] = select i1 [[C]], i8 [[T]], i8 [[F]] +; TRACK_CF-NEXT: store i8 [[TMP5]], ptr @__dfsan_retval_tls, align 2 +; TRACK_CF-NEXT: ret i8 [[A]] +; +; NO_TRACK_CF-LABEL: define i8 @select8( +; NO_TRACK_CF-SAME: i1 [[C:%.*]], i8 [[T:%.*]], i8 [[F:%.*]]) { +; NO_TRACK_CF-NEXT: [[TMP1:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align 2 +; NO_TRACK_CF-NEXT: [[TMP2:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 +; NO_TRACK_CF-NEXT: [[TMP3:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 +; NO_TRACK_CF-NEXT: [[TMP4:%.*]] = select i1 [[C]], i8 [[TMP2]], i8 [[TMP1]] +; NO_TRACK_CF-NEXT: [[A:%.*]] = select i1 [[C]], i8 [[T]], i8 [[F]] +; NO_TRACK_CF-NEXT: store i8 [[TMP4]], ptr @__dfsan_retval_tls, align 2 +; NO_TRACK_CF-NEXT: ret i8 [[A]] +; %a = select i1 %c, i8 %t, i8 %f ret i8 %a } define i8 @select8e(i1 %c, i8 %tf) { - ; TRACK_CF: @select8e.dfsan - ; TRACK_CF: %[[#R:]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]] - ; TRACK_CF: %[[#R+1]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN]] - ; TRACK_CF: %[[#RO:]] = or i8 %[[#R+1]], %[[#R]] - ; TRACK_CF: %a = select i1 %c, i8 %tf, i8 %tf - ; TRACK_CF: store i8 %[[#RO]], ptr @__dfsan_retval_tls, align [[ALIGN]] - ; TRACK_CF: ret i8 %a - - ; NO_TRACK_CF: @select8e.dfsan - ; NO_TRACK_CF: %[[#R:]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]] - ; NO_TRACK_CF: %[[#R+1]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN]] - ; NO_TRACK_CF: %a = select i1 %c, i8 %tf, i8 %tf - ; NO_TRACK_CF: store i8 %[[#R]], ptr @__dfsan_retval_tls, align [[ALIGN]] - ; NO_TRACK_CF: ret i8 %a - +; TRACK_CF-LABEL: define i8 @select8e( +; TRACK_CF-SAME: i1 [[C:%.*]], i8 [[TF:%.*]]) { +; TRACK_CF-NEXT: [[TMP1:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 +; TRACK_CF-NEXT: [[TMP2:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 +; TRACK_CF-NEXT: [[TMP3:%.*]] = or i8 [[TMP2]], [[TMP1]] +; TRACK_CF-NEXT: [[A:%.*]] = select i1 [[C]], i8 [[TF]], i8 [[TF]] +; TRACK_CF-NEXT: store i8 [[TMP3]], ptr @__dfsan_retval_tls, align 2 +; TRACK_CF-NEXT: ret i8 [[A]] +; +; NO_TRACK_CF-LABEL: define i8 @select8e( +; NO_TRACK_CF-SAME: i1 [[C:%.*]], i8 [[TF:%.*]]) { +; NO_TRACK_CF-NEXT: [[TMP1:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 +; NO_TRACK_CF-NEXT: [[TMP2:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 +; NO_TRACK_CF-NEXT: [[A:%.*]] = select i1 [[C]], i8 [[TF]], i8 [[TF]] +; NO_TRACK_CF-NEXT: store i8 [[TMP1]], ptr @__dfsan_retval_tls, align 2 +; NO_TRACK_CF-NEXT: ret i8 [[A]] +; %a = select i1 %c, i8 %tf, i8 %tf ret i8 %a } define <4 x i8> @select8v(<4 x i1> %c, <4 x i8> %t, <4 x i8> %f) { - ; TRACK_CF: @select8v.dfsan - ; TRACK_CF: %[[#R:]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align [[ALIGN:2]] - ; TRACK_CF: %[[#R+1]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]] - ; TRACK_CF: %[[#R+2]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN]] - ; TRACK_CF: %[[#R+3]] = or i8 %[[#R+1]], %[[#R]] - ; TRACK_CF: %[[#RO:]] = or i8 %[[#R+2]], %[[#R+3]] - ; TRACK_CF: %a = select <4 x i1> %c, <4 x i8> %t, <4 x i8> %f - ; TRACK_CF: store i8 %[[#RO]], ptr @__dfsan_retval_tls, align [[ALIGN]] - ; TRACK_CF: ret <4 x i8> %a - - ; NO_TRACK_CF: @select8v.dfsan - ; NO_TRACK_CF: %[[#R:]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr 
@__dfsan_arg_tls to i64), i64 4) to ptr), align [[ALIGN:2]] - ; NO_TRACK_CF: %[[#R+1]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]] - ; NO_TRACK_CF: %[[#R+2]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN]] - ; NO_TRACK_CF: %[[#RO:]] = or i8 %[[#R+1]], %[[#R]] - ; NO_TRACK_CF: %a = select <4 x i1> %c, <4 x i8> %t, <4 x i8> %f - ; NO_TRACK_CF: store i8 %[[#RO]], ptr @__dfsan_retval_tls, align [[ALIGN]] - ; NO_TRACK_CF: ret <4 x i8> %a - +; TRACK_CF-LABEL: define <4 x i8> @select8v( +; TRACK_CF-SAME: <4 x i1> [[C:%.*]], <4 x i8> [[T:%.*]], <4 x i8> [[F:%.*]]) { +; TRACK_CF-NEXT: [[TMP1:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align 2 +; TRACK_CF-NEXT: [[TMP2:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 +; TRACK_CF-NEXT: [[TMP3:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 +; TRACK_CF-NEXT: [[TMP4:%.*]] = or i8 [[TMP2]], [[TMP1]] +; TRACK_CF-NEXT: [[TMP5:%.*]] = or i8 [[TMP3]], [[TMP4]] +; TRACK_CF-NEXT: [[A:%.*]] = select <4 x i1> [[C]], <4 x i8> [[T]], <4 x i8> [[F]] +; TRACK_CF-NEXT: store i8 [[TMP5]], ptr @__dfsan_retval_tls, align 2 +; TRACK_CF-NEXT: ret <4 x i8> [[A]] +; +; NO_TRACK_CF-LABEL: define <4 x i8> @select8v( +; NO_TRACK_CF-SAME: <4 x i1> [[C:%.*]], <4 x i8> [[T:%.*]], <4 x i8> [[F:%.*]]) { +; NO_TRACK_CF-NEXT: [[TMP1:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align 2 +; NO_TRACK_CF-NEXT: [[TMP2:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 +; NO_TRACK_CF-NEXT: [[TMP3:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 +; NO_TRACK_CF-NEXT: [[TMP4:%.*]] = or i8 [[TMP2]], [[TMP1]] +; NO_TRACK_CF-NEXT: [[A:%.*]] = select <4 x i1> [[C]], <4 x i8> [[T]], <4 x i8> [[F]] +; NO_TRACK_CF-NEXT: store i8 [[TMP4]], ptr @__dfsan_retval_tls, align 2 +; NO_TRACK_CF-NEXT: ret <4 x i8> [[A]] +; %a = select <4 x i1> %c, <4 x i8> %t, <4 x i8> %f ret <4 x i8> %a } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/store.ll b/llvm/test/Instrumentation/DataFlowSanitizer/store.ll index bc2a70e..1c8ab65 100644 --- a/llvm/test/Instrumentation/DataFlowSanitizer/store.ll +++ b/llvm/test/Instrumentation/DataFlowSanitizer/store.ll @@ -16,7 +16,7 @@ define void @store0({} %v, ptr %p) { define void @store8(i8 %v, ptr %p) { ; CHECK-LABEL: @store8.dfsan ; NO_COMBINE_PTR_LABEL: load i8, ptr @__dfsan_arg_tls - ; COMBINE_PTR_LABEL: load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2 + ; COMBINE_PTR_LABEL: load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 ; COMBINE_PTR_LABEL: load i8, ptr @__dfsan_arg_tls ; COMBINE_PTR_LABEL: or i8 @@ -35,7 +35,7 @@ define void @store8(i8 %v, ptr %p) { define void @store16(i16 %v, ptr %p) { ; CHECK-LABEL: @store16.dfsan ; NO_COMBINE_PTR_LABEL: load i8, ptr @__dfsan_arg_tls - ; COMBINE_PTR_LABEL: load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2 + ; COMBINE_PTR_LABEL: load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 ; COMBINE_PTR_LABEL: load i8, ptr @__dfsan_arg_tls ; COMBINE_PTR_LABEL: or i8 ; CHECK: ptrtoint ptr {{.*}} i64 @@ -55,7 +55,7 @@ define void @store16(i16 %v, ptr %p) { define void @store32(i32 %v, ptr %p) { ; CHECK-LABEL: @store32.dfsan ; NO_COMBINE_PTR_LABEL: load i8, ptr @__dfsan_arg_tls - ; COMBINE_PTR_LABEL: load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2 + ; COMBINE_PTR_LABEL: load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 ; COMBINE_PTR_LABEL: load i8, ptr @__dfsan_arg_tls ; COMBINE_PTR_LABEL: or i8 ; CHECK: ptrtoint ptr {{.*}} i64 @@ -79,7 +79,7 @@ define void @store32(i32 %v, ptr %p) { define void @store64(i64 %v, ptr %p) { ; CHECK-LABEL: @store64.dfsan ; NO_COMBINE_PTR_LABEL: load i8, ptr @__dfsan_arg_tls - ; COMBINE_PTR_LABEL: load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2 + ; COMBINE_PTR_LABEL: load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 ; COMBINE_PTR_LABEL: load i8, ptr @__dfsan_arg_tls ; COMBINE_PTR_LABEL: or i8 ; CHECK: ptrtoint ptr {{.*}} i64 diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/struct.ll b/llvm/test/Instrumentation/DataFlowSanitizer/struct.ll index 8069d28..9b4a350 100644 --- a/llvm/test/Instrumentation/DataFlowSanitizer/struct.ll +++ b/llvm/test/Instrumentation/DataFlowSanitizer/struct.ll @@ -56,15 +56,15 @@ define {i1, i32} @load_global_struct() { define {i1, i32} @select_struct(i1 %c, {i1, i32} %a, {i1, i32} %b) { ; NO_SELECT_CONTROL: @select_struct.dfsan - ; NO_SELECT_CONTROL: [[B:%.*]] = load { i8, i8 }, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align [[ALIGN:2]] - ; NO_SELECT_CONTROL: [[A:%.*]] = load { i8, i8 }, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]] + ; NO_SELECT_CONTROL: [[B:%.*]] = load { i8, i8 }, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align [[ALIGN:2]] + ; NO_SELECT_CONTROL: [[A:%.*]] = load { i8, i8 }, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN]] ; NO_SELECT_CONTROL: [[C:%.*]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN]] ; NO_SELECT_CONTROL: [[S:%.*]] = select i1 %c, { i8, i8 } [[A]], { i8, i8 } [[B]] ; NO_SELECT_CONTROL: store { i8, i8 } [[S]], ptr @__dfsan_retval_tls, align [[ALIGN]] ; 
FAST: @select_struct.dfsan - ; FAST: %[[#R:]] = load { i8, i8 }, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align [[ALIGN:2]] - ; FAST: %[[#R+1]] = load { i8, i8 }, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]] + ; FAST: %[[#R:]] = load { i8, i8 }, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align [[ALIGN:2]] + ; FAST: %[[#R+1]] = load { i8, i8 }, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN]] ; FAST: %[[#R+2]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN]] ; FAST: %[[#R+3]] = select i1 %c, { i8, i8 } %[[#R+1]], { i8, i8 } %[[#R]] ; FAST: %[[#R+4]] = extractvalue { i8, i8 } %[[#R+3]], 0 @@ -81,7 +81,7 @@ define {i1, i32} @select_struct(i1 %c, {i1, i32} %a, {i1, i32} %b) { define { i32, i32 } @asm_struct(i32 %0, i32 %1) { ; FAST: @asm_struct.dfsan - ; FAST: [[E1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN:2]] + ; FAST: [[E1:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN:2]] ; FAST: [[E0:%.*]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN]] ; FAST: [[E01:%.*]] = or i8 [[E0]], [[E1]] ; FAST: [[S0:%.*]] = insertvalue { i8, i8 } undef, i8 [[E01]], 0 @@ -111,7 +111,7 @@ define i1 @extract_struct({i1, i5} %s) { define {i1, i5} @insert_struct({i1, i5} %s, i5 %e1) { ; FAST: @insert_struct.dfsan - ; FAST: [[EM:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN:2]] + ; FAST: [[EM:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN:2]] ; FAST: [[SM:%.*]] = load { i8, i8 }, ptr @__dfsan_arg_tls, align [[ALIGN]] ; FAST: [[SM1:%.*]] = insertvalue { i8, i8 } [[SM]], i8 [[EM]], 1 ; FAST: store { i8, i8 } [[SM1]], ptr @__dfsan_retval_tls, align [[ALIGN]] @@ -138,7 +138,7 @@ define {i1, i1} @load_struct(ptr %p) { define void @store_struct(ptr %p, {i1, i1} %s) { ; FAST: @store_struct.dfsan - ; FAST: [[S:%.*]] = load { i8, i8 }, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN:2]] + ; FAST: [[S:%.*]] = load { i8, i8 }, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN:2]] ; FAST: [[E0:%.*]] = extractvalue { i8, i8 } [[S]], 0 ; FAST: [[E1:%.*]] = extractvalue { i8, i8 } [[S]], 1 ; FAST: [[E:%.*]] = or i8 [[E0]], [[E1]] @@ -153,7 +153,7 @@ define void @store_struct(ptr %p, {i1, i1} %s) { ; COMBINE_STORE_PTR: @store_struct.dfsan ; COMBINE_STORE_PTR: [[PL:%.*]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN:2]] - ; COMBINE_STORE_PTR: [[SL:%.*]] = load { i8, i8 }, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]] + ; COMBINE_STORE_PTR: [[SL:%.*]] = load { i8, i8 }, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN]] ; COMBINE_STORE_PTR: [[SL0:%.*]] = extractvalue { i8, i8 } [[SL]], 0 ; COMBINE_STORE_PTR: [[SL1:%.*]] = extractvalue { i8, i8 } [[SL]], 1 ; COMBINE_STORE_PTR: [[SL01:%.*]] = or i8 [[SL0]], [[SL1]] @@ -215,7 +215,7 @@ define i1 @extract_struct_of_aggregate31(%StructOfAggr %s) { define %StructOfAggr @insert_struct_of_aggregate11(%StructOfAggr %s, i2 %e11) { ; FAST: @insert_struct_of_aggregate11.dfsan - ; FAST: [[E11:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 8) to ptr), align [[ALIGN:2]] + ; FAST: [[E11:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 8), align [[ALIGN:2]] ; FAST: [[S:%.*]] = 
load { i8, [4 x i8], i8, { i8, i8 } }, ptr @__dfsan_arg_tls, align [[ALIGN]] ; FAST: [[S1:%.*]] = insertvalue { i8, [4 x i8], i8, { i8, i8 } } [[S]], i8 [[E11]], 1, 1 ; FAST: store { i8, [4 x i8], i8, { i8, i8 } } [[S1]], ptr @__dfsan_retval_tls, align [[ALIGN]] @@ -239,12 +239,12 @@ declare %StructOfAggr @fun_with_many_aggr_args(<2 x i7> %v, [2 x i5] %a, {i3, i3 define %StructOfAggr @call_many_aggr_args(<2 x i7> %v, [2 x i5] %a, {i3, i3} %s) { ; FAST: @call_many_aggr_args.dfsan - ; FAST: [[S:%.*]] = load { i8, i8 }, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align [[ALIGN:2]] - ; FAST: [[A:%.*]] = load [2 x i8], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]] + ; FAST: [[S:%.*]] = load { i8, i8 }, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align [[ALIGN:2]] + ; FAST: [[A:%.*]] = load [2 x i8], ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN]] ; FAST: [[V:%.*]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN]] ; FAST: store i8 [[V]], ptr @__dfsan_arg_tls, align [[ALIGN]] - ; FAST: store [2 x i8] [[A]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]] - ; FAST: store { i8, i8 } [[S]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align [[ALIGN]] + ; FAST: store [2 x i8] [[A]], ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN]] + ; FAST: store { i8, i8 } [[S]], ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align [[ALIGN]] ; FAST: %_dfsret = load { i8, [4 x i8], i8, { i8, i8 } }, ptr @__dfsan_retval_tls, align [[ALIGN]] ; FAST: store { i8, [4 x i8], i8, { i8, i8 } } %_dfsret, ptr @__dfsan_retval_tls, align [[ALIGN]] diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/vector.ll b/llvm/test/Instrumentation/DataFlowSanitizer/vector.ll index 64052d6..0580c18 100644 --- a/llvm/test/Instrumentation/DataFlowSanitizer/vector.ll +++ b/llvm/test/Instrumentation/DataFlowSanitizer/vector.ll @@ -1,19 +1,43 @@ -; RUN: opt < %s -passes=dfsan -S | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -passes=dfsan -dfsan-add-global-name-suffix=0 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" define <4 x i4> @pass_vector(<4 x i4> %v) { - ; CHECK-LABEL: @pass_vector.dfsan - ; CHECK-NEXT: %[[#REG:]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN:2]] - ; CHECK-NEXT: store i8 %[[#REG]], ptr @__dfsan_retval_tls, align [[ALIGN]] - ; CHECK-NEXT: ret <4 x i4> %v +; CHECK-LABEL: define <4 x i4> @pass_vector( +; CHECK-SAME: <4 x i4> [[V:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 +; CHECK-NEXT: store i8 [[TMP1]], ptr @__dfsan_retval_tls, align 2 +; CHECK-NEXT: ret <4 x i4> [[V]] +; ret <4 x i4> %v } define void @load_update_store_vector(ptr %p) { - ; CHECK-LABEL: @load_update_store_vector.dfsan - ; CHECK: {{.*}} = load i8, ptr @__dfsan_arg_tls, align 2 - +; CHECK-LABEL: define void @load_update_store_vector( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: 
[[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i64 1 +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = or i8 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = or i8 [[TMP8]], [[TMP1]] +; CHECK-NEXT: [[V:%.*]] = load <4 x i4>, ptr [[P]], align 2 +; CHECK-NEXT: [[E2:%.*]] = extractelement <4 x i4> [[V]], i32 2 +; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x i4> [[V]], i4 [[E2]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 87960930222080 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP12]], i32 0 +; CHECK-NEXT: store i8 [[TMP9]], ptr [[TMP13]], align 1 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP12]], i32 1 +; CHECK-NEXT: store i8 [[TMP9]], ptr [[TMP14]], align 1 +; CHECK-NEXT: store <4 x i4> [[V1]], ptr [[P]], align 2 +; CHECK-NEXT: ret void +; %v = load <4 x i4>, ptr %p %e2 = extractelement <4 x i4> %v, i32 2 %v1 = insertelement <4 x i4> %v, i4 %e2, i32 0 @@ -22,36 +46,37 @@ define void @load_update_store_vector(ptr %p) { } define <4 x i1> @icmp_vector(<4 x i8> %a, <4 x i8> %b) { - ; CHECK-LABEL: @icmp_vector.dfsan - ; CHECK-NEXT: %[[B:.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN:2]] - ; CHECK-NEXT: %[[A:.*]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN]] - ; CHECK: %[[L:.*]] = or i8 %[[A]], %[[B]] - - ; CHECK: %r = icmp eq <4 x i8> %a, %b - ; CHECK: store i8 %[[L]], ptr @__dfsan_retval_tls, align [[ALIGN]] - ; CHECK: ret <4 x i1> %r - +; CHECK-LABEL: define <4 x i1> @icmp_vector( +; CHECK-SAME: <4 x i8> [[A:%.*]], <4 x i8> [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 +; CHECK-NEXT: [[TMP3:%.*]] = or i8 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[R:%.*]] = icmp eq <4 x i8> [[A]], [[B]] +; CHECK-NEXT: store i8 [[TMP3]], ptr @__dfsan_retval_tls, align 2 +; CHECK-NEXT: ret <4 x i1> [[R]] +; %r = icmp eq <4 x i8> %a, %b ret <4 x i1> %r } define <2 x i32> @const_vector() { - ; CHECK-LABEL: @const_vector.dfsan - ; CHECK-NEXT: store i8 0, ptr @__dfsan_retval_tls, align 2 - ; CHECK-NEXT: ret <2 x i32> <i32 42, i32 11> - +; CHECK-LABEL: define <2 x i32> @const_vector() { +; CHECK-NEXT: store i8 0, ptr @__dfsan_retval_tls, align 2 +; CHECK-NEXT: ret <2 x i32> <i32 42, i32 11> +; ret <2 x i32> < i32 42, i32 11 > } define <4 x i4> @call_vector(<4 x i4> %v) { - ; CHECK-LABEL: @call_vector.dfsan - ; CHECK-NEXT: %[[V:.*]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN:2]] - ; CHECK-NEXT: store i8 %[[V]], ptr @__dfsan_arg_tls, align [[ALIGN]] - ; CHECK-NEXT: %r = call <4 x i4> @pass_vector.dfsan(<4 x i4> %v) - ; CHECK-NEXT: %_dfsret = load i8, ptr @__dfsan_retval_tls, align [[ALIGN]] - ; CHECK-NEXT: store i8 %_dfsret, ptr @__dfsan_retval_tls, align [[ALIGN]] - ; CHECK-NEXT: ret <4 x i4> %r - +; CHECK-LABEL: define <4 x i4> @call_vector( +; CHECK-SAME: <4 x i4> [[V:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2 +; CHECK-NEXT: store i8 [[TMP1]], ptr @__dfsan_arg_tls, align 2 +; CHECK-NEXT: [[R:%.*]] = call <4 x i4> @pass_vector(<4 x i4> [[V]]) +; CHECK-NEXT: [[_DFSRET:%.*]] = load i8, ptr @__dfsan_retval_tls, align 2 +; CHECK-NEXT: store i8 [[_DFSRET]], ptr @__dfsan_retval_tls, align 2 +; CHECK-NEXT: ret <4 x i4> [[R]] +; %r = call 
<4 x i4> @pass_vector(<4 x i4> %v) ret <4 x i4> %r } diff --git a/llvm/test/TableGen/RuntimeLibcallEmitter-calling-conv.td b/llvm/test/TableGen/RuntimeLibcallEmitter-calling-conv.td index c224cd6..7ec70b7 100644 --- a/llvm/test/TableGen/RuntimeLibcallEmitter-calling-conv.td +++ b/llvm/test/TableGen/RuntimeLibcallEmitter-calling-conv.td @@ -48,47 +48,39 @@ def MSP430LibraryWithCondCC : SystemRuntimeLibrary<isMSP430, // CHECK-NEXT: Entry = DefaultCC; // CHECK-NEXT: } // CHECK-EMPTY: -// CHECK-NEXT: setLibcallsImpl({ -// CHECK-NEXT: {RTLIB::MALLOC, RTLIB::impl_malloc}, // malloc -// CHECK-NEXT: }); +// CHECK-NEXT: setLibcallImpl(RTLIB::MALLOC, RTLIB::impl_malloc); // malloc // CHECK-EMPTY: -// CHECK-NEXT: setLibcallsImpl({ -// CHECK-NEXT: {RTLIB::SDIVREM_I8, RTLIB::impl___divmodqi4}, // __divmodqi4 -// CHECK-NEXT: {RTLIB::UDIVREM_I16, RTLIB::impl___udivmodhi4}, // __udivmodhi4 -// CHECK-NEXT: }, CallingConv::AVR_BUILTIN); +// CHECK-NEXT: setLibcallImpl(RTLIB::SDIVREM_I8, RTLIB::impl___divmodqi4); // __divmodqi4 +// CHECK-NEXT: setLibcallImplCallingConv(RTLIB::impl___divmodqi4, CallingConv::AVR_BUILTIN); +// CHECK-NEXT: setLibcallImpl(RTLIB::UDIVREM_I16, RTLIB::impl___udivmodhi4); // __udivmodhi4 +// CHECK-NEXT: setLibcallImplCallingConv(RTLIB::impl___udivmodhi4, CallingConv::AVR_BUILTIN); // CHECK-EMPTY: // CHECK-NEXT: return; // CHECK-NEXT: } // CHECK-EMPTY: // CHECK-NEXT: if (TT.getArch() == Triple::avr) { -// CHECK-NEXT: setLibcallsImpl({ -// CHECK-NEXT: {RTLIB::MALLOC, RTLIB::impl_malloc}, // malloc -// CHECK-NEXT: }); +// CHECK-NEXT: setLibcallImpl(RTLIB::MALLOC, RTLIB::impl_malloc); // malloc // CHECK-EMPTY: -// CHECK-NEXT: setLibcallsImpl({ -// CHECK-NEXT: {RTLIB::SDIVREM_I8, RTLIB::impl___divmodqi4}, // __divmodqi4 -// CHECK-NEXT: {RTLIB::UDIVREM_I16, RTLIB::impl___udivmodhi4}, // __udivmodhi4 -// CHECK-NEXT: }, CallingConv::AVR_BUILTIN); +// CHECK-NEXT: setLibcallImpl(RTLIB::SDIVREM_I8, RTLIB::impl___divmodqi4); // __divmodqi4 +// CHECK-NEXT: setLibcallImplCallingConv(RTLIB::impl___divmodqi4, CallingConv::AVR_BUILTIN); +// CHECK-NEXT: setLibcallImpl(RTLIB::UDIVREM_I16, RTLIB::impl___udivmodhi4); // __udivmodhi4 +// CHECK-NEXT: setLibcallImplCallingConv(RTLIB::impl___udivmodhi4, CallingConv::AVR_BUILTIN); // CHECK-EMPTY: // CHECK-NEXT: return; // CHECK-NEXT: } // CHECK-EMPTY: // CHECK-NEXT: if (TT.getArch() == Triple::msp430) { -// CHECK-NEXT: setLibcallsImpl({ -// CHECK-NEXT: {RTLIB::MALLOC, RTLIB::impl_malloc}, // malloc -// CHECK-NEXT: }); +// CHECK-NEXT: setLibcallImpl(RTLIB::MALLOC, RTLIB::impl_malloc); // malloc // CHECK-EMPTY: // CHECK-NEXT: if ( isFoo() ) { -// CHECK-NEXT: setLibcallsImpl({ -// CHECK-NEXT: {RTLIB::SDIVREM_I8, RTLIB::impl___divmodqi4}, // __divmodqi4 -// CHECK-NEXT: }, CallingConv::AVR_BUILTIN); +// CHECK-NEXT: setLibcallImpl(RTLIB::SDIVREM_I8, RTLIB::impl___divmodqi4); // __divmodqi4 +// CHECK-NEXT: setLibcallImplCallingConv(RTLIB::impl___divmodqi4, CallingConv::AVR_BUILTIN); // CHECK-EMPTY: // CHECK-NEXT: } // CHECK-EMPTY: // CHECK-NEXT: if ( isBar() ) { -// CHECK-NEXT: setLibcallsImpl({ -// CHECK-NEXT: {RTLIB::UDIVREM_I16, RTLIB::impl___udivmodhi4}, // __udivmodhi4 -// CHECK-NEXT: }, CallingConv::MSP430_BUILTIN); +// CHECK-NEXT: setLibcallImpl(RTLIB::UDIVREM_I16, RTLIB::impl___udivmodhi4); // __udivmodhi4 +// CHECK-NEXT: setLibcallImplCallingConv(RTLIB::impl___udivmodhi4, CallingConv::MSP430_BUILTIN); // CHECK-EMPTY: // CHECK-NEXT: } // CHECK-EMPTY: diff --git a/llvm/test/TableGen/RuntimeLibcallEmitter-conflict-warning.td 
b/llvm/test/TableGen/RuntimeLibcallEmitter-conflict-warning.td index 8169f56..112c33e 100644 --- a/llvm/test/TableGen/RuntimeLibcallEmitter-conflict-warning.td +++ b/llvm/test/TableGen/RuntimeLibcallEmitter-conflict-warning.td @@ -25,9 +25,7 @@ def dup1 : RuntimeLibcallImpl<ANOTHER_DUP>; // func_a and func_b both provide SOME_FUNC. // CHECK: if (isTargetArchA()) { -// CHECK-NEXT: setLibcallsImpl({ -// CHECK-NEXT: {RTLIB::SOME_FUNC, RTLIB::impl_func_b}, // func_b -// CHECK-NEXT: }); +// CHECK-NEXT: setLibcallImpl(RTLIB::SOME_FUNC, RTLIB::impl_func_b); // func_b // ERR: :[[@LINE+1]]:5: warning: conflicting implementations for libcall SOME_FUNC: func_b, func_a def TheSystemLibraryA : SystemRuntimeLibrary<isTargetArchA, @@ -35,10 +33,8 @@ def TheSystemLibraryA : SystemRuntimeLibrary<isTargetArchA, >; // CHECK: if (isTargetArchB()) { -// CHECK-NEXT: setLibcallsImpl({ -// CHECK-NEXT: {RTLIB::OTHER_FUNC, RTLIB::impl_other_func}, // other_func -// CHECK-NEXT: {RTLIB::SOME_FUNC, RTLIB::impl_func_a}, // func_a -// CHECK-NEXT: }); +// CHECK-NEXT: setLibcallImpl(RTLIB::OTHER_FUNC, RTLIB::impl_other_func); // other_func +// CHECK-NEXT: setLibcallImpl(RTLIB::SOME_FUNC, RTLIB::impl_func_a); // func_a // ERR: :[[@LINE+1]]:5: warning: conflicting implementations for libcall SOME_FUNC: func_a, func_b def TheSystemLibraryB : SystemRuntimeLibrary<isTargetArchB, @@ -46,11 +42,9 @@ def TheSystemLibraryB : SystemRuntimeLibrary<isTargetArchB, >; // CHECK: if (isTargetArchC()) { -// CHECK-NEXT: setLibcallsImpl({ -// CHECK-NEXT: {RTLIB::ANOTHER_DUP, RTLIB::impl_dup1}, // dup1 -// CHECK-NEXT: {RTLIB::OTHER_FUNC, RTLIB::impl_other_func}, // other_func -// CHECK-NEXT: {RTLIB::SOME_FUNC, RTLIB::impl_func_a}, // func_a -// CHECK-NEXT: }); +// CHECK-NEXT: setLibcallImpl(RTLIB::ANOTHER_DUP, RTLIB::impl_dup1); // dup1 +// CHECK-NEXT: setLibcallImpl(RTLIB::OTHER_FUNC, RTLIB::impl_other_func); // other_func +// CHECK-NEXT: setLibcallImpl(RTLIB::SOME_FUNC, RTLIB::impl_func_a); // func_a // ERR: :[[@LINE+3]]:5: warning: conflicting implementations for libcall ANOTHER_DUP: dup1, dup0 // ERR: :[[@LINE+2]]:5: warning: conflicting implementations for libcall SOME_FUNC: func_a, func_b diff --git a/llvm/test/TableGen/RuntimeLibcallEmitter.td b/llvm/test/TableGen/RuntimeLibcallEmitter.td index 78705e2..f4577f8 100644 --- a/llvm/test/TableGen/RuntimeLibcallEmitter.td +++ b/llvm/test/TableGen/RuntimeLibcallEmitter.td @@ -190,40 +190,20 @@ def BlahLibrary : SystemRuntimeLibrary<isBlahArch, (add calloc, LibraryWithCondi // CHECK-NEXT: } // CHECK: void llvm::RTLIB::RuntimeLibcallsInfo::setTargetRuntimeLibcallSets(const llvm::Triple &TT, ExceptionHandling ExceptionModel, FloatABI::ABIType FloatABI, EABI EABIVersion, StringRef ABIName) { -// CHECK-NEXT: struct LibcallImplPair { -// CHECK-NEXT: RTLIB::Libcall Func; -// CHECK-NEXT: RTLIB::LibcallImpl Impl; -// CHECK-NEXT: }; -// CHECK-NEXT: auto setLibcallsImpl = [this]( -// CHECK-NEXT: ArrayRef<LibcallImplPair> Libcalls, -// CHECK-NEXT: std::optional<llvm::CallingConv::ID> CC = {}) -// CHECK-NEXT: { -// CHECK-NEXT: for (const auto [Func, Impl] : Libcalls) { -// CHECK-NEXT: setLibcallImpl(Func, Impl); -// CHECK-NEXT: if (CC) -// CHECK-NEXT: setLibcallImplCallingConv(Impl, *CC); -// CHECK-NEXT: } -// CHECK-NEXT: }; // CHECK-EMPTY: // CHECK-NEXT: if (TT.getArch() == Triple::blah) { -// CHECK-NEXT: setLibcallsImpl({ -// CHECK-NEXT: {RTLIB::BZERO, RTLIB::impl_bzero}, // bzero -// CHECK-NEXT: {RTLIB::CALLOC, RTLIB::impl_calloc}, // calloc -// CHECK-NEXT: {RTLIB::SQRT_F128, RTLIB::impl_sqrtl_f128}, 
// sqrtl -// CHECK-NEXT: }); +// CHECK-NEXT: setLibcallImpl(RTLIB::BZERO, RTLIB::impl_bzero); // bzero +// CHECK-NEXT: setLibcallImpl(RTLIB::CALLOC, RTLIB::impl_calloc); // calloc +// CHECK-NEXT: setLibcallImpl(RTLIB::SQRT_F128, RTLIB::impl_sqrtl_f128); // sqrtl // CHECK-EMPTY: // CHECK-NEXT: if (TT.hasCompilerRT()) { -// CHECK-NEXT: setLibcallsImpl({ -// CHECK-NEXT: {RTLIB::SHL_I32, RTLIB::impl___ashlsi3}, // __ashlsi3 -// CHECK-NEXT: {RTLIB::SRL_I64, RTLIB::impl___lshrdi3}, // __lshrdi3 -// CHECK-NEXT: }); +// CHECK-NEXT: setLibcallImpl(RTLIB::SHL_I32, RTLIB::impl___ashlsi3); // __ashlsi3 +// CHECK-NEXT: setLibcallImpl(RTLIB::SRL_I64, RTLIB::impl___lshrdi3); // __lshrdi3 // CHECK-EMPTY: // CHECK-NEXT: } // CHECK-EMPTY: // CHECK-NEXT: if (TT.getOS() == Triple::bar) { -// CHECK-NEXT: setLibcallsImpl({ -// CHECK-NEXT: {RTLIB::MEMSET, RTLIB::impl____memset}, // ___memset -// CHECK-NEXT: }); +// CHECK-NEXT: setLibcallImpl(RTLIB::MEMSET, RTLIB::impl____memset); // ___memset // CHECK-EMPTY: // CHECK-NEXT: } // CHECK-EMPTY: @@ -231,25 +211,19 @@ def BlahLibrary : SystemRuntimeLibrary<isBlahArch, (add calloc, LibraryWithCondi // CHECK-NEXT: } // CHECK-EMPTY: // CHECK-NEXT: if (TT.getArch() == Triple::buzz) { -// CHECK-NEXT: setLibcallsImpl({ -// CHECK-NEXT: {RTLIB::SHL_I32, RTLIB::impl___ashlsi3}, // __ashlsi3 -// CHECK-NEXT: {RTLIB::SQRT_F80, RTLIB::impl_sqrtl_f80}, // sqrtl -// CHECK-NEXT: {RTLIB::SRL_I64, RTLIB::impl___lshrdi3}, // __lshrdi3 -// CHECK-NEXT: }); +// CHECK-NEXT: setLibcallImpl(RTLIB::SHL_I32, RTLIB::impl___ashlsi3); // __ashlsi3 +// CHECK-NEXT: setLibcallImpl(RTLIB::SQRT_F80, RTLIB::impl_sqrtl_f80); // sqrtl +// CHECK-NEXT: setLibcallImpl(RTLIB::SRL_I64, RTLIB::impl___lshrdi3); // __lshrdi3 // CHECK-EMPTY: // CHECK-NEXT: return; // CHECK-NEXT: } // CHECK-EMPTY: // CHECK-NEXT: if (TT.getArch() == Triple::foo) { -// CHECK-NEXT: setLibcallsImpl({ -// CHECK-NEXT: {RTLIB::BZERO, RTLIB::impl_bzero}, // bzero -// CHECK-NEXT: {RTLIB::SQRT_F128, RTLIB::impl_sqrtl_f128}, // sqrtl -// CHECK-NEXT: }); +// CHECK-NEXT: setLibcallImpl(RTLIB::BZERO, RTLIB::impl_bzero); // bzero +// CHECK-NEXT: setLibcallImpl(RTLIB::SQRT_F128, RTLIB::impl_sqrtl_f128); // sqrtl // CHECK-EMPTY: // CHECK-NEXT: if (TT.getOS() == Triple::bar) { -// CHECK-NEXT: setLibcallsImpl({ -// CHECK-NEXT: {RTLIB::MEMSET, RTLIB::impl____memset}, // ___memset -// CHECK-NEXT: }); +// CHECK-NEXT: setLibcallImpl(RTLIB::MEMSET, RTLIB::impl____memset); // ___memset // CHECK-EMPTY: // CHECK-NEXT: } // CHECK-EMPTY: @@ -257,12 +231,10 @@ def BlahLibrary : SystemRuntimeLibrary<isBlahArch, (add calloc, LibraryWithCondi // CHECK-NEXT: } // CHECK-EMPTY: // CHECK-NEXT: if (TT.getArch() == Triple::simple) { -// CHECK-NEXT: setLibcallsImpl({ -// CHECK-NEXT: {RTLIB::CALLOC, RTLIB::impl_calloc}, // calloc -// CHECK-NEXT: {RTLIB::SHL_I32, RTLIB::impl___ashlsi3}, // __ashlsi3 -// CHECK-NEXT: {RTLIB::SQRT_F80, RTLIB::impl_sqrtl_f80}, // sqrtl -// CHECK-NEXT: {RTLIB::SRL_I64, RTLIB::impl___lshrdi3}, // __lshrdi3 -// CHECK-NEXT: }); +// CHECK-NEXT: setLibcallImpl(RTLIB::CALLOC, RTLIB::impl_calloc); // calloc +// CHECK-NEXT: setLibcallImpl(RTLIB::SHL_I32, RTLIB::impl___ashlsi3); // __ashlsi3 +// CHECK-NEXT: setLibcallImpl(RTLIB::SQRT_F80, RTLIB::impl_sqrtl_f80); // sqrtl +// CHECK-NEXT: setLibcallImpl(RTLIB::SRL_I64, RTLIB::impl___lshrdi3); // __lshrdi3 // CHECK-EMPTY: // CHECK-NEXT: return; // CHECK-NEXT: } diff --git a/llvm/test/Transforms/Inline/attributes.ll b/llvm/test/Transforms/Inline/attributes.ll index 42b1a3a..55ab430 100644 --- 
a/llvm/test/Transforms/Inline/attributes.ll +++ b/llvm/test/Transforms/Inline/attributes.ll @@ -26,6 +26,10 @@ define i32 @sanitize_memtag_callee(i32 %i) sanitize_memtag { ret i32 %i } +define i32 @sanitize_alloc_token_callee(i32 %i) sanitize_alloc_token { + ret i32 %i +} + define i32 @safestack_callee(i32 %i) safestack { ret i32 %i } @@ -58,6 +62,10 @@ define i32 @alwaysinline_sanitize_memtag_callee(i32 %i) alwaysinline sanitize_me ret i32 %i } +define i32 @alwaysinline_sanitize_alloc_token_callee(i32 %i) alwaysinline sanitize_alloc_token { + ret i32 %i +} + define i32 @alwaysinline_safestack_callee(i32 %i) alwaysinline safestack { ret i32 %i } @@ -184,6 +192,39 @@ define i32 @test_sanitize_memtag(i32 %arg) sanitize_memtag { ; CHECK-NEXT: ret i32 } +; ---------------------------------------------------------------------------- ; + +; Can inline sanitize_alloc_token functions into a noattr function. The +; attribute is *not* viral, otherwise may break code. +define i32 @test_no_sanitize_alloc_token(i32 %arg) { +; CHECK-LABEL: @test_no_sanitize_alloc_token( +; CHECK-SAME: ) { +; CHECK-NOT: call +; CHECK: ret i32 +entry: + %x1 = call i32 @noattr_callee(i32 %arg) + %x2 = call i32 @sanitize_alloc_token_callee(i32 %x1) + %x3 = call i32 @alwaysinline_callee(i32 %x2) + %x4 = call i32 @alwaysinline_sanitize_alloc_token_callee(i32 %x3) + ret i32 %x4 +} + +; Can inline noattr functions into a sanitize_alloc_token function. If +; inlinable noattr functions cannot be instrumented, they should be marked with +; explicit noinline. +define i32 @test_sanitize_alloc_token(i32 %arg) sanitize_alloc_token { +; CHECK-LABEL: @test_sanitize_alloc_token( +; CHECK-SAME: ) [[SANITIZE_ALLOC_TOKEN:.*]] { +; CHECK-NOT: call +; CHECK: ret i32 +entry: + %x1 = call i32 @noattr_callee(i32 %arg) + %x2 = call i32 @sanitize_alloc_token_callee(i32 %x1) + %x3 = call i32 @alwaysinline_callee(i32 %x2) + %x4 = call i32 @alwaysinline_sanitize_alloc_token_callee(i32 %x3) + ret i32 %x4 +} + define i32 @test_safestack(i32 %arg) safestack { %x1 = call i32 @noattr_callee(i32 %arg) %x2 = call i32 @safestack_callee(i32 %x1) @@ -639,6 +680,7 @@ define i32 @loader_replaceable_caller() { ret i32 %1 } +; CHECK: attributes [[SANITIZE_ALLOC_TOKEN]] = { sanitize_alloc_token } ; CHECK: attributes [[SLH]] = { speculative_load_hardening } ; CHECK: attributes [[FPMAD_FALSE]] = { "less-precise-fpmad"="false" } ; CHECK: attributes [[FPMAD_TRUE]] = { "less-precise-fpmad"="true" } diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll index 75420d4..bcea03a 100644 --- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll +++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll @@ -1182,31 +1182,13 @@ define void @deref_assumption_in_header_constant_trip_count_nofree_via_context(p ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp 
slt <2 x i32> [[WIDE_LOAD1]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 -; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP6]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1 -; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] -; CHECK: [[PRED_LOAD_IF1]]: -; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP11]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] -; CHECK: [[PRED_LOAD_CONTINUE2]]: -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = phi <2 x i32> [ [[TMP7]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD1]], zeroinitializer +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[WIDE_LOAD1]], <2 x i32> [[WIDE_LOAD2]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP14]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll index f794620..cc3bda4 100644 --- a/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll @@ -504,24 +504,35 @@ exit: define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_size_nofree_via_context(ptr noalias %p1, ptr noalias %p2) nosync { ; CHECK-LABEL: define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_size_nofree_via_context( ; CHECK-SAME: ptr noalias [[P1:%.*]], ptr noalias [[P2:%.*]]) #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P1]], i64 4), "dereferenceable"(ptr [[P1]], i64 1024) ] ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P2]], i64 4), "dereferenceable"(ptr [[P2]], i64 1024) ] -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX1]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = 
load <4 x i8>, ptr [[ARRAYIDX2]], align 1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX1]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[TMP1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]] -; CHECK: [[LOOP_INC]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX1]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]] +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] +; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x i1> [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 1024 +; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]] +; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: [[MIDDLE_SPLIT]]: +; CHECK-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[LOOP_END:.*]] +; CHECK: [[VECTOR_EARLY_EXIT]]: +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 true) +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], [[TMP7]] +; CHECK-NEXT: br label %[[LOOP_END]] ; CHECK: [[LOOP_END]]: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX1]], %[[LOOP]] ], [ -1, %[[LOOP_INC]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ -1, %[[MIDDLE_BLOCK]] ], [ [[TMP8]], %[[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: ret i64 [[RETVAL]] ; entry: diff --git a/llvm/test/Transforms/SimplifyCFG/merge-calls-alloc-token.ll b/llvm/test/Transforms/SimplifyCFG/merge-calls-alloc-token.ll new file mode 100644 index 0000000..9bbe3eb --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/merge-calls-alloc-token.ll @@ -0,0 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -passes=simplifycfg -S | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" + +declare ptr @_Znwm(i64) + +define ptr @test_merge_alloc_token_same(i1 %b) { +; CHECK-LABEL: define ptr @test_merge_alloc_token_same( +; CHECK-SAME: i1 [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CALL:%.*]] = call ptr @_Znwm(i64 4), !alloc_token [[META0:![0-9]+]] +; CHECK-NEXT: ret ptr [[CALL]] +; +entry: + br i1 %b, label %if.then, label %if.else + +if.then: + %call = call ptr @_Znwm(i64 4), !alloc_token !0 + br label %if.end + +if.else: + %call1 = call ptr @_Znwm(i64 4), !alloc_token !0 + br label %if.end + +if.end: + %x.0 = phi ptr [ %call, %if.then ], [ %call1, %if.else ] + ret ptr %x.0 +} + +define ptr @test_merge_alloc_token_different(i1 %b) { +; CHECK-LABEL: define ptr @test_merge_alloc_token_different( +; CHECK-SAME: i1 [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CALL:%.*]] = call ptr @_Znwm(i64 4) +; CHECK-NEXT: ret ptr [[CALL]] +; +entry: + br i1 %b, label %if.then, label %if.else + +if.then: + %call = call ptr @_Znwm(i64 4), !alloc_token !0 + br label %if.end + +if.else: + %call1 = call ptr @_Znwm(i64 4), !alloc_token !1 + br label %if.end + +if.end: + %x.0 = phi ptr [ %call, %if.then ], [ %call1, %if.else ] + ret ptr %x.0 +} + +define ptr 
@test_merge_alloc_token_some1(i1 %b) { +; CHECK-LABEL: define ptr @test_merge_alloc_token_some1( +; CHECK-SAME: i1 [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CALL:%.*]] = call ptr @_Znwm(i64 4) +; CHECK-NEXT: ret ptr [[CALL]] +; +entry: + br i1 %b, label %if.then, label %if.else + +if.then: + %call = call ptr @_Znwm(i64 4), !alloc_token !0 + br label %if.end + +if.else: + %call1 = call ptr @_Znwm(i64 4) + br label %if.end + +if.end: + %x.0 = phi ptr [ %call, %if.then ], [ %call1, %if.else ] + ret ptr %x.0 +} + +define ptr @test_merge_alloc_token_some2(i1 %b) { +; CHECK-LABEL: define ptr @test_merge_alloc_token_some2( +; CHECK-SAME: i1 [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CALL:%.*]] = call ptr @_Znwm(i64 4) +; CHECK-NEXT: ret ptr [[CALL]] +; +entry: + br i1 %b, label %if.then, label %if.else + +if.then: + %call = call ptr @_Znwm(i64 4) + br label %if.end + +if.else: + %call1 = call ptr @_Znwm(i64 4), !alloc_token !0 + br label %if.end + +if.end: + %x.0 = phi ptr [ %call, %if.then ], [ %call1, %if.else ] + ret ptr %x.0 +} + +!0 = !{!"int"} +!1 = !{!"char[4]"} +;. +; CHECK: [[META0]] = !{!"int"} +;. diff --git a/llvm/test/tools/llvm-reduce/inline-call-sites-cost.ll b/llvm/test/tools/llvm-reduce/inline-call-sites-cost.ll new file mode 100644 index 0000000..fc25ca4 --- /dev/null +++ b/llvm/test/tools/llvm-reduce/inline-call-sites-cost.ll @@ -0,0 +1,95 @@ +; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=inline-call-sites -reduce-callsite-inline-threshold=3 --test FileCheck --test-arg --check-prefix=CHECK --test-arg %s --test-arg --input-file %s -o %t +; RUN: FileCheck -check-prefixes=RESULT,CHECK %s < %t + +declare void @extern_b() +declare void @extern_a() + +; RESULT: @gv_init = global ptr @no_inline_noncall_user +@gv_init = global ptr @no_inline_noncall_user + + +; CHECK-LABEL: define void @no_inline_noncall_user( +define void @no_inline_noncall_user() { + call void @extern_a() + call void @extern_a() + call void @extern_a() + call void @extern_a() + ret void +} + +; RESULT-LABEL: define void @noncall_user_call() { +; RESULT-NEXT: call void @no_inline_noncall_user() +; RESULT-NEXT: ret void +define void @noncall_user_call() { + call void @no_inline_noncall_user() + ret void +} + +; RESULT-LABEL: define void @big_callee_small_caller_callee() { +define void @big_callee_small_caller_callee() { + call void @extern_a() + call void @extern_a() + call void @extern_a() + call void @extern_a() + ret void +} + +; RESULT-LABEL: define void @big_callee_small_caller_caller() { +; RESULT-NEXT: call void @extern_b() +; RESULT-NEXT: call void @extern_a() +; RESULT-NEXT: call void @extern_a() +; RESULT-NEXT: call void @extern_a() +; RESULT-NEXT: call void @extern_a() +; RESULT-NEXT: ret void +define void @big_callee_small_caller_caller() { + call void @extern_b() + call void @big_callee_small_caller_callee() + ret void +} + +; RESULT-LABEL: define void @small_callee_big_caller_callee() { +; RESULT-NEXT: call void @extern_a() +; RESULT-NEXT: ret void +define void @small_callee_big_caller_callee() { + call void @extern_a() + ret void +} + +; RESULT-LABEL: define void @small_callee_big_caller_caller() { +; RESULT-NEXT: call void @extern_b() +; RESULT-NEXT: call void @extern_a() +; RESULT-NEXT: call void @extern_b() +; RESULT-NEXT: call void @extern_b() +; RESULT-NEXT: ret void +define void @small_callee_big_caller_caller() { + call void @extern_b() + call void @small_callee_big_caller_callee() + call void @extern_b() + call void @extern_b() + ret void +} + +; 
RESULT-LABEL: define void @big_callee_big_caller_callee() { +define void @big_callee_big_caller_callee() { + call void @extern_a() + call void @extern_a() + call void @extern_a() + call void @extern_a() + ret void +} + +; RESULT-LABEL: define void @big_callee_big_caller_caller() { +; RESULT-NEXT: call void @extern_b() +; RESULT-NEXT: call void @big_callee_big_caller_callee() +; RESULT-NEXT: call void @extern_b() +; RESULT-NEXT: call void @extern_b() +; RESULT-NEXT: call void @extern_b() +; RESULT-NEXT: ret void +define void @big_callee_big_caller_caller() { + call void @extern_b() + call void @big_callee_big_caller_callee() + call void @extern_b() + call void @extern_b() + call void @extern_b() + ret void +} diff --git a/llvm/test/tools/llvm-reduce/inline-call-sites.ll b/llvm/test/tools/llvm-reduce/inline-call-sites.ll new file mode 100644 index 0000000..34775d9 --- /dev/null +++ b/llvm/test/tools/llvm-reduce/inline-call-sites.ll @@ -0,0 +1,765 @@ +; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=inline-call-sites -reduce-callsite-inline-threshold=-1 --test FileCheck --test-arg --check-prefixes=CHECK,INTERESTING --test-arg %s --test-arg --input-file %s -o %t +; RUN: FileCheck -check-prefixes=RESULT,CHECK %s < %t + +; RESULT: @gv = global [2 x ptr] [ptr @only_gv_user, ptr @simple_callee] +@gv = global [2 x ptr] [ptr @only_gv_user, ptr @simple_callee] + +; RESULT: @indirectbr.L = internal unnamed_addr constant [3 x ptr] [ptr blockaddress(@callee_with_indirectbr, %L1), ptr blockaddress(@callee_with_indirectbr, %L2), ptr null], align 8 +@indirectbr.L = internal unnamed_addr constant [3 x ptr] [ptr blockaddress(@callee_with_indirectbr, %L1), ptr blockaddress(@callee_with_indirectbr, %L2), ptr null], align 8 + + +; CHECK-LABEL: define void @simple_callee( +; RESULT-NEXT: store i32 123, ptr %arg, align 4 +; RESULT-NEXT: ret void +define void @simple_callee(ptr %arg) { + store i32 123, ptr %arg + ret void +} + +; CHECK-LABEL: define void @simple_caller( +; RESULT-NEXT: store i32 123, ptr %outer.arg, align 4 +; RESULT-NEXT: ret void +define void @simple_caller(ptr %outer.arg) { + call void @simple_callee(ptr %outer.arg) + ret void +} + +; CHECK-LABEL: define void @multi_simple_caller( +; RESULT-NEXT: store i32 123, ptr %outer.arg, align 4 +; RESULT-NEXT: store i32 123, ptr %outer.arg, align 4 +; RESULT-NEXT: store i32 123, ptr null, align 4 +; RESULT-NEXT: ret void +define void @multi_simple_caller(ptr %outer.arg) { + call void @simple_callee(ptr %outer.arg) + call void @simple_callee(ptr %outer.arg) + call void @simple_callee(ptr null) + ret void +} + +; CHECK-LABEL: define void @only_gv_user( +; RESULT-NEXT: store i32 666, ptr %arg, align 4 +; RESULT-NEXT: ret void +define void @only_gv_user(ptr %arg) { + store i32 666, ptr %arg + ret void +} + +; CHECK-LABEL: define void @recursive( +; RESULT-NEXT: call void @recursive(ptr %arg) +; RESULT-NEXT: ret void +define void @recursive(ptr %arg) { + call void @recursive(ptr %arg) + ret void +} + +; CHECK-LABEL: define void @recursive_with_wrong_callsite_type( +; RESULT-NEXT: call void @recursive_with_wrong_callsite_type(ptr %arg, i32 2) +; RESULT-NEXT: ret void +define void @recursive_with_wrong_callsite_type(ptr %arg) { + call void @recursive_with_wrong_callsite_type(ptr %arg, i32 2) + ret void +} + +; CHECK-LABEL: define void @non_callee_use( +; RESULT-NEXT: store i32 567, ptr %arg, align 4 +; RESULT-NEXT: ret void +define void @non_callee_use(ptr %arg) { + store i32 567, ptr %arg + ret void +} + +declare void @extern_ptr_use(ptr) + +; 
CHECK-LABEL: define void @non_callee_user( +; RESULT-NEXT: call void @extern_ptr_use(ptr @non_callee_use) +; RESULT-NEXT: ret void +define void @non_callee_user() { + call void @extern_ptr_use(ptr @non_callee_use) + ret void +} + +; CHECK-LABEL: define void @non_call_inst_use( +define void @non_call_inst_use(ptr %arg) { + store i32 999, ptr %arg + ret void +} + +; CHECK-LABEL: define void @non_call_inst_user( +; RESULT-NEXT: store ptr @non_call_inst_use, ptr %arg, align 8 +; RESULT-NEXT: ret void +define void @non_call_inst_user(ptr %arg) { + store ptr @non_call_inst_use, ptr %arg + ret void +} + +; CHECK-LABEL: define i32 @used_wrong_call_type( +; RESULT-NEXT: store i32 123, ptr %arg, align 4 +; RESULT-NEXT: ret i32 8 +define i32 @used_wrong_call_type(ptr %arg) { + store i32 123, ptr %arg + ret i32 8 +} + +; Inlining doesn't support the UB cases +; CHECK-LABEL: define void @use_wrong_call_type( +; RESULT-NEXT: call void @used_wrong_call_type(ptr %outer.arg) +; RESULT-NEXT: ret void +define void @use_wrong_call_type(ptr %outer.arg) { + call void @used_wrong_call_type(ptr %outer.arg) + ret void +} + +; INTERESTING-LABEL: define void @incompatible_gc_callee( + +; RESULT-LABEL: define void @incompatible_gc_callee(ptr %arg) gc "gc0" { +; RESULT-NEXT: store i32 10000, ptr %arg, align 4 +; RESULT-NEXT: ret void +define void @incompatible_gc_callee(ptr %arg) gc "gc0" { + store i32 10000, ptr %arg + ret void +} + +; INTERESTING-LABEL: define void @incompatible_gc_caller( + +; RESULT-LABEL: define void @incompatible_gc_caller(ptr %outer.arg) gc "gc1" { +; RESULT-NEXT: call void @incompatible_gc_callee(ptr %outer.arg) +; RESULT-NEXT: ret void +define void @incompatible_gc_caller(ptr %outer.arg) gc "gc1" { + call void @incompatible_gc_callee(ptr %outer.arg) + ret void +} + +; INTERESTING-LABEL: define void @propagate_callee_gc( + +; RESULT-LABEL: define void @propagate_callee_gc(ptr %arg) gc "propagate-gc" { +; RESULT-NEXT: store i32 10000, ptr %arg, align 4 +; RESULT-NEXT: ret void +define void @propagate_callee_gc(ptr %arg) gc "propagate-gc" { + store i32 10000, ptr %arg + ret void +} + +; INTERESTING-LABEL: define void @propagate_caller_gc( + +; RESULT-LABEL: define void @propagate_caller_gc(ptr %arg) gc "propagate-gc" { +; RESULT-NEXT: store i32 10000, ptr %arg, align 4 +; RESULT-NEXT: ret void +define void @propagate_caller_gc(ptr %arg) { + call void @propagate_callee_gc(ptr %arg) + ret void +} + +declare i32 @__gxx_personality_v0(...) 
+ +; INTERESTING-LABEL: define void @propagate_callee_personality( + +; RESULT-LABEL: define void @propagate_callee_personality(ptr %arg) personality ptr @__gxx_personality_v0 { +; RESULT-NEXT: store i32 2000, ptr %arg, align 4 +; RESULT-NEXT: ret void +define void @propagate_callee_personality(ptr %arg) personality ptr @__gxx_personality_v0 { + store i32 2000, ptr %arg + ret void +} + +; INTERESTING-LABEL: define void @propagate_caller_personality( + +; RESULT-LABEL: define void @propagate_caller_personality(ptr %arg) personality ptr @__gxx_personality_v0 { +; RESULT-NEXT: store i32 2000, ptr %arg, align 4 +; RESULT-NEXT: ret void +define void @propagate_caller_personality(ptr %arg) { + call void @propagate_callee_personality(ptr %arg) + ret void +} + +; CHECK-LABEL: define void @callee_with_indirectbr( +define void @callee_with_indirectbr() { +entry: + br label %L1 + +L1: ; preds = %entry, %L1 + %i = phi i32 [ 0, %entry ], [ %inc, %L1 ] + %inc = add i32 %i, 1 + %idxprom = zext i32 %i to i64 + %arrayidx = getelementptr inbounds [3 x ptr], ptr @indirectbr.L, i64 0, i64 %idxprom + %brtarget = load ptr, ptr %arrayidx, align 8 + indirectbr ptr %brtarget, [label %L1, label %L2] + +L2: ; preds = %L1 + ret void +} + +; CHECK-LABEL: define void @calls_func_with_indirectbr( + +; RESULT: L1.i: +; RESULT-NEXT: %i.i = phi i32 [ 0, %call ], [ %inc.i, %L1.i ] +; RESULT-NEXT: %inc.i = add i32 %i.i, 1 +; RESULT-NEXT: %idxprom.i = zext i32 %i.i to i64 +; RESULT-NEXT: %arrayidx.i = getelementptr inbounds [3 x ptr], ptr @indirectbr.L, i64 0, i64 %idxprom.i +; RESULT-NEXT: %brtarget.i = load ptr, ptr %arrayidx.i, align 8 +; RESULT-NEXT: indirectbr ptr %brtarget.i, [label %L1.i, label %callee_with_indirectbr.exit] + +define void @calls_func_with_indirectbr(i1 %arg0) { +entry: + br i1 %arg0, label %call, label %ret + +call: + call void @callee_with_indirectbr() + br label %ret + +ret: + ret void +} + + +; CHECK-LABEL: define ptr @callee_with_blockaddress_use( +; RESULT: L2: +; RESULT-NEXT: store ptr blockaddress(@callee_with_blockaddress_use, %L1), ptr %alloca, align 8 +; RESULT-NEXT: store ptr blockaddress(@callee_with_blockaddress_use, %L2), ptr %alloca, align 8 +; RESULT-NEXT: store ptr blockaddress(@callee_with_blockaddress_use, %L3), ptr %alloca, align 8 +; RESULT-NEXT: %cond1 = load volatile i1, ptr addrspace(1) null +; RESULT-NEXT: br i1 %cond1, label %L1, label %L3 +define ptr @callee_with_blockaddress_use() { +entry: + %alloca = alloca ptr + %cond0 = load volatile i1, ptr addrspace(1) null + br i1 %cond0, label %L1, label %L2 + +L1: + br label %L2 + +L2: + ; reference an earlier block + store ptr blockaddress(@callee_with_blockaddress_use, %L1), ptr %alloca + + ; reference the block itself from the block + store ptr blockaddress(@callee_with_blockaddress_use, %L2), ptr %alloca + + ; reference a later block + store ptr blockaddress(@callee_with_blockaddress_use, %L3), ptr %alloca + + %cond1 = load volatile i1, ptr addrspace(1) null + br i1 %cond1, label %L1, label %L3 + +L3: + %load = load ptr, ptr %alloca + ret ptr %load +} + +; FIXME: This is not correctly remapping the blockaddress use +; CHECK-LABEL: define void @calls_func_with_blockaddress_use( +; RESULT: entry: +; RESULT-NEXT: %alloca.i = alloca ptr, align 8 +; RESULT-NEXT: store i32 1000, ptr null, align 4 +; RESULT-NEXT: br i1 %arg0, label %call, label %ret + +; RESULT: call: +; RESULT-NEXT: store i32 2000, ptr null, align 4 +; RESULT-NEXT: call void @llvm.lifetime.start.p0(ptr %alloca.i) +; RESULT-NEXT: %cond0.i = load volatile i1, ptr 
addrspace(1) null, align 1 +; RESULT-NEXT: br i1 %cond0.i, label %L1.i, label %L2.i + +; RESULT: L1.i: ; preds = %L2.i, %call +; RESULT-NEXT: br label %L2.i + +; RESULT: L2.i: ; preds = %L1.i, %call +; RESULT-NEXT: store ptr blockaddress(@callee_with_blockaddress_use, %L1), ptr %alloca.i, align 8 +; RESULT-NEXT: store ptr blockaddress(@calls_func_with_blockaddress_use, %L2.i), ptr %alloca.i, align 8 +; RESULT-NEXT: store ptr blockaddress(@callee_with_blockaddress_use, %L3), ptr %alloca.i, align 8 +; RESULT-NEXT: %cond1.i = load volatile i1, ptr addrspace(1) null, align 1 +; RESULT-NEXT: br i1 %cond1.i, label %L1.i, label %callee_with_blockaddress_use.exit + +; RESULT: callee_with_blockaddress_use.exit: ; preds = %L2.i +; RESULT-NEXT: %load.i = load ptr, ptr %alloca.i, align 8 +; RESULT-NEXT: call void @llvm.lifetime.end.p0(ptr %alloca.i) +; RESULT-NEXT: store i32 3000, ptr null, align 4 +; RESULT-NEXT: br label %ret + +; RESULT: ret: ; preds = %callee_with_blockaddress_use.exit, %entry +; RESULT-NEXT: store i32 4000, ptr null, align 4 +; RESULT-NEXT: ret void +define void @calls_func_with_blockaddress_use(i1 %arg0) { +entry: + store i32 1000, ptr null + br i1 %arg0, label %call, label %ret + +call: + store i32 2000, ptr null + call ptr @callee_with_blockaddress_use() + store i32 3000, ptr null + br label %ret + +ret: + store i32 4000, ptr null + ret void +} + +; CHECK-LABEL: define void @callee_with_fallthrough_blockaddress_use( +; RESULT: L2: +; RESULT-NEXT: store ptr blockaddress(@callee_with_fallthrough_blockaddress_use, %L1), ptr %alloca, align 8 +; RESULT-NEXT: store ptr blockaddress(@callee_with_fallthrough_blockaddress_use, %L2), ptr %alloca, align 8 +; RESULT-NEXT: store ptr blockaddress(@callee_with_fallthrough_blockaddress_use, %L3), ptr %alloca, align 8 +; RESULT-NEXT: br label %L3 +define void @callee_with_fallthrough_blockaddress_use() { +entry: + %alloca = alloca ptr + br label %L1 + +L1: + store i32 999, ptr null + br label %L2 + +L2: ; preds = %entry, %L1 + ; reference a block before this block + store ptr blockaddress(@callee_with_fallthrough_blockaddress_use, %L1), ptr %alloca + + ; reference the block itself from the block + store ptr blockaddress(@callee_with_fallthrough_blockaddress_use, %L2), ptr %alloca + + ; reference a block after this block + store ptr blockaddress(@callee_with_fallthrough_blockaddress_use, %L3), ptr %alloca + br label %L3 + +L3: ; preds = %L1 + %load = load ptr, ptr %alloca + ret void +} + + +; CHECK-LABEL: define void @calls_func_with_fallthrough_blockaddress_use( +; RESULT: entry: +; RESULT-NEXT: %alloca.i = alloca ptr, align 8 +; RESULT-NEXT: store i32 1000, ptr null +; RESULT-NEXT: br i1 %arg0, label %call, label %ret + +; RESULT: call: +; RESULT-NEXT: store i32 2000, ptr null, align 4 +; RESULT-NEXT: call void @llvm.lifetime.start.p0(ptr %alloca.i) +; RESULT-NEXT: br label %L1.i + +; RESULT: L1.i: ; preds = %call +; RESULT-NEXT: store i32 999, ptr null, align 4 +; RESULT-NEXT: br label %L2.i + +; RESULT: L2.i: +; RESULT-NEXT: store ptr blockaddress(@calls_func_with_fallthrough_blockaddress_use, %L1.i), ptr %alloca.i, align 8 +; RESULT-NEXT: store ptr blockaddress(@calls_func_with_fallthrough_blockaddress_use, %L2.i), ptr %alloca.i, align 8 +; RESULT-NEXT: store ptr blockaddress(@callee_with_fallthrough_blockaddress_use, %L3), ptr %alloca.i, align 8 +; RESULT-NEXT: br label %callee_with_fallthrough_blockaddress_use.exit + +; RESULT: callee_with_fallthrough_blockaddress_use.exit: ; preds = %L2.i +; RESULT-NEXT: %load.i = load ptr, ptr 
%alloca.i, align 8 +; RESULT-NEXT: call void @llvm.lifetime.end.p0(ptr %alloca.i) +; RESULT-NEXT: store i32 3000, ptr null, align 4 +; RESULT-NEXT: br label %ret + +; RESULT: ret: +; RESULT-NEXT: store i32 4000, ptr null, align 4 +; RESULT-NEXT: ret void +define void @calls_func_with_fallthrough_blockaddress_use(i1 %arg0) { +entry: + store i32 1000, ptr null + br i1 %arg0, label %call, label %ret + +call: + store i32 2000, ptr null + call void @callee_with_fallthrough_blockaddress_use() + store i32 3000, ptr null + br label %ret + +ret: + store i32 4000, ptr null + ret void +} + +declare i32 @extern_returns_twice() returns_twice + +; CHECK-LABEL: define i32 @callee_returns_twice( +; RESULT-NEXT: %call = call i32 @extern_returns_twice() +; RESULT-NEXT: %add = add nsw i32 1, %call +; RESULT-NEXT: ret i32 %add +define i32 @callee_returns_twice() { + %call = call i32 @extern_returns_twice() + %add = add nsw i32 1, %call + ret i32 %add +} + +; CHECK-LABEL: define i32 @caller_returns_twice_calls_callee_returns_twice( +; RESULT-NEXT: %call.i = call i32 @extern_returns_twice() +; RESULT-NEXT: %add.i = add nsw i32 1, %call.i +; RESULT-NEXT: %add = add nsw i32 1, %add.i +; RESULT-NEXT: ret i32 %add + define i32 @caller_returns_twice_calls_callee_returns_twice() returns_twice { + %call = call i32 @callee_returns_twice() + %add = add nsw i32 1, %call + ret i32 %add +} + +; Inliner usually blocks inlining of returns_twice functions into +; non-returns_twice functions +; CHECK-LABEL: define i32 @regular_caller_calls_callee_returns_twice() { +; RESULT-NEXT: %call.i = call i32 @extern_returns_twice() +; RESULT-NEXT: %add.i = add nsw i32 1, %call.i +; RESULT-NEXT: %add = add nsw i32 1, %add.i +; RESULT-NEXT: ret i32 %add +define i32 @regular_caller_calls_callee_returns_twice() { + %call = call i32 @callee_returns_twice() + %add = add nsw i32 1, %call + ret i32 %add +} + +; CHECK-LABEL: define void @caller_with_vastart( +; RESULT-NEXT: %ap = alloca ptr, align 4 +; RESULT-NEXT: %ap2 = alloca ptr, align 4 +; RESULT-NEXT: call void @llvm.va_start.p0(ptr nonnull %ap) +; RESULT-NEXT: call void @llvm.va_end.p0(ptr nonnull %ap) +; RESULT-NEXT: call void @llvm.va_start.p0(ptr nonnull %ap) +; RESULT-NEXT: call void @llvm.va_end.p0(ptr nonnull %ap) +; RESULT-NEXT: ret void +define void @caller_with_vastart(ptr noalias nocapture readnone %args, ...) { + %ap = alloca ptr, align 4 + %ap2 = alloca ptr, align 4 + call void @llvm.va_start.p0(ptr nonnull %ap) + call fastcc void @callee_with_vaend(ptr nonnull %ap) + call void @llvm.va_start.p0(ptr nonnull %ap) + call fastcc void @callee_with_vaend_alwaysinline(ptr nonnull %ap) + ret void +} + +; CHECK-LABEL: define fastcc void @callee_with_vaend( +; RESULT-NEXT: tail call void @llvm.va_end.p0(ptr %a) +; RESULT-NEXT: ret void +define fastcc void @callee_with_vaend(ptr %a) { + tail call void @llvm.va_end.p0(ptr %a) + ret void +} + +; CHECK-LABEL: define internal fastcc void @callee_with_vaend_alwaysinline( +; RESULT-NEXT: tail call void @llvm.va_end.p0(ptr %a) +; RESULT-NEXT: ret void +define internal fastcc void @callee_with_vaend_alwaysinline(ptr %a) alwaysinline { + tail call void @llvm.va_end.p0(ptr %a) + ret void +} + +; CHECK-LABEL: define i32 @callee_with_va_start( +define i32 @callee_with_va_start(ptr %a, ...) 
{ + %vargs = alloca ptr, align 8 + tail call void @llvm.va_start.p0(ptr %a) + %va1 = va_arg ptr %vargs, i32 + call void @llvm.va_end(ptr %vargs) + ret i32 %va1 +} + +; CHECK-LABEL: define i32 @callee_vastart_caller( +; RESULT-NEXT: %vargs.i = alloca ptr, align 8 +; RESULT-NEXT: %ap = alloca ptr, align 4 +; RESULT-NEXT: %b = load i32, ptr null, align 4 +; RESULT-NEXT: call void @llvm.lifetime.start.p0(ptr %vargs.i) +; RESULT-NEXT: call void @llvm.va_start.p0(ptr nonnull %ap) +; RESULT-NEXT: %va1.i = va_arg ptr %vargs.i, i32 +; RESULT-NEXT: call void @llvm.va_end.p0(ptr %vargs.i) +; RESULT-NEXT: call void @llvm.lifetime.end.p0(ptr %vargs.i) +; RESULT-NEXT: ret i32 %va1.i +define i32 @callee_vastart_caller(ptr noalias nocapture readnone %args, ...) { + %ap = alloca ptr, align 4 + %b = load i32, ptr null + %result = call i32 (ptr, ...) @callee_with_va_start(ptr nonnull %ap, i32 %b) + ret i32 %result +} + +declare void @llvm.localescape(...) + +; CHECK-LABEL: define internal void @callee_uses_localrecover( +define internal void @callee_uses_localrecover(ptr %fp) { + %a.i8 = call ptr @llvm.localrecover(ptr @callee_uses_localescape, ptr %fp, i32 0) + store i32 42, ptr %a.i8 + ret void +} + +; CHECK-LABEL: define i32 @callee_uses_localescape( +; RESULT-NEXT: %a = alloca i32, align 4 +; RESULT-NEXT: call void (...) @llvm.localescape(ptr %a) +; RESULT-NEXT: %fp = call ptr @llvm.frameaddress.p0(i32 0) +; RESULT-NEXT: %a.i8.i = call ptr @llvm.localrecover(ptr @callee_uses_localescape, ptr %fp, i32 0) +; RESULT-NEXT: store i32 42, ptr %a.i8.i, align 4 +; RESULT-NEXT: %r = load i32, ptr %a, align 4 +; RESULT-NEXT: ret i32 %r +define i32 @callee_uses_localescape() alwaysinline { + %a = alloca i32 + call void (...) @llvm.localescape(ptr %a) + %fp = call ptr @llvm.frameaddress(i32 0) + tail call void @callee_uses_localrecover(ptr %fp) + %r = load i32, ptr %a + ret i32 %r +} + +; CHECK-LABEL: define i32 @callee_uses_localescape_caller( +; RESULT-NEXT: %a.i = alloca i32, align 4 +; RESULT-NEXT: call void @llvm.lifetime.start.p0(ptr %a.i) +; RESULT-NEXT: call void (...) @llvm.localescape(ptr %a.i) +; RESULT-NEXT: %fp.i = call ptr @llvm.frameaddress.p0(i32 0) +; RESULT-NEXT: %a.i8.i.i = call ptr @llvm.localrecover(ptr @callee_uses_localescape, ptr %fp.i, i32 0) +; RESULT-NEXT: store i32 42, ptr %a.i8.i.i, align 4 +; RESULT-NEXT: %r.i = load i32, ptr %a.i, align 4 +; RESULT-NEXT: call void @llvm.lifetime.end.p0(ptr %a.i) +; RESULT-NEXT: ret i32 %r.i +define i32 @callee_uses_localescape_caller() { + %r = tail call i32 @callee_uses_localescape() + ret i32 %r +} + +declare void @llvm.icall.branch.funnel(...) + +; CHECK-LABEL: define void @callee_uses_branch_funnel( +; RESULT-NEXT: musttail call void (...) @llvm.icall.branch.funnel(...) +; RESULT-NEXT: ret void +define void @callee_uses_branch_funnel(...) { + musttail call void (...) @llvm.icall.branch.funnel(...) + ret void +} + +; FIXME: This should fail the verifier after inlining +; CHECK-LABEL: define void @callee_branch_funnel_musttail_caller( +; RESULT-NEXT: call void (...) @llvm.icall.branch.funnel() +; RESULT-NEXT: ret void +define void @callee_branch_funnel_musttail_caller() { + call void (...) 
@callee_uses_branch_funnel() + ret void +} + +; Ignore noinline on the callee function +; CHECK-LABEL: define void @noinline_callee( +; RESULT-NEXT: store i32 123, ptr %arg, align 4 +; RESULT-NEXT: ret void +define void @noinline_callee(ptr %arg) { + store i32 123, ptr %arg + ret void +} + +; CHECK-LABEL: define void @calls_noinline_func( +; RESULT-NEXT: store i32 123, ptr %outer.arg, align 4 +; RESULT-NEXT: ret void +define void @calls_noinline_func(ptr %outer.arg) { + call void @noinline_callee(ptr %outer.arg) + ret void +} + +; Ignore noinline on the callsite +; CHECK-LABEL: define void @calls_noinline_callsite( +; RESULT-NEXT: store i32 123, ptr %outer.arg, align 4 +; RESULT-NEXT: ret void +define void @calls_noinline_callsite(ptr %outer.arg) { + call void @simple_callee(ptr %outer.arg) noinline + ret void +} + +; Ignore optnone +; CHECK-LABEL: define void @optnone_callee( +; RESULT-NEXT: store i32 5555, ptr %arg, align 4 +; RESULT-NEXT: ret void +define void @optnone_callee(ptr %arg) optnone noinline { + store i32 5555, ptr %arg + ret void +} + +; CHECK-LABEL: define void @calls_optnone_callee( +; RESULT-NEXT: store i32 5555, ptr %outer.arg, align 4 +; RESULT-NEXT: ret void +define void @calls_optnone_callee(ptr %outer.arg) { + call void @optnone_callee(ptr %outer.arg) + ret void +} + +; CHECK-LABEL: define void @optnone_caller( +; RESULT-NEXT: store i32 123, ptr %outer.arg, align 4 +; RESULT-NEXT: ret void +define void @optnone_caller(ptr %outer.arg) optnone noinline { + call void @simple_callee(ptr %outer.arg) + ret void +} + +; CHECK-LABEL: define weak void @interposable_callee( +; RESULT-NEXT: store i32 2024, ptr %arg, align 4 +; RESULT-NEXT: ret void +define weak void @interposable_callee(ptr %arg) { + store i32 2024, ptr %arg + ret void +} + +; Ignore interposable linkage +; CHECK-LABEL: @calls_interposable_callee( +; RESULT-NEXT: store i32 2024, ptr %arg, align 4 +; RESULT-NEXT: ret void +define void @calls_interposable_callee(ptr %arg) { + call void @interposable_callee(ptr %arg) + ret void +} + +; Ignore null_pointer_is_valid +; CHECK-LABEL: @null_pointer_is_valid_callee( +; RESULT-NEXT: store i32 42069, ptr %arg, align 4 +; RESULT-NEXT: ret void +define void @null_pointer_is_valid_callee(ptr %arg) null_pointer_is_valid { + store i32 42069, ptr %arg + ret void +} + +; CHECK-LABEL: @calls_null_pointer_is_valid_callee( +; RESULT-NEXT: store i32 42069, ptr %arg, align 4 +; RESULT-NEXT: ret void +define void @calls_null_pointer_is_valid_callee(ptr %arg) { + call void @null_pointer_is_valid_callee(ptr %arg) + ret void +} + +; CHECK-LABEL: @byval_arg_uses_non_alloca_addrspace( +; RESULT-NEXT: %load = load i32, ptr addrspace(1) %arg, align 4 +; RESULT-NEXT: ret i32 %load +define i32 @byval_arg_uses_non_alloca_addrspace(ptr addrspace(1) byval(i32) %arg) { + %load = load i32, ptr addrspace(1) %arg + ret i32 %load +} + +; CHECK-LABEL: @calls_byval_arg_uses_non_alloca_addrspace( +; RESULT-NEXT: %arg1 = alloca i32, align 4, addrspace(1) +; RESULT-NEXT: call void @llvm.lifetime.start.p1(ptr addrspace(1) %arg1) +; RESULT-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 %arg1, ptr addrspace(1) %arg, i64 4, i1 false) +; RESULT-NEXT: %load.i = load i32, ptr addrspace(1) %arg1, align 4 +; RESULT-NEXT: call void @llvm.lifetime.end.p1(ptr addrspace(1) %arg1) +; RESULT-NEXT: ret i32 %load.i +define i32 @calls_byval_arg_uses_non_alloca_addrspace(ptr addrspace(1) %arg) { + %call = call i32 @byval_arg_uses_non_alloca_addrspace(ptr addrspace(1) byval(i32) %arg) + ret i32 %call +} + +; 
CHECK-LABEL: define void @callee_stacksize( +; RESULT-NEXT: %alloca = alloca [4096 x i32] +; RESULT-NEXT: store i32 12345678, ptr %arg +; RESULT-NEXT: store i32 0, ptr %alloca +; RESULT-NEXT: ret void +define void @callee_stacksize(ptr %arg) "inline-max-stacksize"="4" { + %alloca = alloca [4096 x i32] + store i32 12345678, ptr %arg + store i32 0, ptr %alloca + ret void +} + +; CHECK-LABEL: define void @caller_stacksize( +; RESULT-NEXT: %alloca.i = alloca [4096 x i32], align 4 +; RESULT-NEXT: call void @llvm.lifetime.start.p0(ptr %alloca.i) +; RESULT-NEXT: store i32 12345678, ptr %arg, align 4 +; RESULT-NEXT: store i32 0, ptr %alloca.i, align 4 +; RESULT-NEXT: call void @llvm.lifetime.end.p0(ptr %alloca.i) +; RESULT-NEXT: ret void +define void @caller_stacksize(ptr %arg) { + call void @callee_stacksize(ptr %arg) + ret void +} + +; CHECK-LABEL: define void @callee_dynamic_alloca( +; RESULT-NEXT: %alloca = alloca i32, i32 %n, align 4 +; RESULT-NEXT: store i32 12345678, ptr %arg, align 4 +; RESULT-NEXT: store i32 0, ptr %alloca, align 4 +; RESULT-NEXT: ret void +define void @callee_dynamic_alloca(ptr %arg, i32 %n) "inline-max-stacksize"="4" { + %alloca = alloca i32, i32 %n + store i32 12345678, ptr %arg + store i32 0, ptr %alloca + ret void +} + +; CHECK-LABEL: define void @caller_dynamic_alloca( +; RESULT-NEXT: %savedstack = call ptr @llvm.stacksave.p0() +; RESULT-NEXT: %alloca.i = alloca i32, i32 %size, align 4 +; RESULT-NEXT: store i32 12345678, ptr %arg, align 4 +; RESULT-NEXT: store i32 0, ptr %alloca.i, align 4 +; RESULT-NEXT: call void @llvm.stackrestore.p0(ptr %savedstack) +; RESULT-NEXT: ret void +define void @caller_dynamic_alloca(ptr %arg, i32 %size) { + call void @callee_dynamic_alloca(ptr %arg, i32 %size) + ret void +} + +declare void @extern_noduplicate() noduplicate + +; CHECK-LABEL: define void @callee_noduplicate_calls( +; RESULT-NEXT: call void @extern_noduplicate() +; RESULT-NEXT: call void @extern_noduplicate() +; RESULT-NEXT: ret void +define void @callee_noduplicate_calls() { + call void @extern_noduplicate() + call void @extern_noduplicate() + ret void +} + +; Ignore noduplicate restrictions +; CHECK-LABEL: define void @caller_noduplicate_calls_callee( +; RESULT-NEXT: call void @extern_noduplicate() +; RESULT-NEXT: call void @extern_noduplicate() +; RESULT-NEXT: call void @extern_noduplicate() +; RESULT-NEXT: call void @extern_noduplicate() +; RESULT-NEXT: ret void +define void @caller_noduplicate_calls_callee() { + call void @callee_noduplicate_calls() + call void @callee_noduplicate_calls() + ret void +} + +; CHECK-LABEL: define void @sanitize_address_callee( +; RESULT-NEXT: store i32 333, ptr %arg +; RESULT-NEXT: ret void +define void @sanitize_address_callee(ptr %arg) sanitize_address { + store i32 333, ptr %arg + ret void +} + +; CHECK-LABEL: define void @no_sanitize_address_caller( +; RESULT-NEXT: store i32 333, ptr %arg +; RESULT-NEXT: ret void +define void @no_sanitize_address_caller(ptr %arg) { + call void @sanitize_address_callee(ptr %arg) + ret void +} + +; CHECK-LABEL: define float @nonstrictfp_callee( +; RESULT-NEXT: %add = fadd float %a, %a +; RESULT-NEXT: ret float %add +define float @nonstrictfp_callee(float %a) { + %add = fadd float %a, %a + ret float %add +} + +; CHECK-LABEL: define float @strictfp_caller( +; RESULT-NEXT: call float @llvm.experimental.constrained.fadd.f32( +; RESULT-NEXT: call float @llvm.experimental.constrained.fadd.f32( +; RESULT-NEXT: ret float %add +define float @strictfp_caller(float %a) strictfp { + %call = call float 
@nonstrictfp_callee(float %a) strictfp + %add = call float @llvm.experimental.constrained.fadd.f32(float %call, float 2.0, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret float %add +} + +; CHECK-LABEL: define float @strictfp_callee( +; RESULT-NEXT: call float @llvm.experimental.constrained.fadd.f32( +; RESULT-NEXT: ret float +define float @strictfp_callee(float %a) strictfp { + %add = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret float %add +} + +; FIXME: This should not inline. The inlined case should fail the +; verifier, but it does not. +; CHECK-LABEL: define float @nonstrictfp_caller( +; RESULT-NEXT: call float @llvm.experimental.constrained.fadd.f32( +; RESULT-NEXT: fadd float +; RESULT-NEXT: ret float +define float @nonstrictfp_caller(float %a) { + %call = call float @strictfp_callee(float %a) + %add1 = fadd float %call, 2.0 + ret float %add1 +} + +define void @caller_also_has_non_callee_use() { + call void @simple_callee(ptr @simple_callee) + ret void +}