; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 %s -o - | FileCheck -check-prefixes=GFX942,GFX942-SDAG %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 %s -o - | FileCheck -check-prefixes=GFX942,GFX942-GISEL %s define void @memset_p0_varsize_align_4_varsetval(ptr addrspace(0) align 4 %dst, i8 %setval, i64 %size) { ; GFX942-SDAG-LABEL: memset_p0_varsize_align_4_varsetval: ; GFX942-SDAG: ; %bb.0: ; %entry ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v3 ; GFX942-SDAG-NEXT: v_and_b32_e32 v12, -16, v10 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v13, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v4 ; GFX942-SDAG-NEXT: v_and_b32_e32 v8, 15, v10 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, 0 ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 ; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[12:13] ; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX942-SDAG-NEXT: s_cbranch_execz .LBB0_3 ; GFX942-SDAG-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader ; GFX942-SDAG-NEXT: s_mov_b32 s4, 0x4040404 ; GFX942-SDAG-NEXT: v_perm_b32 v4, v2, v2, s4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v4 ; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0 ; GFX942-SDAG-NEXT: .LBB0_2: ; %dynamic-memset-expansion-main-body ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[14:15], v[0:1], 0, s[4:5] ; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 16 ; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0 ; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[12:13] ; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[14:15], v[4:7] ; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1] ; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB0_2 ; GFX942-SDAG-NEXT: .LBB0_3: ; %Flow4 ; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 
; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9] ; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX942-SDAG-NEXT: s_cbranch_execz .LBB0_6 ; GFX942-SDAG-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader ; GFX942-SDAG-NEXT: v_and_b32_e32 v10, -16, v10 ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[10:11] ; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0 ; GFX942-SDAG-NEXT: .LBB0_5: ; %dynamic-memset-expansion-residual-body ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[4:5] ; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 1 ; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0 ; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[8:9] ; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX942-SDAG-NEXT: flat_store_byte v[4:5], v2 ; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1] ; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB0_5 ; GFX942-SDAG-NEXT: .LBB0_6: ; %Flow2 ; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-GISEL-LABEL: memset_p0_varsize_align_4_varsetval: ; GFX942-GISEL: ; %bb.0: ; %entry ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-GISEL-NEXT: v_mov_b32_e32 v10, v3 ; GFX942-GISEL-NEXT: v_and_b32_e32 v8, 15, v10 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v11, v4 ; GFX942-GISEL-NEXT: v_sub_co_u32_e32 v12, vcc, v10, v8 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v9, 0 ; GFX942-GISEL-NEXT: s_nop 0 ; GFX942-GISEL-NEXT: v_subbrev_co_u32_e32 v13, vcc, 0, v11, vcc ; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[12:13] ; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX942-GISEL-NEXT: s_cbranch_execz .LBB0_3 ; GFX942-GISEL-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader ; GFX942-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v2 ; GFX942-GISEL-NEXT: v_lshl_or_b32 v4, v3, 8, v3 ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v3 ; GFX942-GISEL-NEXT: 
v_lshlrev_b32_e32 v3, 24, v3 ; GFX942-GISEL-NEXT: v_or3_b32 v4, v4, v5, v3 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v4 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v6, v4 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v7, v4 ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[14:15], s[0:1] ; GFX942-GISEL-NEXT: .LBB0_2: ; %dynamic-memset-expansion-main-body ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v16, vcc, v0, v14 ; GFX942-GISEL-NEXT: s_nop 1 ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v17, vcc, v1, v15, vcc ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v14, vcc, 16, v14 ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[16:17], v[4:7] ; GFX942-GISEL-NEXT: s_nop 0 ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v15, vcc, 0, v15, vcc ; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[14:15], v[12:13] ; GFX942-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1] ; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB0_2 ; GFX942-GISEL-NEXT: .LBB0_3: ; %Flow4 ; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9] ; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX942-GISEL-NEXT: s_cbranch_execz .LBB0_6 ; GFX942-GISEL-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader ; GFX942-GISEL-NEXT: v_lshrrev_b64 v[4:5], 4, v[10:11] ; GFX942-GISEL-NEXT: v_lshl_add_u64 v[0:1], v[4:5], 4, v[0:1] ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[0:1] ; GFX942-GISEL-NEXT: .LBB0_5: ; %dynamic-memset-expansion-residual-body ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v6, vcc, v0, v4 ; GFX942-GISEL-NEXT: s_nop 1 ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v7, vcc, v1, v5, vcc ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v4, vcc, 1, v4 ; GFX942-GISEL-NEXT: flat_store_byte v[6:7], v2 ; GFX942-GISEL-NEXT: s_nop 0 ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc ; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[4:5], v[8:9] ; 
GFX942-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1] ; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB0_5 ; GFX942-GISEL-NEXT: .LBB0_6: ; %Flow2 ; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31] entry: tail call void @llvm.memset.p0.i64(ptr addrspace(0) noundef nonnull align 4 %dst, i8 %setval, i64 %size, i1 false) ret void } define void @memset_p1_varsize_align_4_varsetval(ptr addrspace(1) align 4 %dst, i8 %setval, i64 %size) { ; GFX942-SDAG-LABEL: memset_p1_varsize_align_4_varsetval: ; GFX942-SDAG: ; %bb.0: ; %entry ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v3 ; GFX942-SDAG-NEXT: v_and_b32_e32 v12, -16, v10 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v13, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v4 ; GFX942-SDAG-NEXT: v_and_b32_e32 v8, 15, v10 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, 0 ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 ; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[12:13] ; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX942-SDAG-NEXT: s_cbranch_execz .LBB1_3 ; GFX942-SDAG-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader ; GFX942-SDAG-NEXT: s_mov_b32 s4, 0x4040404 ; GFX942-SDAG-NEXT: v_perm_b32 v4, v2, v2, s4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v4 ; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0 ; GFX942-SDAG-NEXT: .LBB1_2: ; %dynamic-memset-expansion-main-body ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[14:15], v[0:1], 0, s[4:5] ; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 16 ; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0 ; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[12:13] ; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX942-SDAG-NEXT: global_store_dwordx4 v[14:15], v[4:7], off ; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1] ; 
GFX942-SDAG-NEXT: s_cbranch_execnz .LBB1_2 ; GFX942-SDAG-NEXT: .LBB1_3: ; %Flow4 ; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 ; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9] ; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX942-SDAG-NEXT: s_cbranch_execz .LBB1_6 ; GFX942-SDAG-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader ; GFX942-SDAG-NEXT: v_and_b32_e32 v10, -16, v10 ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[10:11] ; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0 ; GFX942-SDAG-NEXT: .LBB1_5: ; %dynamic-memset-expansion-residual-body ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[4:5] ; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 1 ; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0 ; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[8:9] ; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX942-SDAG-NEXT: global_store_byte v[4:5], v2, off ; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1] ; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB1_5 ; GFX942-SDAG-NEXT: .LBB1_6: ; %Flow2 ; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-GISEL-LABEL: memset_p1_varsize_align_4_varsetval: ; GFX942-GISEL: ; %bb.0: ; %entry ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-GISEL-NEXT: v_mov_b32_e32 v10, v3 ; GFX942-GISEL-NEXT: v_and_b32_e32 v8, 15, v10 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v11, v4 ; GFX942-GISEL-NEXT: v_sub_co_u32_e32 v12, vcc, v10, v8 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v9, 0 ; GFX942-GISEL-NEXT: s_nop 0 ; GFX942-GISEL-NEXT: v_subbrev_co_u32_e32 v13, vcc, 0, v11, vcc ; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[12:13] ; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX942-GISEL-NEXT: s_cbranch_execz .LBB1_3 ; GFX942-GISEL-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader ; 
GFX942-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v2 ; GFX942-GISEL-NEXT: v_lshl_or_b32 v4, v3, 8, v3 ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v3 ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v3 ; GFX942-GISEL-NEXT: v_or3_b32 v4, v4, v5, v3 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v4 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v6, v4 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v7, v4 ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[14:15], s[0:1] ; GFX942-GISEL-NEXT: .LBB1_2: ; %dynamic-memset-expansion-main-body ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v16, vcc, v0, v14 ; GFX942-GISEL-NEXT: s_nop 1 ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v17, vcc, v1, v15, vcc ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v14, vcc, 16, v14 ; GFX942-GISEL-NEXT: global_store_dwordx4 v[16:17], v[4:7], off ; GFX942-GISEL-NEXT: s_nop 0 ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v15, vcc, 0, v15, vcc ; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[14:15], v[12:13] ; GFX942-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1] ; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB1_2 ; GFX942-GISEL-NEXT: .LBB1_3: ; %Flow4 ; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9] ; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX942-GISEL-NEXT: s_cbranch_execz .LBB1_6 ; GFX942-GISEL-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader ; GFX942-GISEL-NEXT: v_lshrrev_b64 v[4:5], 4, v[10:11] ; GFX942-GISEL-NEXT: v_lshl_add_u64 v[0:1], v[4:5], 4, v[0:1] ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[0:1] ; GFX942-GISEL-NEXT: .LBB1_5: ; %dynamic-memset-expansion-residual-body ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v6, vcc, v0, v4 ; GFX942-GISEL-NEXT: s_nop 1 ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v7, vcc, v1, v5, vcc ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v4, vcc, 1, v4 ; GFX942-GISEL-NEXT: 
global_store_byte v[6:7], v2, off ; GFX942-GISEL-NEXT: s_nop 0 ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc ; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[4:5], v[8:9] ; GFX942-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1] ; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB1_5 ; GFX942-GISEL-NEXT: .LBB1_6: ; %Flow2 ; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31] entry: tail call void @llvm.memset.p1.i64(ptr addrspace(1) noundef nonnull align 4 %dst, i8 %setval, i64 %size, i1 false) ret void } define void @memset_p3_varsize_align_4_varsetval(ptr addrspace(3) align 4 %dst, i8 %setval, i64 %size) { ; GFX942-SDAG-LABEL: memset_p3_varsize_align_4_varsetval: ; GFX942-SDAG: ; %bb.0: ; %entry ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v3 ; GFX942-SDAG-NEXT: v_and_b32_e32 v4, -16, v2 ; GFX942-SDAG-NEXT: v_and_b32_e32 v10, 15, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, 0 ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 ; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] ; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX942-SDAG-NEXT: s_cbranch_execz .LBB2_3 ; GFX942-SDAG-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader ; GFX942-SDAG-NEXT: s_mov_b32 s4, 0x4040404 ; GFX942-SDAG-NEXT: v_perm_b32 v6, v1, v1, s4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, v6 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v6 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v6 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v0 ; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0 ; GFX942-SDAG-NEXT: .LBB2_2: ; %dynamic-memset-expansion-main-body ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 16 ; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0 ; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[4:5] ; GFX942-SDAG-NEXT: ds_write2_b32 v9, v8, v7 offset0:2 offset1:3 ; GFX942-SDAG-NEXT: ds_write2_b32 v9, v6, v3 
offset1:1 ; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX942-SDAG-NEXT: v_add_u32_e32 v9, 16, v9 ; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1] ; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB2_2 ; GFX942-SDAG-NEXT: .LBB2_3: ; %Flow7 ; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 ; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11] ; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX942-SDAG-NEXT: s_cbranch_execz .LBB2_6 ; GFX942-SDAG-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader ; GFX942-SDAG-NEXT: v_and_b32_e32 v2, -16, v2 ; GFX942-SDAG-NEXT: v_add_u32_e32 v0, v0, v2 ; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0 ; GFX942-SDAG-NEXT: .LBB2_5: ; %dynamic-memset-expansion-residual-body ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 1 ; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0 ; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[10:11] ; GFX942-SDAG-NEXT: ds_write_b8 v0, v1 ; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX942-SDAG-NEXT: v_add_u32_e32 v0, 1, v0 ; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1] ; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB2_5 ; GFX942-SDAG-NEXT: .LBB2_6: ; %Flow5 ; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-GISEL-LABEL: memset_p3_varsize_align_4_varsetval: ; GFX942-GISEL: ; %bb.0: ; %entry ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-GISEL-NEXT: v_and_b32_e32 v4, 15, v2 ; GFX942-GISEL-NEXT: v_sub_co_u32_e32 v6, vcc, v2, v4 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, 0 ; GFX942-GISEL-NEXT: s_nop 0 ; GFX942-GISEL-NEXT: v_subbrev_co_u32_e32 v7, vcc, 0, v3, vcc ; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] ; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX942-GISEL-NEXT: s_cbranch_execz .LBB2_3 ; GFX942-GISEL-NEXT: ; %bb.1: ; 
%dynamic-memset-expansion-main-body.preheader ; GFX942-GISEL-NEXT: v_and_b32_e32 v8, 0xff, v1 ; GFX942-GISEL-NEXT: v_lshl_or_b32 v9, v8, 8, v8 ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v8 ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v8, 24, v8 ; GFX942-GISEL-NEXT: v_or3_b32 v8, v9, v10, v8 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v9, v8 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v10, v8 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v11, v8 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v14, v0 ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[12:13], s[0:1] ; GFX942-GISEL-NEXT: .LBB2_2: ; %dynamic-memset-expansion-main-body ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v12, vcc, 16, v12 ; GFX942-GISEL-NEXT: ds_write2_b64 v14, v[8:9], v[10:11] offset1:1 ; GFX942-GISEL-NEXT: s_nop 0 ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v13, vcc ; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[12:13], v[6:7] ; GFX942-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX942-GISEL-NEXT: v_add_u32_e32 v14, 16, v14 ; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1] ; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB2_2 ; GFX942-GISEL-NEXT: .LBB2_3: ; %Flow7 ; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] ; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX942-GISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX942-GISEL-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader ; GFX942-GISEL-NEXT: v_lshrrev_b64 v[2:3], 4, v[2:3] ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v2, 4, v2 ; GFX942-GISEL-NEXT: v_add_u32_e32 v0, v0, v2 ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] ; GFX942-GISEL-NEXT: .LBB2_5: ; %dynamic-memset-expansion-residual-body ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v2, vcc, 1, v2 ; GFX942-GISEL-NEXT: ds_write_b8 v0, v1 ; GFX942-GISEL-NEXT: s_nop 0 ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc ; GFX942-GISEL-NEXT: 
v_cmp_ge_u64_e32 vcc, v[2:3], v[4:5] ; GFX942-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX942-GISEL-NEXT: v_add_u32_e32 v0, 1, v0 ; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1] ; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB2_5 ; GFX942-GISEL-NEXT: .LBB2_6: ; %Flow5 ; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31] entry: tail call void @llvm.memset.p3.i64(ptr addrspace(3) noundef nonnull align 4 %dst, i8 %setval, i64 %size, i1 false) ret void } define void @memset_p5_varsize_align_4_varsetval(ptr addrspace(5) align 4 %dst, i8 %setval, i64 %size) { ; GFX942-SDAG-LABEL: memset_p5_varsize_align_4_varsetval: ; GFX942-SDAG: ; %bb.0: ; %entry ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v3 ; GFX942-SDAG-NEXT: v_and_b32_e32 v4, -16, v2 ; GFX942-SDAG-NEXT: v_and_b32_e32 v10, 15, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, 0 ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 ; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] ; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX942-SDAG-NEXT: s_cbranch_execz .LBB3_3 ; GFX942-SDAG-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader ; GFX942-SDAG-NEXT: s_mov_b32 s4, 0x4040404 ; GFX942-SDAG-NEXT: v_perm_b32 v6, v1, v1, s4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v6 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v6 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v6 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, v0 ; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0 ; GFX942-SDAG-NEXT: .LBB3_2: ; %dynamic-memset-expansion-main-body ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 16 ; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0 ; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[4:5] ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v3, v[6:9], off ; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX942-SDAG-NEXT: v_add_u32_e32 v3, 16, v3 ; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1] ; 
GFX942-SDAG-NEXT: s_cbranch_execnz .LBB3_2 ; GFX942-SDAG-NEXT: .LBB3_3: ; %Flow7 ; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 ; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11] ; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX942-SDAG-NEXT: s_cbranch_execz .LBB3_6 ; GFX942-SDAG-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader ; GFX942-SDAG-NEXT: v_and_b32_e32 v2, -16, v2 ; GFX942-SDAG-NEXT: v_add_u32_e32 v0, v0, v2 ; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0 ; GFX942-SDAG-NEXT: .LBB3_5: ; %dynamic-memset-expansion-residual-body ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 1 ; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0 ; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[10:11] ; GFX942-SDAG-NEXT: scratch_store_byte v0, v1, off ; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX942-SDAG-NEXT: v_add_u32_e32 v0, 1, v0 ; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1] ; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB3_5 ; GFX942-SDAG-NEXT: .LBB3_6: ; %Flow5 ; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-GISEL-LABEL: memset_p5_varsize_align_4_varsetval: ; GFX942-GISEL: ; %bb.0: ; %entry ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-GISEL-NEXT: v_and_b32_e32 v8, 15, v2 ; GFX942-GISEL-NEXT: v_sub_co_u32_e32 v10, vcc, v2, v8 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v9, 0 ; GFX942-GISEL-NEXT: s_nop 0 ; GFX942-GISEL-NEXT: v_subbrev_co_u32_e32 v11, vcc, 0, v3, vcc ; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11] ; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX942-GISEL-NEXT: s_cbranch_execz .LBB3_3 ; GFX942-GISEL-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader ; GFX942-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v1 ; GFX942-GISEL-NEXT: v_lshl_or_b32 v5, v4, 8, v4 ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 
v6, 16, v4 ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v4, 24, v4 ; GFX942-GISEL-NEXT: v_or3_b32 v4, v5, v6, v4 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v4 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v6, v4 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v7, v4 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v14, v0 ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[12:13], s[0:1] ; GFX942-GISEL-NEXT: .LBB3_2: ; %dynamic-memset-expansion-main-body ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v12, vcc, 16, v12 ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v14, v[4:7], off ; GFX942-GISEL-NEXT: s_nop 0 ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v13, vcc ; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[12:13], v[10:11] ; GFX942-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX942-GISEL-NEXT: v_add_u32_e32 v14, 16, v14 ; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1] ; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB3_2 ; GFX942-GISEL-NEXT: .LBB3_3: ; %Flow7 ; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9] ; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX942-GISEL-NEXT: s_cbranch_execz .LBB3_6 ; GFX942-GISEL-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader ; GFX942-GISEL-NEXT: v_lshrrev_b64 v[2:3], 4, v[2:3] ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v2, 4, v2 ; GFX942-GISEL-NEXT: v_add_u32_e32 v0, v0, v2 ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] ; GFX942-GISEL-NEXT: .LBB3_5: ; %dynamic-memset-expansion-residual-body ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v2, vcc, 1, v2 ; GFX942-GISEL-NEXT: scratch_store_byte v0, v1, off ; GFX942-GISEL-NEXT: s_nop 0 ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc ; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[2:3], v[8:9] ; GFX942-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX942-GISEL-NEXT: v_add_u32_e32 v0, 1, v0 ; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, 
s[0:1] ; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB3_5 ; GFX942-GISEL-NEXT: .LBB3_6: ; %Flow5 ; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31] entry: tail call void @llvm.memset.p5.i64(ptr addrspace(5) noundef nonnull align 4 %dst, i8 %setval, i64 %size, i1 false) ret void } define void @memset_p0_sz1055_align_4_varsetval(ptr addrspace(0) align 4 %dst, i8 %setval) { ; GFX942-SDAG-LABEL: memset_p0_sz1055_align_4_varsetval: ; GFX942-SDAG: ; %bb.0: ; %entry ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404 ; GFX942-SDAG-NEXT: v_perm_b32 v4, v2, v2, s0 ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse ; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v12, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v13, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v14, v4 ; 
GFX942-SDAG-NEXT: v_mov_b32_e32 v15, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v16, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v17, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v18, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v19, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v20, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v21, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v22, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v23, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v24, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v25, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v26, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v27, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v28, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v29, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v30, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v31, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v32, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v33, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v34, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v35, v4 ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 ; GFX942-SDAG-NEXT: s_mov_b64 s[2:3], 0x70 ; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0x60 ; GFX942-SDAG-NEXT: s_mov_b64 s[6:7], 0x50 ; GFX942-SDAG-NEXT: s_mov_b64 s[8:9], 0xf0 ; GFX942-SDAG-NEXT: s_mov_b64 s[10:11], 0xe0 ; GFX942-SDAG-NEXT: s_mov_b64 s[12:13], 0xd0 ; GFX942-SDAG-NEXT: s_mov_b64 s[14:15], 0xc0 ; GFX942-SDAG-NEXT: s_mov_b64 s[16:17], 0xb0 ; GFX942-SDAG-NEXT: s_mov_b64 s[18:19], 0xa0 ; GFX942-SDAG-NEXT: s_mov_b64 s[20:21], 0x90 ; GFX942-SDAG-NEXT: v_mov_b64_e32 v[36:37], 0x400 ; GFX942-SDAG-NEXT: .LBB4_1: ; %static-memset-expansion-main-body ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[38:39], v[0:1], 0, s[0:1] ; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0x100 ; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, 0 ; GFX942-SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[36:37] ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[48:49], v[38:39], 0, s[2:3] ; GFX942-SDAG-NEXT: s_and_b64 vcc, exec, vcc ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[50:51], v[38:39], 0, s[4:5] ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[52:53], v[38:39], 0, s[6:7] ; 
GFX942-SDAG-NEXT: flat_store_dwordx4 v[38:39], v[20:23] offset:64 ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[54:55], v[38:39], 0, 48 ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[38:39], v[12:15] offset:32 ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[38:39], v[8:11] offset:16 ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[38:39], v[4:7] ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[40:41], v[38:39], 0, s[8:9] ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[42:43], v[38:39], 0, s[10:11] ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[44:45], v[38:39], 0, s[12:13] ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[46:47], v[38:39], 0, s[14:15] ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[56:57], v[38:39], 0, s[16:17] ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[58:59], v[38:39], 0, s[18:19] ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[60:61], v[38:39], 0, s[20:21] ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[38:39], v[4:7] offset:128 ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[48:49], v[32:35] ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[50:51], v[28:31] ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[52:53], v[24:27] ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[54:55], v[16:19] ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[40:41], v[32:35] ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[42:43], v[28:31] ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[44:45], v[24:27] ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[46:47], v[20:23] ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[56:57], v[16:19] ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[58:59], v[12:15] ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[60:61], v[8:11] ; GFX942-SDAG-NEXT: s_cbranch_vccnz .LBB4_1 ; GFX942-SDAG-NEXT: ; %bb.2: ; %static-memset-post-expansion ; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404 ; GFX942-SDAG-NEXT: v_perm_b32 v4, v2, v2, s0 ; GFX942-SDAG-NEXT: v_lshlrev_b16_e32 v3, 8, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v4 ; GFX942-SDAG-NEXT: v_or_b32_sdwa v3, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX942-SDAG-NEXT: 
flat_store_dwordx4 v[0:1], v[4:7] offset:1024 ; GFX942-SDAG-NEXT: flat_store_dwordx3 v[0:1], v[4:6] offset:1040 ; GFX942-SDAG-NEXT: flat_store_short v[0:1], v3 offset:1052 ; GFX942-SDAG-NEXT: flat_store_byte v[0:1], v2 offset:1054 ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v61, a13 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v60, a12 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v59, a11 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v58, a10 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v57, a9 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v56, a8 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v47, a7 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v46, a6 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v45, a5 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v44, a4 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v43, a3 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v42, a2 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v41, a1 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v40, a0 ; Reload Reuse ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-GISEL-LABEL: memset_p0_sz1055_align_4_varsetval: ; GFX942-GISEL: ; %bb.0: ; %entry ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v2 ; GFX942-GISEL-NEXT: v_lshl_or_b32 v4, v3, 8, v3 ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v3 ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v3 ; GFX942-GISEL-NEXT: v_or3_b32 v4, v4, v5, v3 ; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v4 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v6, v4 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v7, v4 ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[8:9], 0x400 ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[10:11], s[0:1] ; GFX942-GISEL-NEXT: .LBB4_1: ; %static-memset-expansion-main-body ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 ; 
GFX942-GISEL-NEXT: v_add_co_u32_e32 v12, vcc, v0, v10 ; GFX942-GISEL-NEXT: s_nop 1 ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v13, vcc, v1, v11, vcc ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v10, vcc, 0x100, v10 ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:16 ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:32 ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:48 ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:64 ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:80 ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:96 ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:112 ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:128 ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:144 ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:160 ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:176 ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:192 ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:208 ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:224 ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:240 ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v11, vcc, 0, v11, vcc ; GFX942-GISEL-NEXT: v_cmp_lt_u64_e32 vcc, v[10:11], v[8:9] ; GFX942-GISEL-NEXT: s_cbranch_vccnz .LBB4_1 ; GFX942-GISEL-NEXT: ; %bb.2: ; %static-memset-post-expansion ; GFX942-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v2 ; GFX942-GISEL-NEXT: v_lshl_or_b32 v4, v3, 8, v3 ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v3 ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v3 ; GFX942-GISEL-NEXT: v_or3_b32 v6, v4, v5, v3 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, 8 ; GFX942-GISEL-NEXT: v_lshlrev_b16_sdwa v3, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX942-GISEL-NEXT: v_or_b32_sdwa v3, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:BYTE_0 src1_sel:DWORD ; GFX942-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v3 ; GFX942-GISEL-NEXT: v_lshl_or_b32 v4, v4, 16, v4 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v7, v6 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v8, v6 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v9, v6 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v4 ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[6:9] offset:1024 ; GFX942-GISEL-NEXT: flat_store_dwordx3 v[0:1], v[4:6] offset:1040 ; GFX942-GISEL-NEXT: flat_store_short v[0:1], v3 offset:1052 ; GFX942-GISEL-NEXT: flat_store_byte v[0:1], v2 offset:1054 ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31] entry: tail call void @llvm.memset.p0.i64(ptr addrspace(0) noundef nonnull align 4 %dst, i8 %setval, i64 1055, i1 false) ret void } ; Test case: memset of a constant 2048 bytes through an addrspace(0) pointer (stored with flat_store_*), align 4, runtime fill byte %setval. ; Expected shape for both selectors: one static-memset-expansion-main-body loop that advances 0x100 bytes per trip using sixteen flat_store_dwordx4 until the counter reaches 0x800. 2048 is a multiple of 0x100, so no residual stores follow the loop. ; The SDAG output additionally copies v40-v47 and v56-v59 into a0-a11 before the loop and reads them back after it. ; The expected output below is autogenerated (see the NOTE at the top of the file). Regenerate it instead of editing it by hand. define void @memset_p0_sz2048_align_4_varsetval(ptr addrspace(0) align 4 %dst, i8 %setval) { ; GFX942-SDAG-LABEL: memset_p0_sz2048_align_4_varsetval: ; GFX942-SDAG: ; %bb.0: ; %entry ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404 ; GFX942-SDAG-NEXT: v_perm_b32 v2, v2, v2, s0 ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v4, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v2 ; 
GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v12, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v13, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v14, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v15, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v16, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v17, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v18, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v19, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v20, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v21, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v22, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v23, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v24, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v25, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v26, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v27, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v28, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v29, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v30, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v31, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v32, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v33, v2 ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 ; GFX942-SDAG-NEXT: s_mov_b64 s[2:3], 0x70 ; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0x60 ; GFX942-SDAG-NEXT: s_mov_b64 s[6:7], 0x50 ; GFX942-SDAG-NEXT: s_mov_b64 s[8:9], 0xf0 ; GFX942-SDAG-NEXT: s_mov_b64 s[10:11], 0xe0 ; GFX942-SDAG-NEXT: s_mov_b64 s[12:13], 0xd0 ; GFX942-SDAG-NEXT: s_mov_b64 s[14:15], 0xc0 ; GFX942-SDAG-NEXT: s_mov_b64 s[16:17], 0xb0 ; GFX942-SDAG-NEXT: s_mov_b64 s[18:19], 0xa0 ; GFX942-SDAG-NEXT: s_mov_b64 s[20:21], 0x90 ; GFX942-SDAG-NEXT: v_mov_b64_e32 v[34:35], 0x800 ; GFX942-SDAG-NEXT: .LBB5_1: ; %static-memset-expansion-main-body ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[36:37], v[0:1], 0, s[0:1] ; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0x100 ; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, 0 ; GFX942-SDAG-NEXT: 
v_cmp_lt_u64_e32 vcc, s[0:1], v[34:35] ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[38:39], v[36:37], 0, s[2:3] ; GFX942-SDAG-NEXT: s_and_b64 vcc, exec, vcc ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[48:49], v[36:37], 0, s[4:5] ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[50:51], v[36:37], 0, s[6:7] ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[36:37], v[18:21] offset:64 ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[52:53], v[36:37], 0, 48 ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[36:37], v[10:13] offset:32 ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[36:37], v[6:9] offset:16 ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[36:37], v[2:5] ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[54:55], v[36:37], 0, s[8:9] ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[40:41], v[36:37], 0, s[10:11] ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[42:43], v[36:37], 0, s[12:13] ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[44:45], v[36:37], 0, s[14:15] ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[46:47], v[36:37], 0, s[16:17] ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[56:57], v[36:37], 0, s[18:19] ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[58:59], v[36:37], 0, s[20:21] ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[36:37], v[2:5] offset:128 ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[38:39], v[30:33] ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[48:49], v[26:29] ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[50:51], v[22:25] ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[52:53], v[14:17] ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[54:55], v[30:33] ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[40:41], v[26:29] ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[42:43], v[22:25] ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[44:45], v[18:21] ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[46:47], v[14:17] ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[56:57], v[10:13] ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[58:59], v[6:9] ; GFX942-SDAG-NEXT: s_cbranch_vccnz .LBB5_1 ; GFX942-SDAG-NEXT: ; %bb.2: ; %static-memset-post-expansion ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v59, a11 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v58, a10 ; Reload Reuse 
; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v57, a9 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v56, a8 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v47, a7 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v46, a6 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v45, a5 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v44, a4 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v43, a3 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v42, a2 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v41, a1 ; Reload Reuse ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v40, a0 ; Reload Reuse ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-GISEL-LABEL: memset_p0_sz2048_align_4_varsetval: ; GFX942-GISEL: ; %bb.0: ; %entry ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v2 ; GFX942-GISEL-NEXT: v_lshl_or_b32 v3, v2, 8, v2 ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v2 ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v2 ; GFX942-GISEL-NEXT: v_or3_b32 v2, v3, v4, v2 ; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, v2 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v4, v2 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v2 ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[6:7], 0x800 ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[0:1] ; GFX942-GISEL-NEXT: .LBB5_1: ; %static-memset-expansion-main-body ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v10, vcc, v0, v8 ; GFX942-GISEL-NEXT: s_nop 1 ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v11, vcc, v1, v9, vcc ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v8, vcc, 0x100, v8 ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:16 ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:32 ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:48 ; GFX942-GISEL-NEXT: flat_store_dwordx4 
v[10:11], v[2:5] offset:64 ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:80 ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:96 ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:112 ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:128 ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:144 ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:160 ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:176 ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:192 ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:208 ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:224 ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:240 ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v9, vcc ; GFX942-GISEL-NEXT: v_cmp_lt_u64_e32 vcc, v[8:9], v[6:7] ; GFX942-GISEL-NEXT: s_cbranch_vccnz .LBB5_1 ; GFX942-GISEL-NEXT: ; %bb.2: ; %static-memset-post-expansion ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31] entry: tail call void @llvm.memset.p0.i64(ptr addrspace(0) noundef nonnull align 4 %dst, i8 %setval, i64 2048, i1 false) ret void } ; Test case: memset of a constant 1055 bytes through an addrspace(1) pointer (stored with global_store_*), align 4, runtime fill byte %setval. ; Expected shape for both selectors: a static-memset-expansion-main-body loop that advances 0x100 bytes per trip using sixteen global_store_dwordx4 until the counter reaches 0x400, followed by residual stores for the last 31 bytes (dwordx4 at offset 1024, dwordx3 at 1040, short at 1052, byte at 1054). ; The expected output below is autogenerated (see the NOTE at the top of the file). Regenerate it instead of editing it by hand. define void @memset_p1_sz1055_align_4_varsetval(ptr addrspace(1) align 4 %dst, i8 %setval) { ; GFX942-SDAG-LABEL: memset_p1_sz1055_align_4_varsetval: ; GFX942-SDAG: ; %bb.0: ; %entry ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404 ; GFX942-SDAG-NEXT: v_perm_b32 v4, v2, v2, s0 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v12, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v13, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v14, v4 ; GFX942-SDAG-NEXT: 
v_mov_b32_e32 v15, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v16, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v17, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v18, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v19, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v20, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v21, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v22, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v23, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v24, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v25, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v26, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v27, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v28, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v29, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v30, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v31, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v32, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v33, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v34, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v35, v4 ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 ; GFX942-SDAG-NEXT: v_mov_b64_e32 v[36:37], 0x400 ; GFX942-SDAG-NEXT: .LBB6_1: ; %static-memset-expansion-main-body ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[38:39], v[0:1], 0, s[0:1] ; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0x100 ; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, 0 ; GFX942-SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[36:37] ; GFX942-SDAG-NEXT: s_and_b64 vcc, exec, vcc ; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[32:35], off offset:112 ; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[28:31], off offset:96 ; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[24:27], off offset:80 ; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[20:23], off offset:64 ; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[16:19], off offset:48 ; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[12:15], off offset:32 ; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[8:11], off offset:16 ; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[4:7], off ; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[32:35], off offset:240 ; 
GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[28:31], off offset:224 ; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[24:27], off offset:208 ; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[20:23], off offset:192 ; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[16:19], off offset:176 ; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[12:15], off offset:160 ; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[8:11], off offset:144 ; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[4:7], off offset:128 ; GFX942-SDAG-NEXT: s_cbranch_vccnz .LBB6_1 ; GFX942-SDAG-NEXT: ; %bb.2: ; %static-memset-post-expansion ; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404 ; GFX942-SDAG-NEXT: v_perm_b32 v4, v2, v2, s0 ; GFX942-SDAG-NEXT: v_lshlrev_b16_e32 v3, 8, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v4 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v4 ; GFX942-SDAG-NEXT: v_or_b32_sdwa v3, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX942-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:1024 ; GFX942-SDAG-NEXT: global_store_dwordx3 v[0:1], v[4:6], off offset:1040 ; GFX942-SDAG-NEXT: global_store_short v[0:1], v3, off offset:1052 ; GFX942-SDAG-NEXT: global_store_byte v[0:1], v2, off offset:1054 ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-GISEL-LABEL: memset_p1_sz1055_align_4_varsetval: ; GFX942-GISEL: ; %bb.0: ; %entry ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v2 ; GFX942-GISEL-NEXT: v_lshl_or_b32 v4, v3, 8, v3 ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v3 ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v3 ; GFX942-GISEL-NEXT: v_or3_b32 v4, v4, v5, v3 ; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v4 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v6, v4 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v7, v4 ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[8:9], 0x400 ; GFX942-GISEL-NEXT: 
v_mov_b64_e32 v[10:11], s[0:1] ; GFX942-GISEL-NEXT: .LBB6_1: ; %static-memset-expansion-main-body ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v12, vcc, v0, v10 ; GFX942-GISEL-NEXT: s_nop 1 ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v13, vcc, v1, v11, vcc ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v10, vcc, 0x100, v10 ; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off ; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:16 ; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:32 ; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:48 ; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:64 ; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:80 ; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:96 ; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:112 ; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:128 ; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:144 ; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:160 ; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:176 ; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:192 ; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:208 ; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:224 ; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:240 ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v11, vcc, 0, v11, vcc ; GFX942-GISEL-NEXT: v_cmp_lt_u64_e32 vcc, v[10:11], v[8:9] ; GFX942-GISEL-NEXT: s_cbranch_vccnz .LBB6_1 ; GFX942-GISEL-NEXT: ; %bb.2: ; %static-memset-post-expansion ; GFX942-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v2 ; GFX942-GISEL-NEXT: v_lshl_or_b32 v4, v3, 8, v3 ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v3 ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v3 ; GFX942-GISEL-NEXT: v_or3_b32 
v6, v4, v5, v3 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, 8 ; GFX942-GISEL-NEXT: v_lshlrev_b16_sdwa v3, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX942-GISEL-NEXT: v_or_b32_sdwa v3, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX942-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v3 ; GFX942-GISEL-NEXT: v_lshl_or_b32 v4, v4, 16, v4 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v7, v6 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v8, v6 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v9, v6 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v4 ; GFX942-GISEL-NEXT: global_store_dwordx4 v[0:1], v[6:9], off offset:1024 ; GFX942-GISEL-NEXT: global_store_dwordx3 v[0:1], v[4:6], off offset:1040 ; GFX942-GISEL-NEXT: global_store_short v[0:1], v3, off offset:1052 ; GFX942-GISEL-NEXT: global_store_byte v[0:1], v2, off offset:1054 ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31] entry: tail call void @llvm.memset.p1.i64(ptr addrspace(1) noundef nonnull align 4 %dst, i8 %setval, i64 1055, i1 false) ret void } ; Test case: memset of a constant 2048 bytes through an addrspace(1) pointer (stored with global_store_*), align 4, runtime fill byte %setval. ; Expected shape for both selectors: a static-memset-expansion-main-body loop that advances 0x100 bytes per trip using sixteen global_store_dwordx4 until the counter reaches 0x800. 2048 is a multiple of 0x100, so the post-expansion block holds only the wait and the return. ; The expected output below is autogenerated (see the NOTE at the top of the file). Regenerate it instead of editing it by hand. define void @memset_p1_sz2048_align_4_varsetval(ptr addrspace(1) align 4 %dst, i8 %setval) { ; GFX942-SDAG-LABEL: memset_p1_sz2048_align_4_varsetval: ; GFX942-SDAG: ; %bb.0: ; %entry ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404 ; GFX942-SDAG-NEXT: v_perm_b32 v2, v2, v2, s0 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v4, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v12, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v13, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v14, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v15, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v16, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 
v17, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v18, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v19, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v20, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v21, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v22, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v23, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v24, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v25, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v26, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v27, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v28, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v29, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v30, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v31, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v32, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v33, v2 ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 ; GFX942-SDAG-NEXT: v_mov_b64_e32 v[34:35], 0x800 ; GFX942-SDAG-NEXT: .LBB7_1: ; %static-memset-expansion-main-body ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[36:37], v[0:1], 0, s[0:1] ; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0x100 ; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, 0 ; GFX942-SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[34:35] ; GFX942-SDAG-NEXT: s_and_b64 vcc, exec, vcc ; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[30:33], off offset:112 ; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[26:29], off offset:96 ; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[22:25], off offset:80 ; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[18:21], off offset:64 ; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[14:17], off offset:48 ; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[10:13], off offset:32 ; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[6:9], off offset:16 ; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[2:5], off ; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[30:33], off offset:240 ; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[26:29], off offset:224 ; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[22:25], off offset:208 ; GFX942-SDAG-NEXT: global_store_dwordx4 
v[36:37], v[18:21], off offset:192 ; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[14:17], off offset:176 ; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[10:13], off offset:160 ; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[6:9], off offset:144 ; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[2:5], off offset:128 ; GFX942-SDAG-NEXT: s_cbranch_vccnz .LBB7_1 ; GFX942-SDAG-NEXT: ; %bb.2: ; %static-memset-post-expansion ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-GISEL-LABEL: memset_p1_sz2048_align_4_varsetval: ; GFX942-GISEL: ; %bb.0: ; %entry ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v2 ; GFX942-GISEL-NEXT: v_lshl_or_b32 v3, v2, 8, v2 ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v2 ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v2 ; GFX942-GISEL-NEXT: v_or3_b32 v2, v3, v4, v2 ; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, v2 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v4, v2 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v2 ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[6:7], 0x800 ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[0:1] ; GFX942-GISEL-NEXT: .LBB7_1: ; %static-memset-expansion-main-body ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v10, vcc, v0, v8 ; GFX942-GISEL-NEXT: s_nop 1 ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v11, vcc, v1, v9, vcc ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v8, vcc, 0x100, v8 ; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off ; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:16 ; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:32 ; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:48 ; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:64 ; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:80 ; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], 
v[2:5], off offset:96 ; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:112 ; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:128 ; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:144 ; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:160 ; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:176 ; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:192 ; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:208 ; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:224 ; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:240 ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v9, vcc ; GFX942-GISEL-NEXT: v_cmp_lt_u64_e32 vcc, v[8:9], v[6:7] ; GFX942-GISEL-NEXT: s_cbranch_vccnz .LBB7_1 ; GFX942-GISEL-NEXT: ; %bb.2: ; %static-memset-post-expansion ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31] entry: tail call void @llvm.memset.p1.i64(ptr addrspace(1) noundef nonnull align 4 %dst, i8 %setval, i64 2048, i1 false) ret void } ; Test case: memset of a constant 1055 bytes through an addrspace(3) pointer (stored with ds_write* instructions), align 4, runtime fill byte %setval. ; Expected shape: a static-memset-expansion-main-body loop that runs until the counter reaches 0x400 in 0x100-byte trips (32 ds_write2_b32 per trip for SDAG, 16 ds_write2_b64 per trip for GlobalISel), followed by residual DS writes for the last 31 bytes that end with ds_write_b16 at offset 1052 and ds_write_b8 at offset 1054. ; The expected output below is autogenerated (see the NOTE at the top of the file). Regenerate it instead of editing it by hand. define void @memset_p3_sz1055_align_4_varsetval(ptr addrspace(3) align 4 %dst, i8 %setval) { ; GFX942-SDAG-LABEL: memset_p3_sz1055_align_4_varsetval: ; GFX942-SDAG: ; %bb.0: ; %entry ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404 ; GFX942-SDAG-NEXT: v_perm_b32 v2, v1, v1, s0 ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 ; GFX942-SDAG-NEXT: v_mov_b64_e32 v[34:35], 0x400 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v36, v0 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v4, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v2 ; GFX942-SDAG-NEXT: 
v_mov_b32_e32 v12, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v13, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v14, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v15, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v16, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v17, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v18, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v19, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v20, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v21, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v22, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v23, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v24, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v25, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v26, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v27, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v28, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v29, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v30, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v31, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v32, v2 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v33, v2 ; GFX942-SDAG-NEXT: .LBB8_1: ; %static-memset-expansion-main-body ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0x100 ; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, 0 ; GFX942-SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[34:35] ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v32, v33 offset0:30 offset1:31 ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v30, v31 offset0:28 offset1:29 ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v28, v29 offset0:26 offset1:27 ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v26, v27 offset0:24 offset1:25 ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v24, v25 offset0:22 offset1:23 ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v22, v23 offset0:20 offset1:21 ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v20, v21 offset0:18 offset1:19 ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v18, v19 offset0:16 offset1:17 ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v16, v17 offset0:14 offset1:15 ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v14, v15 offset0:12 offset1:13 ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v12, v13 offset0:10 offset1:11 ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v10, v11 offset0:8 
offset1:9 ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v8, v9 offset0:6 offset1:7 ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v6, v7 offset0:4 offset1:5 ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v4, v5 offset0:2 offset1:3 ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v2, v3 offset1:1 ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v32, v33 offset0:62 offset1:63 ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v30, v31 offset0:60 offset1:61 ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v28, v29 offset0:58 offset1:59 ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v26, v27 offset0:56 offset1:57 ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v24, v25 offset0:54 offset1:55 ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v22, v23 offset0:52 offset1:53 ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v20, v21 offset0:50 offset1:51 ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v18, v19 offset0:48 offset1:49 ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v16, v17 offset0:46 offset1:47 ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v14, v15 offset0:44 offset1:45 ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v12, v13 offset0:42 offset1:43 ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v10, v11 offset0:40 offset1:41 ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v8, v9 offset0:38 offset1:39 ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v6, v7 offset0:36 offset1:37 ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v4, v5 offset0:34 offset1:35 ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v2, v3 offset0:32 offset1:33 ; GFX942-SDAG-NEXT: v_add_u32_e32 v36, 0x100, v36 ; GFX942-SDAG-NEXT: s_cbranch_vccnz .LBB8_1 ; GFX942-SDAG-NEXT: ; %bb.2: ; %static-memset-post-expansion ; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404 ; GFX942-SDAG-NEXT: v_add_u32_e32 v2, 0x400, v0 ; GFX942-SDAG-NEXT: v_add_u32_e32 v3, 0x408, v0 ; GFX942-SDAG-NEXT: v_perm_b32 v4, v1, v1, s0 ; GFX942-SDAG-NEXT: ds_write2_b32 v3, v4, v4 offset1:1 ; GFX942-SDAG-NEXT: ds_write2_b32 v2, v4, v4 offset1:1 ; GFX942-SDAG-NEXT: v_add_u32_e32 v2, 0x410, v0 ; GFX942-SDAG-NEXT: ds_write2_b32 v2, v4, v4 offset1:1 ; GFX942-SDAG-NEXT: ds_write_b32 v0, v4 offset:1048 ; 
GFX942-SDAG-NEXT: v_lshlrev_b16_e32 v2, 8, v1 ; GFX942-SDAG-NEXT: v_or_b32_sdwa v2, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX942-SDAG-NEXT: ds_write_b16 v0, v2 offset:1052 ; GFX942-SDAG-NEXT: ds_write_b8 v0, v1 offset:1054 ; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-GISEL-LABEL: memset_p3_sz1055_align_4_varsetval: ; GFX942-GISEL: ; %bb.0: ; %entry ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v1 ; GFX942-GISEL-NEXT: v_lshl_or_b32 v3, v2, 8, v2 ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v2 ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v2 ; GFX942-GISEL-NEXT: v_or3_b32 v2, v3, v4, v2 ; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, v2 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v4, v2 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v2 ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[6:7], 0x400 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v10, v0 ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[0:1] ; GFX942-GISEL-NEXT: .LBB8_1: ; %static-memset-expansion-main-body ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v8, vcc, 0x100, v8 ; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset1:1 ; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:2 offset1:3 ; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:4 offset1:5 ; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:6 offset1:7 ; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:8 offset1:9 ; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:10 offset1:11 ; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:12 offset1:13 ; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:14 offset1:15 ; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:16 offset1:17 ; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:18 offset1:19 ; 
GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:20 offset1:21 ; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:22 offset1:23 ; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:24 offset1:25 ; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:26 offset1:27 ; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:28 offset1:29 ; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:30 offset1:31 ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v9, vcc ; GFX942-GISEL-NEXT: v_add_u32_e32 v10, 0x100, v10 ; GFX942-GISEL-NEXT: v_cmp_lt_u64_e32 vcc, v[8:9], v[6:7] ; GFX942-GISEL-NEXT: s_cbranch_vccnz .LBB8_1 ; GFX942-GISEL-NEXT: ; %bb.2: ; %static-memset-post-expansion ; GFX942-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v1 ; GFX942-GISEL-NEXT: v_lshl_or_b32 v3, v2, 8, v2 ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v2 ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v2 ; GFX942-GISEL-NEXT: v_or3_b32 v2, v3, v4, v2 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, v2 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v4, v2 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v2 ; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:128 offset1:129 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, 8 ; GFX942-GISEL-NEXT: v_lshlrev_b16_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX942-GISEL-NEXT: v_or_b32_sdwa v3, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX942-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v3 ; GFX942-GISEL-NEXT: v_lshl_or_b32 v4, v4, 16, v4 ; GFX942-GISEL-NEXT: v_add_u32_e32 v5, 0x410, v0 ; GFX942-GISEL-NEXT: ds_write2_b32 v5, v4, v4 offset1:1 ; GFX942-GISEL-NEXT: ds_write_b32 v0, v2 offset:1048 ; GFX942-GISEL-NEXT: ds_write_b16 v0, v3 offset:1052 ; GFX942-GISEL-NEXT: ds_write_b8 v0, v1 offset:1054 ; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31] entry: tail call void @llvm.memset.p3.i64(ptr addrspace(3) noundef nonnull align 4 %dst, i8 
%setval, i64 1055, i1 false)
  ret void
}

; Constant-size memset of 2048 bytes through an addrspace(3) pointer with
; align 4 and a runtime fill byte (%setval).
; Both selectors are expected to emit a single loop that advances the byte
; counter and the destination by 0x100 per iteration, fills that span with
; ds_write2 stores, and repeats while the counter is below 0x800. No stores
; follow the loop: 2048 is a whole number of 0x100-byte iterations.
define void @memset_p3_sz2048_align_4_varsetval(ptr addrspace(3) align 4 %dst, i8 %setval) {
; GFX942-SDAG-LABEL: memset_p3_sz2048_align_4_varsetval:
; GFX942-SDAG: ; %bb.0: ; %entry
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404
; GFX942-SDAG-NEXT: v_perm_b32 v2, v1, v1, s0
; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0
; GFX942-SDAG-NEXT: v_mov_b64_e32 v[34:35], 0x800
; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v4, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v12, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v14, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v13, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v16, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v15, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v18, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v17, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v20, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v19, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v22, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v21, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v24, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v23, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v26, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v25, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v28, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v27, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v30, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v29, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v32, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v31, v2
; GFX942-SDAG-NEXT: .LBB9_1: ; %static-memset-expansion-main-body
; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0x100
; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, 0
; GFX942-SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[34:35]
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v32, v31 offset0:30 offset1:31
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v30, v29 offset0:28 offset1:29
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v28, v27 offset0:26 offset1:27
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v26, v25 offset0:24 offset1:25
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v24, v23 offset0:22 offset1:23
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v22, v21 offset0:20 offset1:21
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v20, v19 offset0:18 offset1:19
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v18, v17 offset0:16 offset1:17
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v16, v15 offset0:14 offset1:15
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v14, v13 offset0:12 offset1:13
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v12, v11 offset0:10 offset1:11
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v10, v9 offset0:8 offset1:9
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v8, v7 offset0:6 offset1:7
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v6, v5 offset0:4 offset1:5
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v4, v3 offset0:2 offset1:3
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v2, v1 offset1:1
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v32, v31 offset0:62 offset1:63
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v30, v29 offset0:60 offset1:61
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v28, v27 offset0:58 offset1:59
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v26, v25 offset0:56 offset1:57
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v24, v23 offset0:54 offset1:55
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v22, v21 offset0:52 offset1:53
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v20, v19 offset0:50 offset1:51
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v18, v17 offset0:48 offset1:49
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v16, v15 offset0:46 offset1:47
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v14, v13 offset0:44 offset1:45
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v12, v11 offset0:42 offset1:43
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v10, v9 offset0:40 offset1:41
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v8, v7 offset0:38 offset1:39
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v6, v5 offset0:36 offset1:37
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v4, v3 offset0:34 offset1:35
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v2, v1 offset0:32 offset1:33
; GFX942-SDAG-NEXT: v_add_u32_e32 v0, 0x100, v0
; GFX942-SDAG-NEXT: s_cbranch_vccnz .LBB9_1
; GFX942-SDAG-NEXT: ; %bb.2: ; %static-memset-post-expansion
; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-GISEL-LABEL: memset_p3_sz2048_align_4_varsetval:
; GFX942-GISEL: ; %bb.0: ; %entry
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX942-GISEL-NEXT: v_lshl_or_b32 v2, v1, 8, v1
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v1, 24, v1
; GFX942-GISEL-NEXT: v_or3_b32 v2, v2, v3, v1
; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, v2
; GFX942-GISEL-NEXT: v_mov_b32_e32 v4, v2
; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v2
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[6:7], 0x800
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[0:1]
; GFX942-GISEL-NEXT: .LBB9_1: ; %static-memset-expansion-main-body
; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v8, vcc, 0x100, v8
; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:2 offset1:3
; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:4 offset1:5
; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:6 offset1:7
; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:8 offset1:9
; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:10 offset1:11
; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:12 offset1:13
; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:14 offset1:15
; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:16 offset1:17
; GFX942-GISEL-NEXT: ds_write2_b64 v0,
v[2:3], v[4:5] offset0:18 offset1:19
; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:20 offset1:21
; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:22 offset1:23
; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:24 offset1:25
; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:26 offset1:27
; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:28 offset1:29
; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:30 offset1:31
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v9, vcc
; GFX942-GISEL-NEXT: v_add_u32_e32 v0, 0x100, v0
; GFX942-GISEL-NEXT: v_cmp_lt_u64_e32 vcc, v[8:9], v[6:7]
; GFX942-GISEL-NEXT: s_cbranch_vccnz .LBB9_1
; GFX942-GISEL-NEXT: ; %bb.2: ; %static-memset-post-expansion
; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memset.p3.i64(ptr addrspace(3) noundef nonnull align 4 %dst, i8 %setval, i64 2048, i1 false)
  ret void
}

; Constant-size memset of 1055 bytes through an addrspace(5) pointer with
; align 4 and a runtime fill byte (%setval).
; Expected shape: a loop of scratch_store_dwordx4 stores covering 0x100 bytes
; per iteration while the byte counter is below 0x400, followed by
; straight-line stores for the remaining 31 bytes at offsets 1024 (dwordx4),
; 1040 (dwordx2), 1048 (dword), 1052 (short) and 1054 (byte).
define void @memset_p5_sz1055_align_4_varsetval(ptr addrspace(5) align 4 %dst, i8 %setval) {
; GFX942-SDAG-LABEL: memset_p5_sz1055_align_4_varsetval:
; GFX942-SDAG: ; %bb.0: ; %entry
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404
; GFX942-SDAG-NEXT: v_perm_b32 v2, v1, v1, s0
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v4, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v12, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v13, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v14, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v15, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v16, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v17, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v18, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v19, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v20, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v21, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v22, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v23, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v24, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v25, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v26, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v27, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v28, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v29, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v30, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v31, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v32, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v33, v2
; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0
; GFX942-SDAG-NEXT: v_mov_b64_e32 v[34:35], 0x400
; GFX942-SDAG-NEXT: v_mov_b32_e32 v36, v0
; GFX942-SDAG-NEXT: .LBB10_1: ; %static-memset-expansion-main-body
; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0x100
; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, 0
; GFX942-SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[34:35]
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[30:33], off offset:112
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[26:29], off offset:96
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[22:25], off offset:80
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[18:21], off offset:64
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[14:17], off offset:48
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[10:13], off offset:32
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[6:9], off offset:16
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[2:5], off
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[30:33], off offset:240
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[26:29], off offset:224
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[22:25], off offset:208
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[18:21], off offset:192
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[14:17], off offset:176
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[10:13], off offset:160
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[6:9], off offset:144
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[2:5], off offset:128
; GFX942-SDAG-NEXT: v_add_u32_e32 v36, 0x100, v36
; GFX942-SDAG-NEXT: s_cbranch_vccnz .LBB10_1
; GFX942-SDAG-NEXT: ; %bb.2: ; %static-memset-post-expansion
; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404
; GFX942-SDAG-NEXT: v_perm_b32 v2, v1, v1, s0
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v4, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v2
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:1024
; GFX942-SDAG-NEXT: scratch_store_dwordx2 v0, v[2:3], off offset:1040
; GFX942-SDAG-NEXT: scratch_store_dword v0, v2, off offset:1048
; GFX942-SDAG-NEXT: v_lshlrev_b16_e32 v2, 8, v1
; GFX942-SDAG-NEXT: v_or_b32_sdwa v2, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX942-SDAG-NEXT: scratch_store_short v0, v2, off offset:1052
; GFX942-SDAG-NEXT: scratch_store_byte v0, v1, off offset:1054
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-GISEL-LABEL: memset_p5_sz1055_align_4_varsetval:
; GFX942-GISEL: ; %bb.0: ; %entry
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v1
; GFX942-GISEL-NEXT: v_lshl_or_b32 v3, v2, 8, v2
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v2
; GFX942-GISEL-NEXT: v_or3_b32 v2, v3, v4, v2
; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, v2
; GFX942-GISEL-NEXT: v_mov_b32_e32 v4, v2
; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v2
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[6:7], 0x400
; GFX942-GISEL-NEXT: v_mov_b32_e32 v10, v0
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[0:1]
; GFX942-GISEL-NEXT: .LBB10_1: ; %static-memset-expansion-main-body
; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v8, vcc, 0x100, v8
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:16
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:32
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:48
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:64
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:80
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:96
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:112
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:128
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:144
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:160
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:176
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:192
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:208
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:224
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:240
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v9, vcc
; GFX942-GISEL-NEXT: v_add_u32_e32 v10, 0x100, v10
; GFX942-GISEL-NEXT: v_cmp_lt_u64_e32 vcc, v[8:9], v[6:7]
; GFX942-GISEL-NEXT: s_cbranch_vccnz .LBB10_1
; GFX942-GISEL-NEXT: ; %bb.2: ; %static-memset-post-expansion
; GFX942-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v1
; GFX942-GISEL-NEXT: v_lshl_or_b32 v3, v2, 8, v2
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v2
; GFX942-GISEL-NEXT: v_or3_b32 v2, v3, v4, v2
; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, v2
; GFX942-GISEL-NEXT: v_mov_b32_e32 v4, v2
; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v2
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:1024
; GFX942-GISEL-NEXT: s_nop 1
; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, 8
; GFX942-GISEL-NEXT: v_lshlrev_b16_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
src1_sel:BYTE_0
; GFX942-GISEL-NEXT: v_or_b32_sdwa v3, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX942-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v3
; GFX942-GISEL-NEXT: v_lshl_or_b32 v4, v4, 16, v4
; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v4
; GFX942-GISEL-NEXT: scratch_store_dwordx2 v0, v[4:5], off offset:1040
; GFX942-GISEL-NEXT: scratch_store_dword v0, v2, off offset:1048
; GFX942-GISEL-NEXT: scratch_store_short v0, v3, off offset:1052
; GFX942-GISEL-NEXT: scratch_store_byte v0, v1, off offset:1054
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memset.p5.i64(ptr addrspace(5) noundef nonnull align 4 %dst, i8 %setval, i64 1055, i1 false)
  ret void
}

; Constant-size memset of 2048 bytes through an addrspace(5) pointer with
; align 4 and a runtime fill byte (%setval).
; Expected shape: a single loop of scratch_store_dwordx4 stores covering
; 0x100 bytes per iteration while the byte counter is below 0x800. No stores
; follow the loop: 2048 is a whole number of 0x100-byte iterations.
define void @memset_p5_sz2048_align_4_varsetval(ptr addrspace(5) align 4 %dst, i8 %setval) {
; GFX942-SDAG-LABEL: memset_p5_sz2048_align_4_varsetval:
; GFX942-SDAG: ; %bb.0: ; %entry
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404
; GFX942-SDAG-NEXT: v_perm_b32 v2, v1, v1, s0
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v4, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v12, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v13, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v14, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v15, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v16, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v17, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v18, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v19, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v20, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v21, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v22, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v23, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v24, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v25, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v26, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v27, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v28, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v29, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v30, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v31, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v32, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v33, v2
; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0
; GFX942-SDAG-NEXT: v_mov_b64_e32 v[34:35], 0x800
; GFX942-SDAG-NEXT: .LBB11_1: ; %static-memset-expansion-main-body
; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0x100
; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, 0
; GFX942-SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[34:35]
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[30:33], off offset:112
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[26:29], off offset:96
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[22:25], off offset:80
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[18:21], off offset:64
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[14:17], off offset:48
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[10:13], off offset:32
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[6:9], off offset:16
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[2:5], off
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[30:33], off offset:240
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[26:29], off offset:224
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[22:25], off offset:208
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[18:21], off offset:192
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[14:17], off offset:176
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[10:13], off offset:160
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[6:9], off offset:144
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:128
; GFX942-SDAG-NEXT: v_add_u32_e32 v0, 0x100, v0
; GFX942-SDAG-NEXT: s_cbranch_vccnz .LBB11_1
; GFX942-SDAG-NEXT: ; %bb.2: ; %static-memset-post-expansion
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-GISEL-LABEL: memset_p5_sz2048_align_4_varsetval:
; GFX942-GISEL: ; %bb.0: ; %entry
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX942-GISEL-NEXT: v_lshl_or_b32 v2, v1, 8, v1
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v1, 24, v1
; GFX942-GISEL-NEXT: v_or3_b32 v2, v2, v3, v1
; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, v2
; GFX942-GISEL-NEXT: v_mov_b32_e32 v4, v2
; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v2
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[6:7], 0x800
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[0:1]
; GFX942-GISEL-NEXT: .LBB11_1: ; %static-memset-expansion-main-body
; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v8, vcc, 0x100, v8
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:16
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:32
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:48
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:64
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:80
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:96
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:112
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:128
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:144
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:160
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:176
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:192
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:208
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:224
; GFX942-GISEL-NEXT:
scratch_store_dwordx4 v0, v[2:5], off offset:240
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v9, vcc
; GFX942-GISEL-NEXT: v_add_u32_e32 v0, 0x100, v0
; GFX942-GISEL-NEXT: v_cmp_lt_u64_e32 vcc, v[8:9], v[6:7]
; GFX942-GISEL-NEXT: s_cbranch_vccnz .LBB11_1
; GFX942-GISEL-NEXT: ; %bb.2: ; %static-memset-post-expansion
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memset.p5.i64(ptr addrspace(5) noundef nonnull align 4 %dst, i8 %setval, i64 2048, i1 false)
  ret void
}

; Runtime-size memset through an addrspace(1) pointer with align 4 and the
; constant fill byte 40 (0x28).
; Expected shape: two exec-masked loops, each skipped when its byte count is
; zero. The main loop stores 16 bytes per iteration (global_store_dwordx4 of
; the 0x28282828 splat) over the size with its low four bits cleared. The
; residual loop stores one byte per iteration for the remaining size & 15
; bytes, starting where the main loop's range ends.
define void @memset_p1_varsz_align_4_set40(ptr addrspace(1) align 4 %dst, i64 %size) {
; GFX942-SDAG-LABEL: memset_p1_varsz_align_4_set40:
; GFX942-SDAG: ; %bb.0: ; %entry
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_and_b32_e32 v10, -16, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v3
; GFX942-SDAG-NEXT: v_and_b32_e32 v8, 15, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, 0
; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0
; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11]
; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-SDAG-NEXT: s_cbranch_execz .LBB12_3
; GFX942-SDAG-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader
; GFX942-SDAG-NEXT: v_mov_b32_e32 v4, 0x28282828
; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v4
; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0
; GFX942-SDAG-NEXT: .LBB12_2: ; %dynamic-memset-expansion-main-body
; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[12:13], v[0:1], 0, s[4:5]
; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 16
; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0
; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[10:11]
; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX942-SDAG-NEXT: global_store_dwordx4 v[12:13], v[4:7], off
; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB12_2
; GFX942-SDAG-NEXT: .LBB12_3: ; %Flow4
; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0
; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9]
; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-SDAG-NEXT: s_cbranch_execz .LBB12_6
; GFX942-SDAG-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader
; GFX942-SDAG-NEXT: v_and_b32_e32 v2, -16, v2
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, 40
; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0
; GFX942-SDAG-NEXT: .LBB12_5: ; %dynamic-memset-expansion-residual-body
; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[4:5]
; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 1
; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0
; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[8:9]
; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX942-SDAG-NEXT: global_store_byte v[4:5], v2, off
; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB12_5
; GFX942-SDAG-NEXT: .LBB12_6: ; %Flow2
; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-GISEL-LABEL: memset_p1_varsz_align_4_set40:
; GFX942-GISEL: ; %bb.0: ; %entry
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-GISEL-NEXT: v_and_b32_e32 v8, 15, v2
; GFX942-GISEL-NEXT: v_sub_co_u32_e32 v10, vcc, v2, v8
; GFX942-GISEL-NEXT: v_mov_b32_e32 v9, 0
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: v_subbrev_co_u32_e32 v11, vcc, 0, v3, vcc
; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11]
; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-GISEL-NEXT: s_cbranch_execz .LBB12_3
; GFX942-GISEL-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader
; GFX942-GISEL-NEXT: s_mov_b32 s4, 0x28282828
; GFX942-GISEL-NEXT: s_mov_b32 s5, s4
; GFX942-GISEL-NEXT: s_mov_b32 s6, s4
; GFX942-GISEL-NEXT: s_mov_b32 s7, s4
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[4:5]
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[6:7]
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[12:13], s[0:1]
; GFX942-GISEL-NEXT: .LBB12_2: ; %dynamic-memset-expansion-main-body
; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v14, vcc, v0, v12
; GFX942-GISEL-NEXT: s_nop 1
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v15, vcc, v1, v13, vcc
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v12, vcc, 16, v12
; GFX942-GISEL-NEXT: global_store_dwordx4 v[14:15], v[4:7], off
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v13, vcc
; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[12:13], v[10:11]
; GFX942-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB12_2
; GFX942-GISEL-NEXT: .LBB12_3: ; %Flow4
; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9]
; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-GISEL-NEXT: s_cbranch_execz .LBB12_6
; GFX942-GISEL-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader
; GFX942-GISEL-NEXT: v_lshrrev_b64 v[2:3], 4, v[2:3]
; GFX942-GISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 4, v[0:1]
; GFX942-GISEL-NEXT: v_mov_b32_e32 v4, 40
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
; GFX942-GISEL-NEXT: .LBB12_5: ; %dynamic-memset-expansion-residual-body
; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v6, vcc, v0, v2
; GFX942-GISEL-NEXT: s_nop 1
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v7, vcc, v1, v3, vcc
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v2, vcc, 1, v2
; GFX942-GISEL-NEXT: global_store_byte v[6:7], v4, off
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[2:3], v[8:9]
; GFX942-GISEL-NEXT: s_or_b64 s[0:1],
vcc, s[0:1]
; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB12_5
; GFX942-GISEL-NEXT: .LBB12_6: ; %Flow2
; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memset.p1.i64(ptr addrspace(1) noundef nonnull align 4 %dst, i8 40, i64 %size, i1 false)
  ret void
}

; Runtime-size memset through an addrspace(1) pointer with align 4 and the
; constant fill byte 0.
; Expected shape matches the fill-40 variant: an exec-masked main loop that
; stores 16 bytes per iteration (global_store_dwordx4 of zero registers),
; then an exec-masked residual loop that stores one zero byte per iteration
; for the remaining size & 15 bytes.
define void @memset_p1_varsz_align_4_set0(ptr addrspace(1) align 4 %dst, i64 %size) {
; GFX942-SDAG-LABEL: memset_p1_varsz_align_4_set0:
; GFX942-SDAG: ; %bb.0: ; %entry
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_and_b32_e32 v6, -16, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v3
; GFX942-SDAG-NEXT: v_and_b32_e32 v4, 15, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, 0
; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0
; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7]
; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-SDAG-NEXT: s_cbranch_execz .LBB13_3
; GFX942-SDAG-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader
; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v5
; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v5
; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v5
; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v5
; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0
; GFX942-SDAG-NEXT: .LBB13_2: ; %dynamic-memset-expansion-main-body
; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[12:13], v[0:1], 0, s[4:5]
; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 16
; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0
; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[6:7]
; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX942-SDAG-NEXT: global_store_dwordx4 v[12:13], v[8:11], off
; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB13_2
; GFX942-SDAG-NEXT: .LBB13_3: ; %Flow4
; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0
; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-SDAG-NEXT: s_cbranch_execz .LBB13_6
; GFX942-SDAG-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader
; GFX942-SDAG-NEXT: v_and_b32_e32 v2, -16, v2
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, 0
; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0
; GFX942-SDAG-NEXT: .LBB13_5: ; %dynamic-memset-expansion-residual-body
; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[6:7], v[0:1], 0, s[4:5]
; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 1
; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0
; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[4:5]
; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX942-SDAG-NEXT: global_store_byte v[6:7], v2, off
; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB13_5
; GFX942-SDAG-NEXT: .LBB13_6: ; %Flow2
; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-GISEL-LABEL: memset_p1_varsz_align_4_set0:
; GFX942-GISEL: ; %bb.0: ; %entry
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-GISEL-NEXT: v_and_b32_e32 v8, 15, v2
; GFX942-GISEL-NEXT: v_sub_co_u32_e32 v10, vcc, v2, v8
; GFX942-GISEL-NEXT: s_mov_b32 s0, 0
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: v_subbrev_co_u32_e32 v11, vcc, 0, v3, vcc
; GFX942-GISEL-NEXT: v_mov_b32_e32 v9, 0
; GFX942-GISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11]
; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
; GFX942-GISEL-NEXT: s_cbranch_execz .LBB13_3
; GFX942-GISEL-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader
; GFX942-GISEL-NEXT: s_mov_b32 s2, s0
; GFX942-GISEL-NEXT: s_mov_b32 s3, s0
; GFX942-GISEL-NEXT: s_mov_b32 s1, s0
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[2:3]
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[0:1]
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[12:13], s[4:5]
; GFX942-GISEL-NEXT: .LBB13_2: ; %dynamic-memset-expansion-main-body
; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v14, vcc, v0, v12
; GFX942-GISEL-NEXT: s_nop 1
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v15, vcc, v1, v13, vcc
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v12, vcc, 16, v12
; GFX942-GISEL-NEXT: global_store_dwordx4 v[14:15], v[4:7], off
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v13, vcc
; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[12:13], v[10:11]
; GFX942-GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[4:5]
; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB13_2
; GFX942-GISEL-NEXT: .LBB13_3: ; %Flow4
; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9]
; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-GISEL-NEXT: s_cbranch_execz .LBB13_6
; GFX942-GISEL-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader
; GFX942-GISEL-NEXT: v_lshrrev_b64 v[2:3], 4, v[2:3]
; GFX942-GISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 4, v[0:1]
; GFX942-GISEL-NEXT: v_mov_b32_e32 v4, 0
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
; GFX942-GISEL-NEXT: .LBB13_5: ; %dynamic-memset-expansion-residual-body
; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v6, vcc, v0, v2
; GFX942-GISEL-NEXT: s_nop 1
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v7, vcc, v1, v3, vcc
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v2, vcc, 1, v2
; GFX942-GISEL-NEXT: global_store_byte v[6:7], v4, off
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[2:3], v[8:9]
; GFX942-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB13_5
; GFX942-GISEL-NEXT: .LBB13_6: ; %Flow2
; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memset.p1.i64(ptr addrspace(1) noundef nonnull align 4 %dst, i8 0, i64 %size, i1 false)
  ret void
}

; Declarations of the memset intrinsic, one per destination address space
; exercised by the tests in this file (0, 1, 3 and 5).
declare void @llvm.memset.p0.i64(ptr addrspace(0) noalias nocapture writeonly, i8, i64, i1 immarg)
declare void @llvm.memset.p1.i64(ptr addrspace(1) noalias nocapture writeonly, i8, i64, i1 immarg)
declare void @llvm.memset.p3.i64(ptr addrspace(3) noalias nocapture writeonly, i8, i64, i1 immarg)
declare void @llvm.memset.p5.i64(ptr addrspace(5) noalias nocapture writeonly, i8, i64, i1 immarg)

;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX942: {{.*}}