; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=verde < %s | FileCheck -check-prefixes=GCN,SI %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI %s

define amdgpu_kernel void @truncstore_arg_v16i32_to_v16i8(ptr addrspace(1) %out, <16 x i32> %in) {
; SI-LABEL: truncstore_arg_v16i32_to_v16i8:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx16 s[8:23], s[4:5], 0x19
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_and_b32 s5, s22, 0xff
; SI-NEXT:    s_lshl_b32 s4, s23, 24
; SI-NEXT:    s_lshl_b32 s5, s5, 16
; SI-NEXT:    s_or_b32 s4, s4, s5
; SI-NEXT:    s_lshl_b32 s5, s21, 8
; SI-NEXT:    s_and_b32 s6, s20, 0xff
; SI-NEXT:    s_or_b32 s5, s6, s5
; SI-NEXT:    s_and_b32 s5, s5, 0xffff
; SI-NEXT:    s_and_b32 s6, s18, 0xff
; SI-NEXT:    s_or_b32 s4, s5, s4
; SI-NEXT:    s_lshl_b32 s5, s19, 24
; SI-NEXT:    s_lshl_b32 s6, s6, 16
; SI-NEXT:    s_or_b32 s5, s5, s6
; SI-NEXT:    s_lshl_b32 s6, s17, 8
; SI-NEXT:    s_and_b32 s7, s16, 0xff
; SI-NEXT:    s_or_b32 s6, s7, s6
; SI-NEXT:    s_and_b32 s6, s6, 0xffff
; SI-NEXT:    s_and_b32 s7, s14, 0xff
; SI-NEXT:    s_or_b32 s5, s6, s5
; SI-NEXT:    s_lshl_b32 s6, s15, 24
; SI-NEXT:    s_lshl_b32 s7, s7, 16
; SI-NEXT:    s_or_b32 s6, s6, s7
; SI-NEXT:    s_lshl_b32 s7, s13, 8
; SI-NEXT:    s_and_b32 s12, s12, 0xff
; SI-NEXT:    s_or_b32 s7, s12, s7
; SI-NEXT:    s_and_b32 s7, s7, 0xffff
; SI-NEXT:    s_and_b32 s10, s10, 0xff
; SI-NEXT:    s_lshl_b32 s9, s9, 8
; SI-NEXT:    s_and_b32 s8, s8, 0xff
; SI-NEXT:    s_or_b32 s6, s7, s6
; SI-NEXT:    s_lshl_b32 s7, s11, 24
; SI-NEXT:    s_lshl_b32 s10, s10, 16
; SI-NEXT:    s_or_b32 s8, s8, s9
; SI-NEXT:    s_or_b32 s7, s7, s10
; SI-NEXT:    s_and_b32 s8, s8, 0xffff
; SI-NEXT:    s_or_b32 s7, s8, s7
; SI-NEXT:    v_mov_b32_e32 v0, s7
; SI-NEXT:    v_mov_b32_e32 v1, s6
; SI-NEXT:    v_mov_b32_e32 v2, s5
; SI-NEXT:    v_mov_b32_e32 v3, s4
; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: truncstore_arg_v16i32_to_v16i8:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx16 s[8:23], s[4:5], 0x64
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_lshl_b32 s2, s23, 8
; VI-NEXT:    s_and_b32 s3, s22, 0xff
; VI-NEXT:    s_lshl_b32 s4, s21, 8
; VI-NEXT:    s_and_b32 s5, s20, 0xff
; VI-NEXT:    s_or_b32 s2, s3, s2
; VI-NEXT:    s_or_b32 s3, s5, s4
; VI-NEXT:    s_lshl_b32 s2, s2, 16
; VI-NEXT:    s_and_b32 s3, s3, 0xffff
; VI-NEXT:    s_or_b32 s2, s3, s2
; VI-NEXT:    s_lshl_b32 s3, s19, 8
; VI-NEXT:    s_and_b32 s4, s18, 0xff
; VI-NEXT:    s_or_b32 s3, s4, s3
; VI-NEXT:    s_lshl_b32 s4, s17, 8
; VI-NEXT:    s_and_b32 s5, s16, 0xff
; VI-NEXT:    s_or_b32 s4, s5, s4
; VI-NEXT:    s_lshl_b32 s3, s3, 16
; VI-NEXT:    s_and_b32 s4, s4, 0xffff
; VI-NEXT:    s_or_b32 s3, s4, s3
; VI-NEXT:    s_lshl_b32 s4, s15, 8
; VI-NEXT:    s_and_b32 s5, s14, 0xff
; VI-NEXT:    s_or_b32 s4, s5, s4
; VI-NEXT:    s_lshl_b32 s5, s13, 8
; VI-NEXT:    s_and_b32 s6, s12, 0xff
; VI-NEXT:    s_or_b32 s5, s6, s5
; VI-NEXT:    s_lshl_b32 s4, s4, 16
; VI-NEXT:    s_and_b32 s5, s5, 0xffff
; VI-NEXT:    s_or_b32 s4, s5, s4
; VI-NEXT:    s_lshl_b32 s5, s11, 8
; VI-NEXT:    s_and_b32 s6, s10, 0xff
; VI-NEXT:    s_or_b32 s5, s6, s5
; VI-NEXT:    s_lshl_b32 s6, s9, 8
; VI-NEXT:    s_and_b32 s7, s8, 0xff
; VI-NEXT:    s_or_b32 s6, s7, s6
; VI-NEXT:    s_lshl_b32 s5, s5, 16
; VI-NEXT:    s_and_b32 s6, s6, 0xffff
; VI-NEXT:    s_or_b32 s5, s6, s5
; VI-NEXT:    v_mov_b32_e32 v5, s1
; VI-NEXT:    v_mov_b32_e32 v0, s5
; VI-NEXT:    v_mov_b32_e32 v1, s4
; VI-NEXT:    v_mov_b32_e32 v2, s3
; VI-NEXT:    v_mov_b32_e32 v3, s2
; VI-NEXT:    v_mov_b32_e32 v4, s0
; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; VI-NEXT:    s_endpgm
  %trunc = trunc <16 x i32> %in to <16 x i8>
  store <16 x i8> %trunc, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @truncstore_arg_v16i64_to_v16i8(ptr addrspace(1) %out, <16 x i64> %in) {
; SI-LABEL: truncstore_arg_v16i64_to_v16i8:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx16 s[16:31], s[4:5], 0x39
; SI-NEXT:    s_load_dwordx2 s[36:37], s[4:5], 0x9
; SI-NEXT:    s_load_dwordx16 s[0:15], s[4:5], 0x29
; SI-NEXT:    s_mov_b32 s39, 0xf000
; SI-NEXT:    s_mov_b32 s38, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_and_b32 s3, s28, 0xff
; SI-NEXT:    s_lshl_b32 s1, s30, 24
; SI-NEXT:    s_lshl_b32 s3, s3, 16
; SI-NEXT:    s_or_b32 s1, s1, s3
; SI-NEXT:    s_lshl_b32 s3, s26, 8
; SI-NEXT:    s_and_b32 s5, s24, 0xff
; SI-NEXT:    s_or_b32 s3, s5, s3
; SI-NEXT:    s_and_b32 s3, s3, 0xffff
; SI-NEXT:    s_and_b32 s5, s20, 0xff
; SI-NEXT:    s_or_b32 s1, s3, s1
; SI-NEXT:    s_lshl_b32 s3, s22, 24
; SI-NEXT:    s_lshl_b32 s5, s5, 16
; SI-NEXT:    s_or_b32 s3, s3, s5
; SI-NEXT:    s_lshl_b32 s5, s18, 8
; SI-NEXT:    s_and_b32 s7, s16, 0xff
; SI-NEXT:    s_or_b32 s5, s7, s5
; SI-NEXT:    s_and_b32 s5, s5, 0xffff
; SI-NEXT:    s_and_b32 s7, s12, 0xff
; SI-NEXT:    s_or_b32 s3, s5, s3
; SI-NEXT:    s_lshl_b32 s5, s14, 24
; SI-NEXT:    s_lshl_b32 s7, s7, 16
; SI-NEXT:    s_or_b32 s5, s5, s7
; SI-NEXT:    s_lshl_b32 s7, s10, 8
; SI-NEXT:    s_and_b32 s8, s8, 0xff
; SI-NEXT:    s_and_b32 s4, s4, 0xff
; SI-NEXT:    s_lshl_b32 s2, s2, 8
; SI-NEXT:    s_and_b32 s0, s0, 0xff
; SI-NEXT:    s_or_b32 s7, s8, s7
; SI-NEXT:    s_lshl_b32 s6, s6, 24
; SI-NEXT:    s_lshl_b32 s4, s4, 16
; SI-NEXT:    s_or_b32 s0, s0, s2
; SI-NEXT:    s_and_b32 s7, s7, 0xffff
; SI-NEXT:    s_or_b32 s4, s6, s4
; SI-NEXT:    s_and_b32 s0, s0, 0xffff
; SI-NEXT:    s_or_b32 s5, s7, s5
; SI-NEXT:    s_or_b32 s0, s0, s4
; SI-NEXT:    v_mov_b32_e32 v0, s0
; SI-NEXT:    v_mov_b32_e32 v1, s5
; SI-NEXT:    v_mov_b32_e32 v2, s3
; SI-NEXT:    v_mov_b32_e32 v3, s1
; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[36:39], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: truncstore_arg_v16i64_to_v16i8:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx16 s[16:31], s[4:5], 0xe4
; VI-NEXT:    s_load_dwordx2 s[34:35], s[4:5], 0x24
; VI-NEXT:    s_load_dwordx16 s[0:15], s[4:5], 0xa4
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_lshl_b32 s1, s30, 8
; VI-NEXT:    s_and_b32 s3, s28, 0xff
; VI-NEXT:    s_lshl_b32 s5, s26, 8
; VI-NEXT:    s_or_b32 s1, s3, s1
; VI-NEXT:    s_and_b32 s3, s24, 0xff
; VI-NEXT:    s_or_b32 s3, s3, s5
; VI-NEXT:    s_lshl_b32 s1, s1, 16
; VI-NEXT:    s_and_b32 s3, s3, 0xffff
; VI-NEXT:    s_or_b32 s1, s3, s1
; VI-NEXT:    s_lshl_b32 s3, s22, 8
; VI-NEXT:    s_and_b32 s5, s20, 0xff
; VI-NEXT:    s_or_b32 s3, s5, s3
; VI-NEXT:    s_lshl_b32 s5, s18, 8
; VI-NEXT:    s_and_b32 s7, s16, 0xff
; VI-NEXT:    s_or_b32 s5, s7, s5
; VI-NEXT:    s_lshl_b32 s3, s3, 16
; VI-NEXT:    s_and_b32 s5, s5, 0xffff
; VI-NEXT:    s_or_b32 s3, s5, s3
; VI-NEXT:    s_lshl_b32 s5, s14, 8
; VI-NEXT:    s_and_b32 s7, s12, 0xff
; VI-NEXT:    s_or_b32 s5, s7, s5
; VI-NEXT:    s_lshl_b32 s7, s10, 8
; VI-NEXT:    s_and_b32 s8, s8, 0xff
; VI-NEXT:    s_lshl_b32 s6, s6, 8
; VI-NEXT:    s_and_b32 s4, s4, 0xff
; VI-NEXT:    s_lshl_b32 s2, s2, 8
; VI-NEXT:    s_and_b32 s0, s0, 0xff
; VI-NEXT:    s_or_b32 s7, s8, s7
; VI-NEXT:    s_or_b32 s4, s4, s6
; VI-NEXT:    s_or_b32 s0, s0, s2
; VI-NEXT:    s_lshl_b32 s5, s5, 16
; VI-NEXT:    s_and_b32 s7, s7, 0xffff
; VI-NEXT:    s_lshl_b32 s4, s4, 16
; VI-NEXT:    s_and_b32 s0, s0, 0xffff
; VI-NEXT:    s_or_b32 s5, s7, s5
; VI-NEXT:    s_or_b32 s0, s0, s4
; VI-NEXT:    v_mov_b32_e32 v4, s34
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_mov_b32_e32 v1, s5
; VI-NEXT:    v_mov_b32_e32 v2, s3
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    v_mov_b32_e32 v5, s35
; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; VI-NEXT:    s_endpgm
  %trunc = trunc <16 x i64> %in to <16 x i8>
  store <16 x i8> %trunc, ptr addrspace(1) %out
  ret void
}

define void @truncstore_v5i32_to_v5i1(ptr addrspace(1) %out, <5 x i32> %val) {
; SI-LABEL: truncstore_v5i32_to_v5i1:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_and_b32_e32 v3, 1, v3
; SI-NEXT:    v_and_b32_e32 v2, 1, v2
; SI-NEXT:    v_lshlrev_b32_e32 v3, 1, v3
; SI-NEXT:    v_or_b32_e32 v2, v2, v3
; SI-NEXT:    v_and_b32_e32 v3, 1, v4
; SI-NEXT:    v_lshlrev_b32_e32 v3, 2, v3
; SI-NEXT:    v_or_b32_e32 v2, v2, v3
; SI-NEXT:    v_and_b32_e32 v3, 1, v5
; SI-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
; SI-NEXT:    v_or_b32_e32 v2, v2, v3
; SI-NEXT:    v_lshlrev_b32_e32 v3, 4, v6
; SI-NEXT:    s_mov_b32 s6, 0
; SI-NEXT:    v_or_b32_e32 v2, v2, v3
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s4, s6
; SI-NEXT:    s_mov_b32 s5, s6
; SI-NEXT:    v_and_b32_e32 v2, 31, v2
; SI-NEXT:    buffer_store_byte v2, v[0:1], s[4:7], 0 addr64
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: truncstore_v5i32_to_v5i1:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_and_b32_e32 v3, 1, v3
; VI-NEXT:    v_and_b32_e32 v2, 1, v2
; VI-NEXT:    v_lshlrev_b16_e32 v3, 1, v3
; VI-NEXT:    v_or_b32_e32 v2, v2, v3
; VI-NEXT:    v_and_b32_e32 v3, 1, v4
; VI-NEXT:    v_lshlrev_b16_e32 v3, 2, v3
; VI-NEXT:    v_or_b32_e32 v2, v2, v3
; VI-NEXT:    v_and_b32_e32 v3, 1, v5
; VI-NEXT:    v_lshlrev_b16_e32 v3, 3, v3
; VI-NEXT:    v_or_b32_e32 v2, v2, v3
; VI-NEXT:    v_lshlrev_b16_e32 v3, 4, v6
; VI-NEXT:    v_or_b32_e32 v2, v2, v3
; VI-NEXT:    v_and_b32_e32 v2, 31, v2
; VI-NEXT:    flat_store_byte v[0:1], v2
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_setpc_b64 s[30:31]
  %trunc = trunc <5 x i32> %val to <5 x i1>
  store <5 x i1> %trunc, ptr addrspace(1) %out
  ret void
}

define void @truncstore_v5i32_to_v5i8(ptr addrspace(1) %out, <5 x i32> %val) {
; SI-LABEL: truncstore_v5i32_to_v5i8:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_and_b32_e32 v4, 0xff, v4
; SI-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
; SI-NEXT:    v_and_b32_e32 v2, 0xff, v2
; SI-NEXT:    s_mov_b32 s6, 0
; SI-NEXT:    v_lshlrev_b32_e32 v5, 24, v5
; SI-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; SI-NEXT:    v_or_b32_e32 v2, v2, v3
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s4, s6
; SI-NEXT:    s_mov_b32 s5, s6
; SI-NEXT:    v_or_b32_e32 v4, v5, v4
; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; SI-NEXT:    v_or_b32_e32 v2, v2, v4
; SI-NEXT:    buffer_store_byte v6, v[0:1], s[4:7], 0 addr64 offset:4
; SI-NEXT:    buffer_store_dword v2, v[0:1], s[4:7], 0 addr64
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: truncstore_v5i32_to_v5i8:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_lshlrev_b16_e32 v3, 8, v3
; VI-NEXT:    v_or_b32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; VI-NEXT:    v_lshlrev_b16_e32 v3, 8, v5
; VI-NEXT:    v_or_b32_sdwa v3, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; VI-NEXT:    v_or_b32_sdwa v4, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT:    v_add_u32_e32 v2, vcc, 4, v0
; VI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v1, vcc
; VI-NEXT:    flat_store_byte v[2:3], v6
; VI-NEXT:    flat_store_dword v[0:1], v4
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_setpc_b64 s[30:31]
  %trunc = trunc <5 x i32> %val to <5 x i8>
  store <5 x i8> %trunc, ptr addrspace(1) %out
  ret void
}

define void @truncstore_v6i32_to_v6i1(ptr addrspace(1) %out, <6 x i32> %val) {
; SI-LABEL: truncstore_v6i32_to_v6i1:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_and_b32_e32 v3, 1, v3
; SI-NEXT:    v_and_b32_e32 v2, 1, v2
; SI-NEXT:    v_lshlrev_b32_e32 v3, 1, v3
; SI-NEXT:    v_or_b32_e32 v2, v2, v3
; SI-NEXT:    v_and_b32_e32 v3, 1, v4
; SI-NEXT:    v_lshlrev_b32_e32 v3, 2, v3
; SI-NEXT:    v_or_b32_e32 v2, v2, v3
; SI-NEXT:    v_and_b32_e32 v3, 1, v5
; SI-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
; SI-NEXT:    v_or_b32_e32 v2, v2, v3
; SI-NEXT:    v_and_b32_e32 v3, 1, v6
; SI-NEXT:    v_lshlrev_b32_e32 v3, 4, v3
; SI-NEXT:    v_or_b32_e32 v2, v2, v3
; SI-NEXT:    v_lshlrev_b32_e32 v3, 5, v7
; SI-NEXT:    s_mov_b32 s6, 0
; SI-NEXT:    v_or_b32_e32 v2, v2, v3
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s4, s6
; SI-NEXT:    s_mov_b32 s5, s6
; SI-NEXT:    v_and_b32_e32 v2, 63, v2
; SI-NEXT:    buffer_store_byte v2, v[0:1], s[4:7], 0 addr64
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: truncstore_v6i32_to_v6i1:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_and_b32_e32 v3, 1, v3
; VI-NEXT:    v_and_b32_e32 v2, 1, v2
; VI-NEXT:    v_lshlrev_b16_e32 v3, 1, v3
; VI-NEXT:    v_or_b32_e32 v2, v2, v3
; VI-NEXT:    v_and_b32_e32 v3, 1, v4
; VI-NEXT:    v_lshlrev_b16_e32 v3, 2, v3
; VI-NEXT:    v_or_b32_e32 v2, v2, v3
; VI-NEXT:    v_and_b32_e32 v3, 1, v5
; VI-NEXT:    v_lshlrev_b16_e32 v3, 3, v3
; VI-NEXT:    v_or_b32_e32 v2, v2, v3
; VI-NEXT:    v_and_b32_e32 v3, 1, v6
; VI-NEXT:    v_lshlrev_b16_e32 v3, 4, v3
; VI-NEXT:    v_or_b32_e32 v2, v2, v3
; VI-NEXT:    v_lshlrev_b16_e32 v3, 5, v7
; VI-NEXT:    v_or_b32_e32 v2, v2, v3
; VI-NEXT:    v_and_b32_e32 v2, 63, v2
; VI-NEXT:    flat_store_byte v[0:1], v2
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_setpc_b64 s[30:31]
  %trunc = trunc <6 x i32> %val to <6 x i1>
  store <6 x i1> %trunc, ptr addrspace(1) %out
  ret void
}

define void @truncstore_v6i32_to_v6i8(ptr addrspace(1) %out, <6 x i32> %val) {
; SI-LABEL: truncstore_v6i32_to_v6i8:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_and_b32_e32 v4, 0xff, v4
; SI-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
; SI-NEXT:    v_and_b32_e32 v2, 0xff, v2
; SI-NEXT:    v_lshlrev_b32_e32 v5, 24, v5
; SI-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; SI-NEXT:    v_or_b32_e32 v2, v2, v3
; SI-NEXT:    v_or_b32_e32 v4, v5, v4
; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; SI-NEXT:    s_mov_b32 s6, 0
; SI-NEXT:    v_or_b32_e32 v2, v2, v4
; SI-NEXT:    v_lshlrev_b32_e32 v3, 8, v7
; SI-NEXT:    v_and_b32_e32 v4, 0xff, v6
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s4, s6
; SI-NEXT:    s_mov_b32 s5, s6
; SI-NEXT:    v_or_b32_e32 v3, v4, v3
; SI-NEXT:    buffer_store_short v3, v[0:1], s[4:7], 0 addr64 offset:4
; SI-NEXT:    buffer_store_dword v2, v[0:1], s[4:7], 0 addr64
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: truncstore_v6i32_to_v6i8:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_lshlrev_b16_e32 v3, 8, v3
; VI-NEXT:    v_or_b32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; VI-NEXT:    v_lshlrev_b16_e32 v3, 8, v5
; VI-NEXT:    v_or_b32_sdwa v3, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; VI-NEXT:    v_or_b32_sdwa v4, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT:    v_lshlrev_b16_e32 v2, 8, v7
; VI-NEXT:    v_or_b32_sdwa v5, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; VI-NEXT:    v_add_u32_e32 v2, vcc, 4, v0
; VI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v1, vcc
; VI-NEXT:    flat_store_short v[2:3], v5
; VI-NEXT:    flat_store_dword v[0:1], v4
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_setpc_b64 s[30:31]
  %trunc = trunc <6 x i32> %val to <6 x i8>
  store <6 x i8> %trunc, ptr addrspace(1) %out
  ret void
}

define void @truncstore_v6i32_to_v6i16(ptr addrspace(1) %out, <6 x i32> %val) {
; SI-LABEL: truncstore_v6i32_to_v6i16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
; SI-NEXT:    v_and_b32_e32 v4, 0xffff, v4
; SI-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; SI-NEXT:    s_mov_b32 s6, 0
; SI-NEXT:    v_or_b32_e32 v4, v4, v5
; SI-NEXT:    v_or_b32_e32 v3, v2, v3
; SI-NEXT:    v_lshlrev_b32_e32 v2, 16, v7
; SI-NEXT:    v_and_b32_e32 v5, 0xffff, v6
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s4, s6
; SI-NEXT:    s_mov_b32 s5, s6
; SI-NEXT:    v_or_b32_e32 v2, v5, v2
; SI-NEXT:    buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 offset:8
; SI-NEXT:    buffer_store_dwordx2 v[3:4], v[0:1], s[4:7], 0 addr64
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: truncstore_v6i32_to_v6i16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s4, 0x1000504
; VI-NEXT:    v_perm_b32 v6, v6, v7, s4
; VI-NEXT:    v_perm_b32 v5, v4, v5, s4
; VI-NEXT:    v_perm_b32 v4, v2, v3, s4
; VI-NEXT:    flat_store_dwordx3 v[0:1], v[4:6]
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_setpc_b64 s[30:31]
  %trunc = trunc <6 x i32> %val to <6 x i16>
  store <6 x i16> %trunc, ptr addrspace(1) %out
  ret void
}

;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}