; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=verde < %s | FileCheck -check-prefixes=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefixes=VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX9 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX10 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX11 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1200 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX12 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1250 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX1250 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG %s

; Kernel under test: loads two consecutive i32 values from %in, combines them
; with the llvm.clmul.i32 intrinsic and stores the i32 result to %out. The
; expected code below expands the operation into per-bit AND / multiply / XOR
; steps on every target exercised by the RUN lines above.
define amdgpu_kernel void @test_clmul_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: test_clmul_i32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_mov_b32 s10, s2
; SI-NEXT:    s_mov_b32 s11, s3
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s6
; SI-NEXT:    s_mov_b32 s9, s7
; SI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s1, s5
; SI-NEXT:    s_mov_b32 s0, s4
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_readfirstlane_b32 s5, v1
; SI-NEXT:    v_readfirstlane_b32 s4, v0
; SI-NEXT:    s_and_b32 s6, s5, 2
; SI-NEXT:    s_and_b32 s7, s5, 1
; SI-NEXT:    s_and_b32 s8, s5, 4
; SI-NEXT:    s_mul_i32 s6, s4, s6
; SI-NEXT:    s_mul_i32 s7, s4, s7
; SI-NEXT:    s_and_b32 s9, s5, 8
; SI-NEXT:    s_mul_i32 s8, s4, s8
; SI-NEXT:    s_xor_b32 s6, s7, s6
; SI-NEXT:    s_and_b32 s10, s5, 16
; SI-NEXT:    s_mul_i32 s9, s4, s9
; SI-NEXT:    s_xor_b32 s6, s6, s8
; SI-NEXT:    s_and_b32 s11, s5, 32
; SI-NEXT:    s_mul_i32 s10, s4, s10
; SI-NEXT:    s_xor_b32 s6, s6, s9
; SI-NEXT:    s_and_b32 s12, s5, 64
; SI-NEXT:    s_mul_i32 s11, s4, s11
; SI-NEXT:    s_xor_b32 s6, s6, s10
; SI-NEXT:    s_and_b32 s13, s5, 0x80
; SI-NEXT:    s_mul_i32 s12, s4, s12
; SI-NEXT:    s_xor_b32 s6, s6, s11
; SI-NEXT:    s_and_b32 s14, s5, 0x100
; SI-NEXT:    s_mul_i32 s13, s4, s13
; SI-NEXT:    s_xor_b32 s6, s6, s12
; SI-NEXT:    s_and_b32 s15, s5, 0x200
; SI-NEXT:    s_mul_i32 s14, s4, s14
; SI-NEXT:    s_xor_b32 s6, s6, s13
; SI-NEXT:    s_and_b32 s16, s5, 0x400
; SI-NEXT:    s_mul_i32 s15, s4, s15
; SI-NEXT:    s_xor_b32 s6, s6, s14
; SI-NEXT:    s_and_b32 s17, s5, 0x800
; SI-NEXT:    s_mul_i32 s16, s4, s16
; SI-NEXT:    s_xor_b32 s6, s6, s15
; SI-NEXT:    s_and_b32 s18, s5, 0x1000
; SI-NEXT:    s_mul_i32 s17, s4, s17
; SI-NEXT:    s_xor_b32 s6, s6, s16
; SI-NEXT:    s_and_b32 s19, s5, 0x2000
; SI-NEXT:    s_mul_i32 s18, s4, s18
; SI-NEXT:    s_xor_b32 s6, s6, s17
; SI-NEXT:    s_and_b32 s20, s5, 0x4000
; SI-NEXT:    s_mul_i32 s19, s4, s19
; SI-NEXT:    s_xor_b32 s6, s6, s18
; SI-NEXT:    s_and_b32 s21, s5, 0x8000
; SI-NEXT:    s_mul_i32 s20, s4, s20
; SI-NEXT:    s_xor_b32 s6, s6, s19
; SI-NEXT:    s_and_b32 s22, s5, 0x10000
; SI-NEXT:    s_mul_i32 s21, s4, s21
; SI-NEXT:    s_xor_b32 s6, s6, s20
; SI-NEXT:    s_and_b32 s23, s5, 0x20000
; SI-NEXT:    s_mul_i32 s22, s4, s22
; SI-NEXT:    s_xor_b32 s6, s6, s21
; SI-NEXT:    s_and_b32 s24, s5, 0x40000
; SI-NEXT:    s_mul_i32 s23, s4, s23
; SI-NEXT:    s_xor_b32 s6, s6, s22
; SI-NEXT:    s_and_b32 s25, s5, 0x80000
; SI-NEXT:    s_mul_i32 s24, s4, s24
; SI-NEXT:    s_xor_b32 s6, s6, s23
; SI-NEXT:    s_and_b32 s26, s5, 0x100000
; SI-NEXT:    s_mul_i32 s25, s4, s25
; SI-NEXT:    s_xor_b32 s6, s6, s24
; SI-NEXT:    s_and_b32 s27, s5, 0x200000
; SI-NEXT:    s_mul_i32 s26, s4, s26
; SI-NEXT:    s_xor_b32 s6, s6, s25
; SI-NEXT:    s_and_b32 s28, s5, 0x400000
; SI-NEXT:    s_mul_i32 s27, s4, s27
; SI-NEXT:    s_xor_b32 s6, s6, s26
; SI-NEXT:    s_and_b32 s29, s5, 0x800000
; SI-NEXT:    s_mul_i32 s28, s4, s28
; SI-NEXT:    s_xor_b32 s6, s6, s27
; SI-NEXT:    s_and_b32 s30, s5, 0x1000000
; SI-NEXT:    s_mul_i32 s29, s4, s29
; SI-NEXT:    s_xor_b32 s6, s6, s28
; SI-NEXT:    s_and_b32 s31, s5, 0x2000000
; SI-NEXT:    s_mul_i32 s30, s4, s30
; SI-NEXT:    s_xor_b32 s6, s6, s29
; SI-NEXT:    s_and_b32 s33, s5, 0x4000000
; SI-NEXT:    s_mul_i32 s31, s4, s31
; SI-NEXT:    s_xor_b32 s6, s6, s30
; SI-NEXT:    s_and_b32 s34, s5, 0x8000000
; SI-NEXT:    s_mul_i32 s33, s4, s33
; SI-NEXT:    s_xor_b32 s6, s6, s31
; SI-NEXT:    s_and_b32 s35, s5, 0x10000000
; SI-NEXT:    s_mul_i32 s34, s4, s34
; SI-NEXT:    s_xor_b32 s6, s6, s33
; SI-NEXT:    s_and_b32 s36, s5, 0x20000000
; SI-NEXT:    s_mul_i32 s35, s4, s35
; SI-NEXT:    s_xor_b32 s6, s6, s34
; SI-NEXT:    s_and_b32 s37, s5, 2.0
; SI-NEXT:    s_mul_i32 s36, s4, s36
; SI-NEXT:    s_xor_b32 s6, s6, s35
; SI-NEXT:    s_and_b32 s5, s5, 0x80000000
; SI-NEXT:    s_mul_i32 s37, s4, s37
; SI-NEXT:    s_xor_b32 s6, s6, s36
; SI-NEXT:    s_xor_b32 s6, s6, s37
; SI-NEXT:    s_mul_i32 s4, s4, s5
; SI-NEXT:    s_xor_b32 s4, s6, s4
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_clmul_i32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_mov_b32 s10, s2
; VI-NEXT:    s_mov_b32 s11, s3
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s6
; VI-NEXT:    s_mov_b32 s9, s7
; VI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s1, s5
; VI-NEXT:    s_mov_b32 s0, s4
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_readfirstlane_b32 s5, v1
; VI-NEXT:    v_readfirstlane_b32 s4, v0
; VI-NEXT:    s_and_b32 s6, s5, 2
; VI-NEXT:    s_and_b32 s7, s5, 1
; VI-NEXT:    s_and_b32 s8, s5, 4
; VI-NEXT:    s_mul_i32 s6, s4, s6
; VI-NEXT:    s_mul_i32 s7, s4, s7
; VI-NEXT:    s_and_b32 s9, s5, 8
; VI-NEXT:    s_mul_i32 s8, s4, s8
; VI-NEXT:    s_xor_b32 s6, s7, s6
; VI-NEXT:    s_and_b32 s10, s5, 16
; VI-NEXT:    s_mul_i32 s9, s4, s9
; VI-NEXT:    s_xor_b32 s6, s6, s8
; VI-NEXT:    s_and_b32 s11, s5, 32
; VI-NEXT:    s_mul_i32 s10, s4, s10
; VI-NEXT:    s_xor_b32 s6, s6, s9
; VI-NEXT:    s_and_b32 s12, s5, 64
; VI-NEXT:    s_mul_i32 s11, s4, s11
; VI-NEXT:    s_xor_b32 s6, s6, s10
; VI-NEXT:    s_and_b32 s13, s5, 0x80
; VI-NEXT:    s_mul_i32 s12, s4, s12
; VI-NEXT:    s_xor_b32 s6, s6, s11
; VI-NEXT:    s_and_b32 s14, s5, 0x100
; VI-NEXT:    s_mul_i32 s13, s4, s13
; VI-NEXT:    s_xor_b32 s6, s6, s12
; VI-NEXT:    s_and_b32 s15, s5, 0x200
; VI-NEXT:    s_mul_i32 s14, s4, s14
; VI-NEXT:    s_xor_b32 s6, s6, s13
; VI-NEXT:    s_and_b32 s16, s5, 0x400
; VI-NEXT:    s_mul_i32 s15, s4, s15
; VI-NEXT:    s_xor_b32 s6, s6, s14
; VI-NEXT:    s_and_b32 s17, s5, 0x800
; VI-NEXT:    s_mul_i32 s16, s4, s16
; VI-NEXT:    s_xor_b32 s6, s6, s15
; VI-NEXT:    s_and_b32 s18, s5, 0x1000
; VI-NEXT:    s_mul_i32 s17, s4, s17
; VI-NEXT:    s_xor_b32 s6, s6, s16
; VI-NEXT:    s_and_b32 s19, s5, 0x2000
; VI-NEXT:    s_mul_i32 s18, s4, s18
; VI-NEXT:    s_xor_b32 s6, s6, s17
; VI-NEXT:    s_and_b32 s20, s5, 0x4000
; VI-NEXT:    s_mul_i32 s19, s4, s19
; VI-NEXT:    s_xor_b32 s6, s6, s18
; VI-NEXT:    s_and_b32 s21, s5, 0x8000
; VI-NEXT:    s_mul_i32 s20, s4, s20
; VI-NEXT:    s_xor_b32 s6, s6, s19
; VI-NEXT:    s_and_b32 s22, s5, 0x10000
; VI-NEXT:    s_mul_i32 s21, s4, s21
; VI-NEXT:    s_xor_b32 s6, s6, s20
; VI-NEXT:    s_and_b32 s23, s5, 0x20000
; VI-NEXT:    s_mul_i32 s22, s4, s22
; VI-NEXT:    s_xor_b32 s6, s6, s21
; VI-NEXT:    s_and_b32 s24, s5, 0x40000
; VI-NEXT:    s_mul_i32 s23, s4, s23
; VI-NEXT:    s_xor_b32 s6, s6, s22
; VI-NEXT:    s_and_b32 s25, s5, 0x80000
; VI-NEXT:    s_mul_i32 s24, s4, s24
; VI-NEXT:    s_xor_b32 s6, s6, s23
; VI-NEXT:    s_and_b32 s26, s5, 0x100000
; VI-NEXT:    s_mul_i32 s25, s4, s25
; VI-NEXT:    s_xor_b32 s6, s6, s24
; VI-NEXT:    s_and_b32 s27, s5, 0x200000
; VI-NEXT:    s_mul_i32 s26, s4, s26
; VI-NEXT:    s_xor_b32 s6, s6, s25
; VI-NEXT:    s_and_b32 s28, s5, 0x400000
; VI-NEXT:    s_mul_i32 s27, s4, s27
; VI-NEXT:    s_xor_b32 s6, s6, s26
; VI-NEXT:    s_and_b32 s29, s5, 0x800000
; VI-NEXT:    s_mul_i32 s28, s4, s28
; VI-NEXT:    s_xor_b32 s6, s6, s27
; VI-NEXT:    s_and_b32 s30, s5, 0x1000000
; VI-NEXT:    s_mul_i32 s29, s4, s29
; VI-NEXT:    s_xor_b32 s6, s6, s28
; VI-NEXT:    s_and_b32 s31, s5, 0x2000000
; VI-NEXT:    s_mul_i32 s30, s4, s30
; VI-NEXT:    s_xor_b32 s6, s6, s29
; VI-NEXT:    s_and_b32 s33, s5, 0x4000000
; VI-NEXT:    s_mul_i32 s31, s4, s31
; VI-NEXT:    s_xor_b32 s6, s6, s30
; VI-NEXT:    s_and_b32 s34, s5, 0x8000000
; VI-NEXT:    s_mul_i32 s33, s4, s33
; VI-NEXT:    s_xor_b32 s6, s6, s31
; VI-NEXT:    s_and_b32 s35, s5, 0x10000000
; VI-NEXT:    s_mul_i32 s34, s4, s34
; VI-NEXT:    s_xor_b32 s6, s6, s33
; VI-NEXT:    s_and_b32 s36, s5, 0x20000000
; VI-NEXT:    s_mul_i32 s35, s4, s35
; VI-NEXT:    s_xor_b32 s6, s6, s34
; VI-NEXT:    s_and_b32 s37, s5, 2.0
; VI-NEXT:    s_mul_i32 s36, s4, s36
; VI-NEXT:    s_xor_b32 s6, s6, s35
; VI-NEXT:    s_and_b32 s5, s5, 0x80000000
; VI-NEXT:    s_mul_i32 s37, s4, s37
; VI-NEXT:    s_xor_b32 s6, s6, s36
; VI-NEXT:    s_xor_b32 s6, s6, s37
; VI-NEXT:    s_mul_i32 s4, s4, s5
; VI-NEXT:    s_xor_b32 s4, s6, s4
; VI-NEXT:    v_mov_b32_e32 v0, s4
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
;
; GFX9-LABEL: test_clmul_i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x24
; GFX9-NEXT:    s_mov_b32 s3, 0xf000
; GFX9-NEXT:    s_mov_b32 s2, -1
; GFX9-NEXT:    s_mov_b32 s6, s2
; GFX9-NEXT:    s_mov_b32 s7, s3
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s4, s10
; GFX9-NEXT:    s_mov_b32 s5, s11
; GFX9-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
; GFX9-NEXT:    s_mov_b32 s0, s8
; GFX9-NEXT:    s_mov_b32 s1, s9
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_readfirstlane_b32 s5, v1
; GFX9-NEXT:    v_readfirstlane_b32 s4, v0
; GFX9-NEXT:    s_and_b32 s6, s5, 2
; GFX9-NEXT:    s_and_b32 s7, s5, 1
; GFX9-NEXT:    s_and_b32 s8, s5, 4
; GFX9-NEXT:    s_mul_i32 s6, s4, s6
; GFX9-NEXT:    s_mul_i32 s7, s4, s7
; GFX9-NEXT:    s_and_b32 s9, s5, 8
; GFX9-NEXT:    s_mul_i32 s8, s4, s8
; GFX9-NEXT:    s_xor_b32 s6, s7, s6
; GFX9-NEXT:    s_and_b32 s10, s5, 16
; GFX9-NEXT:    s_mul_i32 s9, s4, s9
; GFX9-NEXT:    s_xor_b32 s6, s6, s8
; GFX9-NEXT:    s_and_b32 s11, s5, 32
; GFX9-NEXT:    s_mul_i32 s10, s4, s10
; GFX9-NEXT:    s_xor_b32 s6, s6, s9
; GFX9-NEXT:    s_and_b32 s12, s5, 64
; GFX9-NEXT:    s_mul_i32 s11, s4, s11
; GFX9-NEXT:    s_xor_b32 s6, s6, s10
; GFX9-NEXT:    s_and_b32 s13, s5, 0x80
; GFX9-NEXT:    s_mul_i32 s12, s4, s12
; GFX9-NEXT:    s_xor_b32 s6, s6, s11
; GFX9-NEXT:    s_and_b32 s14, s5, 0x100
; GFX9-NEXT:    s_mul_i32 s13, s4, s13
; GFX9-NEXT:    s_xor_b32 s6, s6, s12
; GFX9-NEXT:    s_and_b32 s15, s5, 0x200
; GFX9-NEXT:    s_mul_i32 s14, s4, s14
; GFX9-NEXT:    s_xor_b32 s6, s6, s13
; GFX9-NEXT:    s_and_b32 s16, s5, 0x400
; GFX9-NEXT:    s_mul_i32 s15, s4, s15
; GFX9-NEXT:    s_xor_b32 s6, s6, s14
; GFX9-NEXT:    s_and_b32 s17, s5, 0x800
; GFX9-NEXT:    s_mul_i32 s16, s4, s16
; GFX9-NEXT:    s_xor_b32 s6, s6, s15
; GFX9-NEXT:    s_and_b32 s18, s5, 0x1000
; GFX9-NEXT:    s_mul_i32 s17, s4, s17
; GFX9-NEXT:    s_xor_b32 s6, s6, s16
; GFX9-NEXT:    s_and_b32 s19, s5, 0x2000
; GFX9-NEXT:    s_mul_i32 s18, s4, s18
; GFX9-NEXT:    s_xor_b32 s6, s6, s17
; GFX9-NEXT:    s_and_b32 s20, s5, 0x4000
; GFX9-NEXT:    s_mul_i32 s19, s4, s19
; GFX9-NEXT:    s_xor_b32 s6, s6, s18
; GFX9-NEXT:    s_and_b32 s21, s5, 0x8000
; GFX9-NEXT:    s_mul_i32 s20, s4, s20
; GFX9-NEXT:    s_xor_b32 s6, s6, s19
; GFX9-NEXT:    s_and_b32 s22, s5, 0x10000
; GFX9-NEXT:    s_mul_i32 s21, s4, s21
; GFX9-NEXT:    s_xor_b32 s6, s6, s20
; GFX9-NEXT:    s_and_b32 s23, s5, 0x20000
; GFX9-NEXT:    s_mul_i32 s22, s4, s22
; GFX9-NEXT:    s_xor_b32 s6, s6, s21
; GFX9-NEXT:    s_and_b32 s24, s5, 0x40000
; GFX9-NEXT:    s_mul_i32 s23, s4, s23
; GFX9-NEXT:    s_xor_b32 s6, s6, s22
; GFX9-NEXT:    s_and_b32 s25, s5, 0x80000
; GFX9-NEXT:    s_mul_i32 s24, s4, s24
; GFX9-NEXT:    s_xor_b32 s6, s6, s23
; GFX9-NEXT:    s_and_b32 s26, s5, 0x100000
; GFX9-NEXT:    s_mul_i32 s25, s4, s25
; GFX9-NEXT:    s_xor_b32 s6, s6, s24
; GFX9-NEXT:    s_and_b32 s27, s5, 0x200000
; GFX9-NEXT:    s_mul_i32 s26, s4, s26
; GFX9-NEXT:    s_xor_b32 s6, s6, s25
; GFX9-NEXT:    s_and_b32 s28, s5, 0x400000
; GFX9-NEXT:    s_mul_i32 s27, s4, s27
; GFX9-NEXT:    s_xor_b32 s6, s6, s26
; GFX9-NEXT:    s_and_b32 s29, s5, 0x800000
; GFX9-NEXT:    s_mul_i32 s28, s4, s28
; GFX9-NEXT:    s_xor_b32 s6, s6, s27
; GFX9-NEXT:    s_and_b32 s30, s5, 0x1000000
; GFX9-NEXT:    s_mul_i32 s29, s4, s29
; GFX9-NEXT:    s_xor_b32 s6, s6, s28
; GFX9-NEXT:    s_and_b32 s31, s5, 0x2000000
; GFX9-NEXT:    s_mul_i32 s30, s4, s30
; GFX9-NEXT:    s_xor_b32 s6, s6, s29
; GFX9-NEXT:    s_and_b32 s33, s5, 0x4000000
; GFX9-NEXT:    s_mul_i32 s31, s4, s31
; GFX9-NEXT:    s_xor_b32 s6, s6, s30
; GFX9-NEXT:    s_and_b32 s34, s5, 0x8000000
; GFX9-NEXT:    s_mul_i32 s33, s4, s33
; GFX9-NEXT:    s_xor_b32 s6, s6, s31
; GFX9-NEXT:    s_and_b32 s35, s5, 0x10000000
; GFX9-NEXT:    s_mul_i32 s34, s4, s34
; GFX9-NEXT:    s_xor_b32 s6, s6, s33
; GFX9-NEXT:    s_and_b32 s36, s5, 0x20000000
; GFX9-NEXT:    s_mul_i32 s35, s4, s35
; GFX9-NEXT:    s_xor_b32 s6, s6, s34
; GFX9-NEXT:    s_and_b32 s37, s5, 2.0
; GFX9-NEXT:    s_mul_i32 s36, s4, s36
; GFX9-NEXT:    s_xor_b32 s6, s6, s35
; GFX9-NEXT:    s_and_b32 s5, s5, 0x80000000
; GFX9-NEXT:    s_mul_i32 s37, s4, s37
; GFX9-NEXT:    s_xor_b32 s6, s6, s36
; GFX9-NEXT:    s_xor_b32 s6, s6, s37
; GFX9-NEXT:    s_mul_i32 s4, s4, s5
; GFX9-NEXT:    s_xor_b32 s4, s6, s4
; GFX9-NEXT:    v_mov_b32_e32 v0, s4
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: test_clmul_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-NEXT:    s_mov_b32 s6, -1
; GFX10-NEXT:    s_mov_b32 s7, 0x31016000
; GFX10-NEXT:    s_mov_b32 s10, s6
; GFX10-NEXT:    s_mov_b32 s11, s7
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_mov_b32 s8, s2
; GFX10-NEXT:    s_mov_b32 s9, s3
; GFX10-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_readfirstlane_b32 s2, v1
; GFX10-NEXT:    v_readfirstlane_b32 s3, v0
; GFX10-NEXT:    s_and_b32 s4, s2, 2
; GFX10-NEXT:    s_and_b32 s5, s2, 1
; GFX10-NEXT:    s_and_b32 s8, s2, 4
; GFX10-NEXT:    s_mul_i32 s4, s3, s4
; GFX10-NEXT:    s_mul_i32 s5, s3, s5
; GFX10-NEXT:    s_and_b32 s9, s2, 8
; GFX10-NEXT:    s_mul_i32 s8, s3, s8
; GFX10-NEXT:    s_xor_b32 s4, s5, s4
; GFX10-NEXT:    s_and_b32 s10, s2, 16
; GFX10-NEXT:    s_mul_i32 s5, s3, s9
; GFX10-NEXT:    s_xor_b32 s4, s4, s8
; GFX10-NEXT:    s_and_b32 s11, s2, 32
; GFX10-NEXT:    s_mul_i32 s8, s3, s10
; GFX10-NEXT:    s_xor_b32 s4, s4, s5
; GFX10-NEXT:    s_and_b32 s12, s2, 64
; GFX10-NEXT:    s_mul_i32 s5, s3, s11
; GFX10-NEXT:    s_xor_b32 s4, s4, s8
; GFX10-NEXT:    s_and_b32 s13, s2, 0x80
; GFX10-NEXT:    s_mul_i32 s8, s3, s12
; GFX10-NEXT:    s_xor_b32 s4, s4, s5
; GFX10-NEXT:    s_and_b32 s14, s2, 0x100
; GFX10-NEXT:    s_mul_i32 s5, s3, s13
; GFX10-NEXT:    s_xor_b32 s4, s4, s8
; GFX10-NEXT:    s_and_b32 s15, s2, 0x200
; GFX10-NEXT:    s_mul_i32 s8, s3, s14
; GFX10-NEXT:    s_xor_b32 s4, s4, s5
; GFX10-NEXT:    s_and_b32 s16, s2, 0x400
; GFX10-NEXT:    s_mul_i32 s5, s3, s15
; GFX10-NEXT:    s_xor_b32 s4, s4, s8
; GFX10-NEXT:    s_and_b32 s17, s2, 0x800
; GFX10-NEXT:    s_mul_i32 s8, s3, s16
; GFX10-NEXT:    s_xor_b32 s4, s4, s5
; GFX10-NEXT:    s_and_b32 s18, s2, 0x1000
; GFX10-NEXT:    s_mul_i32 s5, s3, s17
; GFX10-NEXT:    s_xor_b32 s4, s4, s8
; GFX10-NEXT:    s_and_b32 s19, s2, 0x2000
; GFX10-NEXT:    s_mul_i32 s8, s3, s18
; GFX10-NEXT:    s_xor_b32 s4, s4, s5
; GFX10-NEXT:    s_and_b32 s20, s2, 0x4000
; GFX10-NEXT:    s_mul_i32 s5, s3, s19
; GFX10-NEXT:    s_xor_b32 s4, s4, s8
; GFX10-NEXT:    s_and_b32 s21, s2, 0x8000
; GFX10-NEXT:    s_mul_i32 s8, s3, s20
; GFX10-NEXT:    s_xor_b32 s4, s4, s5
; GFX10-NEXT:    s_and_b32 s22, s2, 0x10000
; GFX10-NEXT:    s_mul_i32 s5, s3, s21
; GFX10-NEXT:    s_xor_b32 s4, s4, s8
; GFX10-NEXT:    s_and_b32 s23, s2, 0x20000
; GFX10-NEXT:    s_mul_i32 s8, s3, s22
; GFX10-NEXT:    s_xor_b32 s4, s4, s5
; GFX10-NEXT:    s_and_b32 s24, s2, 0x40000
; GFX10-NEXT:    s_mul_i32 s5, s3, s23
; GFX10-NEXT:    s_xor_b32 s4, s4, s8
; GFX10-NEXT:    s_and_b32 s25, s2, 0x80000
; GFX10-NEXT:    s_mul_i32 s8, s3, s24
; GFX10-NEXT:    s_xor_b32 s4, s4, s5
; GFX10-NEXT:    s_and_b32 s26, s2, 0x100000
; GFX10-NEXT:    s_mul_i32 s5, s3, s25
; GFX10-NEXT:    s_xor_b32 s4, s4, s8
; GFX10-NEXT:    s_and_b32 s27, s2, 0x200000
; GFX10-NEXT:    s_mul_i32 s8, s3, s26
; GFX10-NEXT:    s_xor_b32 s4, s4, s5
; GFX10-NEXT:    s_and_b32 s28, s2, 0x400000
; GFX10-NEXT:    s_mul_i32 s5, s3, s27
; GFX10-NEXT:    s_xor_b32 s4, s4, s8
; GFX10-NEXT:    s_and_b32 s29, s2, 0x800000
; GFX10-NEXT:    s_mul_i32 s8, s3, s28
; GFX10-NEXT:    s_xor_b32 s4, s4, s5
; GFX10-NEXT:    s_and_b32 s30, s2, 0x1000000
; GFX10-NEXT:    s_mul_i32 s5, s3, s29
; GFX10-NEXT:    s_xor_b32 s4, s4, s8
; GFX10-NEXT:    s_and_b32 s31, s2, 0x2000000
; GFX10-NEXT:    s_mul_i32 s8, s3, s30
; GFX10-NEXT:    s_xor_b32 s4, s4, s5
; GFX10-NEXT:    s_and_b32 s33, s2, 0x4000000
; GFX10-NEXT:    s_mul_i32 s5, s3, s31
; GFX10-NEXT:    s_xor_b32 s4, s4, s8
; GFX10-NEXT:    s_and_b32 s34, s2, 0x8000000
; GFX10-NEXT:    s_mul_i32 s8, s3, s33
; GFX10-NEXT:    s_xor_b32 s4, s4, s5
; GFX10-NEXT:    s_and_b32 s35, s2, 0x10000000
; GFX10-NEXT:    s_mul_i32 s5, s3, s34
; GFX10-NEXT:    s_xor_b32 s4, s4, s8
; GFX10-NEXT:    s_and_b32 s36, s2, 0x20000000
; GFX10-NEXT:    s_mul_i32 s8, s3, s35
; GFX10-NEXT:    s_xor_b32 s4, s4, s5
; GFX10-NEXT:    s_and_b32 s37, s2, 2.0
; GFX10-NEXT:    s_mul_i32 s5, s3, s36
; GFX10-NEXT:    s_xor_b32 s4, s4, s8
; GFX10-NEXT:    s_mul_i32 s8, s3, s37
; GFX10-NEXT:    s_xor_b32 s4, s4, s5
; GFX10-NEXT:    s_and_b32 s2, s2, 0x80000000
; GFX10-NEXT:    s_xor_b32 s4, s4, s8
; GFX10-NEXT:    s_mul_i32 s3, s3, s2
; GFX10-NEXT:    s_mov_b32 s5, s1
; GFX10-NEXT:    s_xor_b32 s2, s4, s3
; GFX10-NEXT:    s_mov_b32 s4, s0
; GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GFX10-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: test_clmul_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-NEXT:    s_mov_b32 s6, -1
; GFX11-NEXT:    s_mov_b32 s7, 0x31016000
; GFX11-NEXT:    s_mov_b32 s10, s6
; GFX11-NEXT:    s_mov_b32 s11, s7
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s8, s2
; GFX11-NEXT:    s_mov_b32 s9, s3
; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[8:11], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_readfirstlane_b32 s2, v1
; GFX11-NEXT:    v_readfirstlane_b32 s3, v0
; GFX11-NEXT:    s_and_b32 s4, s2, 2
; GFX11-NEXT:    s_and_b32 s5, s2, 1
; GFX11-NEXT:    s_and_b32 s8, s2, 4
; GFX11-NEXT:    s_mul_i32 s4, s3, s4
; GFX11-NEXT:    s_mul_i32 s5, s3, s5
; GFX11-NEXT:    s_and_b32 s9, s2, 8
; GFX11-NEXT:    s_mul_i32 s8, s3, s8
; GFX11-NEXT:    s_xor_b32 s4, s5, s4
; GFX11-NEXT:    s_and_b32 s10, s2, 16
; GFX11-NEXT:    s_mul_i32 s5, s3, s9
; GFX11-NEXT:    s_xor_b32 s4, s4, s8
; GFX11-NEXT:    s_and_b32 s11, s2, 32
; GFX11-NEXT:    s_mul_i32 s8, s3, s10
; GFX11-NEXT:    s_xor_b32 s4, s4, s5
; GFX11-NEXT:    s_and_b32 s12, s2, 64
; GFX11-NEXT:    s_mul_i32 s5, s3, s11
; GFX11-NEXT:    s_xor_b32 s4, s4, s8
; GFX11-NEXT:    s_and_b32 s13, s2, 0x80
; GFX11-NEXT:    s_mul_i32 s8, s3, s12
; GFX11-NEXT:    s_xor_b32 s4, s4, s5
; GFX11-NEXT:    s_and_b32 s14, s2, 0x100
; GFX11-NEXT:    s_mul_i32 s5, s3, s13
; GFX11-NEXT:    s_xor_b32 s4, s4, s8
; GFX11-NEXT:    s_and_b32 s15, s2, 0x200
; GFX11-NEXT:    s_mul_i32 s8, s3, s14
; GFX11-NEXT:    s_xor_b32 s4, s4, s5
; GFX11-NEXT:    s_and_b32 s16, s2, 0x400
; GFX11-NEXT:    s_mul_i32 s5, s3, s15
; GFX11-NEXT:    s_xor_b32 s4, s4, s8
; GFX11-NEXT:    s_and_b32 s17, s2, 0x800
; GFX11-NEXT:    s_mul_i32 s8, s3, s16
; GFX11-NEXT:    s_xor_b32 s4, s4, s5
; GFX11-NEXT:    s_and_b32 s18, s2, 0x1000
; GFX11-NEXT:    s_mul_i32 s5, s3, s17
; GFX11-NEXT:    s_xor_b32 s4, s4, s8
; GFX11-NEXT:    s_and_b32 s19, s2, 0x2000
; GFX11-NEXT:    s_mul_i32 s8, s3, s18
; GFX11-NEXT:    s_xor_b32 s4, s4, s5
; GFX11-NEXT:    s_and_b32 s20, s2, 0x4000
; GFX11-NEXT:    s_mul_i32 s5, s3, s19
; GFX11-NEXT:    s_xor_b32 s4, s4, s8
; GFX11-NEXT:    s_and_b32 s21, s2, 0x8000
; GFX11-NEXT:    s_mul_i32 s8, s3, s20
; GFX11-NEXT:    s_xor_b32 s4, s4, s5
; GFX11-NEXT:    s_and_b32 s22, s2, 0x10000
; GFX11-NEXT:    s_mul_i32 s5, s3, s21
; GFX11-NEXT:    s_xor_b32 s4, s4, s8
; GFX11-NEXT:    s_and_b32 s23, s2, 0x20000
; GFX11-NEXT:    s_mul_i32 s8, s3, s22
; GFX11-NEXT:    s_xor_b32 s4, s4, s5
; GFX11-NEXT:    s_and_b32 s24, s2, 0x40000
; GFX11-NEXT:    s_mul_i32 s5, s3, s23
; GFX11-NEXT:    s_xor_b32 s4, s4, s8
; GFX11-NEXT:    s_and_b32 s25, s2, 0x80000
; GFX11-NEXT:    s_mul_i32 s8, s3, s24
; GFX11-NEXT:    s_xor_b32 s4, s4, s5
; GFX11-NEXT:    s_and_b32 s26, s2, 0x100000
; GFX11-NEXT:    s_mul_i32 s5, s3, s25
; GFX11-NEXT:    s_xor_b32 s4, s4, s8
; GFX11-NEXT:    s_and_b32 s27, s2, 0x200000
; GFX11-NEXT:    s_mul_i32 s8, s3, s26
; GFX11-NEXT:    s_xor_b32 s4, s4, s5
; GFX11-NEXT:    s_and_b32 s28, s2, 0x400000
; GFX11-NEXT:    s_mul_i32 s5, s3, s27
; GFX11-NEXT:    s_xor_b32 s4, s4, s8
; GFX11-NEXT:    s_and_b32 s29, s2, 0x800000
; GFX11-NEXT:    s_mul_i32 s8, s3, s28
; GFX11-NEXT:    s_xor_b32 s4, s4, s5
; GFX11-NEXT:    s_and_b32 s30, s2, 0x1000000
; GFX11-NEXT:    s_mul_i32 s5, s3, s29
; GFX11-NEXT:    s_xor_b32 s4, s4, s8
; GFX11-NEXT:    s_and_b32 s31, s2, 0x2000000
; GFX11-NEXT:    s_mul_i32 s8, s3, s30
; GFX11-NEXT:    s_xor_b32 s4, s4, s5
; GFX11-NEXT:    s_and_b32 s33, s2, 0x4000000
; GFX11-NEXT:    s_mul_i32 s5, s3, s31
; GFX11-NEXT:    s_xor_b32 s4, s4, s8
; GFX11-NEXT:    s_and_b32 s34, s2, 0x8000000
; GFX11-NEXT:    s_mul_i32 s8, s3, s33
; GFX11-NEXT:    s_xor_b32 s4, s4, s5
; GFX11-NEXT:    s_and_b32 s35, s2, 0x10000000
; GFX11-NEXT:    s_mul_i32 s5, s3, s34
; GFX11-NEXT:    s_xor_b32 s4, s4, s8
; GFX11-NEXT:    s_and_b32 s36, s2, 0x20000000
; GFX11-NEXT:    s_mul_i32 s8, s3, s35
; GFX11-NEXT:    s_xor_b32 s4, s4, s5
; GFX11-NEXT:    s_and_b32 s37, s2, 2.0
; GFX11-NEXT:    s_mul_i32 s5, s3, s36
; GFX11-NEXT:    s_xor_b32 s4, s4, s8
; GFX11-NEXT:    s_mul_i32 s8, s3, s37
; GFX11-NEXT:    s_xor_b32 s4, s4, s5
; GFX11-NEXT:    s_and_b32 s2, s2, 0x80000000
; GFX11-NEXT:    s_xor_b32 s4, s4, s8
; GFX11-NEXT:    s_mul_i32 s3, s3, s2
; GFX11-NEXT:    s_mov_b32 s5, s1
; GFX11-NEXT:    s_xor_b32 s2, s4, s3
; GFX11-NEXT:    s_mov_b32 s4, s0
; GFX11-NEXT:    v_mov_b32_e32 v0, s2
; GFX11-NEXT:    buffer_store_b32 v0, off, s[4:7], 0
; GFX11-NEXT:    s_endpgm
;
; GFX12-LABEL: test_clmul_i32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT:    s_mov_b32 s6, -1
; GFX12-NEXT:    s_mov_b32 s7, 0x31016000
; GFX12-NEXT:    s_mov_b32 s10, s6
; GFX12-NEXT:    s_mov_b32 s11, s7
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_mov_b32 s8, s2
; GFX12-NEXT:    s_mov_b32 s9, s3
; GFX12-NEXT:    buffer_load_b64 v[0:1], off, s[8:11], null
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    v_readfirstlane_b32 s2, v1
; GFX12-NEXT:    v_readfirstlane_b32 s3, v0
; GFX12-NEXT:    s_and_b32 s4, s2, 2
; GFX12-NEXT:    s_and_b32 s5, s2, 1
; GFX12-NEXT:    s_and_b32 s8, s2, 4
; GFX12-NEXT:    s_mul_i32 s4, s3, s4
; GFX12-NEXT:    s_mul_i32 s5, s3, s5
; GFX12-NEXT:    s_and_b32 s9, s2, 8
; GFX12-NEXT:    s_mul_i32 s8, s3, s8
; GFX12-NEXT:    s_xor_b32 s4, s5, s4
; GFX12-NEXT:    s_and_b32 s10, s2, 16
; GFX12-NEXT:    s_mul_i32 s5, s3, s9
; GFX12-NEXT:    s_xor_b32 s4, s4, s8
; GFX12-NEXT:    s_and_b32 s11, s2, 32
; GFX12-NEXT:    s_mul_i32 s8, s3, s10
; GFX12-NEXT:    s_xor_b32 s4, s4, s5
; GFX12-NEXT:    s_and_b32 s12, s2, 64
; GFX12-NEXT:    s_mul_i32 s5, s3, s11
; GFX12-NEXT:    s_xor_b32 s4, s4, s8
; GFX12-NEXT:    s_and_b32 s13, s2, 0x80
; GFX12-NEXT:    s_mul_i32 s8, s3, s12
; GFX12-NEXT:    s_xor_b32 s4, s4, s5
; GFX12-NEXT:    s_and_b32 s14, s2, 0x100
; GFX12-NEXT:    s_mul_i32 s5, s3, s13
; GFX12-NEXT:    s_xor_b32 s4, s4, s8
; GFX12-NEXT:    s_and_b32 s15, s2, 0x200
; GFX12-NEXT:    s_mul_i32 s8, s3, s14
; GFX12-NEXT:    s_xor_b32 s4, s4, s5
; GFX12-NEXT:    s_and_b32 s16, s2, 0x400
; GFX12-NEXT:    s_mul_i32 s5, s3, s15
; GFX12-NEXT:    s_xor_b32 s4, s4, s8
; GFX12-NEXT:    s_and_b32 s17, s2, 0x800
; GFX12-NEXT:    s_mul_i32 s8, s3, s16
; GFX12-NEXT:    s_xor_b32 s4, s4, s5
; GFX12-NEXT:    s_and_b32 s18, s2, 0x1000
; GFX12-NEXT:    s_mul_i32 s5, s3, s17
; GFX12-NEXT:    s_xor_b32 s4, s4, s8
; GFX12-NEXT:    s_and_b32 s19, s2, 0x2000
; GFX12-NEXT:    s_mul_i32 s8, s3, s18
; GFX12-NEXT:    s_xor_b32 s4, s4, s5
; GFX12-NEXT:    s_and_b32 s20, s2, 0x4000
; GFX12-NEXT:    s_mul_i32 s5, s3, s19
; GFX12-NEXT:    s_xor_b32 s4, s4, s8
; GFX12-NEXT:    s_and_b32 s21, s2, 0x8000
; GFX12-NEXT:    s_mul_i32 s8, s3, s20
; GFX12-NEXT:    s_xor_b32 s4, s4, s5
; GFX12-NEXT:    s_and_b32 s22, s2, 0x10000
; GFX12-NEXT:    s_mul_i32 s5, s3, s21
; GFX12-NEXT:    s_xor_b32 s4, s4, s8
; GFX12-NEXT:    s_and_b32 s23, s2, 0x20000
; GFX12-NEXT:    s_mul_i32 s8, s3, s22
; GFX12-NEXT:    s_xor_b32 s4, s4, s5
; GFX12-NEXT:    s_and_b32 s24, s2, 0x40000
; GFX12-NEXT:    s_mul_i32 s5, s3, s23
; GFX12-NEXT:    s_xor_b32 s4, s4, s8
; GFX12-NEXT:    s_and_b32 s25, s2, 0x80000
; GFX12-NEXT:    s_mul_i32 s8, s3, s24
; GFX12-NEXT:    s_xor_b32 s4, s4, s5
; GFX12-NEXT:    s_and_b32 s26, s2, 0x100000
; GFX12-NEXT:    s_mul_i32 s5, s3, s25
; GFX12-NEXT:    s_xor_b32 s4, s4, s8
; GFX12-NEXT:    s_and_b32 s27, s2, 0x200000
; GFX12-NEXT:    s_mul_i32 s8, s3, s26
; GFX12-NEXT:    s_xor_b32 s4, s4, s5
; GFX12-NEXT:    s_and_b32 s28, s2, 0x400000
; GFX12-NEXT:    s_mul_i32 s5, s3, s27
; GFX12-NEXT:    s_xor_b32 s4, s4, s8
; GFX12-NEXT:    s_and_b32 s29, s2, 0x800000
; GFX12-NEXT:    s_mul_i32 s8, s3, s28
; GFX12-NEXT:    s_xor_b32 s4, s4, s5
; GFX12-NEXT:    s_and_b32 s30, s2, 0x1000000
; GFX12-NEXT:    s_mul_i32 s5, s3, s29
; GFX12-NEXT:    s_xor_b32 s4, s4, s8
; GFX12-NEXT:    s_and_b32 s31, s2, 0x2000000
; GFX12-NEXT:    s_mul_i32 s8, s3, s30
; GFX12-NEXT:    s_xor_b32 s4, s4, s5
; GFX12-NEXT:    s_and_b32 s33, s2, 0x4000000
; GFX12-NEXT:    s_mul_i32 s5, s3, s31
; GFX12-NEXT:    s_xor_b32 s4, s4, s8
; GFX12-NEXT:    s_and_b32 s34, s2, 0x8000000
; GFX12-NEXT:    s_mul_i32 s8, s3, s33
; GFX12-NEXT:    s_xor_b32 s4, s4, s5
; GFX12-NEXT:    s_and_b32 s35, s2, 0x10000000
; GFX12-NEXT:    s_mul_i32 s5, s3, s34
; GFX12-NEXT:    s_xor_b32 s4, s4, s8
; GFX12-NEXT:    s_and_b32 s36, s2, 0x20000000
; GFX12-NEXT:    s_mul_i32 s8, s3, s35
; GFX12-NEXT:    s_xor_b32 s4, s4, s5
; GFX12-NEXT:    s_and_b32 s37, s2, 2.0
; GFX12-NEXT:    s_mul_i32 s5, s3, s36
; GFX12-NEXT:    s_xor_b32 s4, s4, s8
; GFX12-NEXT:    s_mul_i32 s8, s3, s37
; GFX12-NEXT:    s_xor_b32 s4, s4, s5
; GFX12-NEXT:    s_and_b32 s2, s2, 0x80000000
; GFX12-NEXT:    s_xor_b32 s4, s4, s8
; GFX12-NEXT:    s_mul_i32 s3, s3, s2
; GFX12-NEXT:    s_mov_b32 s5, s1
; GFX12-NEXT:    s_xor_b32 s2, s4, s3
; GFX12-NEXT:    s_mov_b32 s4, s0
; GFX12-NEXT:    v_mov_b32_e32 v0, s2
; GFX12-NEXT:    buffer_store_b32 v0, off, s[4:7], null
; GFX12-NEXT:    s_endpgm
;
; GFX1250-LABEL: test_clmul_i32:
; GFX1250:       ; %bb.0:
; GFX1250-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24 nv
; GFX1250-NEXT:    s_mov_b32 s6, -1
; GFX1250-NEXT:    s_mov_b32 s7, 0x31016000
; GFX1250-NEXT:    s_mov_b32 s10, s6
; GFX1250-NEXT:    s_mov_b32 s11, s7
; GFX1250-NEXT:    s_wait_kmcnt 0x0
; GFX1250-NEXT:    s_mov_b32 s8, s2
; GFX1250-NEXT:    s_mov_b32 s9, s3
; GFX1250-NEXT:    buffer_load_b64 v[0:1], off, s[8:11], null
; GFX1250-NEXT:    s_wait_loadcnt 0x0
; GFX1250-NEXT:    v_readfirstlane_b32 s2, v1
; GFX1250-NEXT:    v_readfirstlane_b32 s3, v0
; GFX1250-NEXT:    s_and_b32 s4, s2, 2
; GFX1250-NEXT:    s_and_b32 s5, s2, 1
; GFX1250-NEXT:    s_and_b32 s8, s2, 4
; GFX1250-NEXT:    s_mul_i32 s4, s3, s4
; GFX1250-NEXT:    s_mul_i32 s5, s3, s5
; GFX1250-NEXT:    s_and_b32 s9, s2, 8
; GFX1250-NEXT:    s_mul_i32 s8, s3, s8
; GFX1250-NEXT:    s_xor_b32 s4, s5, s4
; GFX1250-NEXT:    s_and_b32 s10, s2, 16
; GFX1250-NEXT:    s_mul_i32 s5, s3, s9
; GFX1250-NEXT:    s_xor_b32 s4, s4, s8
; GFX1250-NEXT:    s_and_b32 s11, s2, 32
; GFX1250-NEXT:    s_mul_i32 s8, s3, s10
; GFX1250-NEXT:    s_xor_b32 s4, s4, s5
; GFX1250-NEXT:    s_and_b32 s12, s2, 64
; GFX1250-NEXT:    s_mul_i32 s5, s3, s11
; GFX1250-NEXT:    s_xor_b32 s4, s4, s8
; GFX1250-NEXT:    s_and_b32 s13, s2, 0x80
; GFX1250-NEXT:    s_mul_i32 s8, s3, s12
; GFX1250-NEXT:    s_xor_b32 s4, s4, s5
; GFX1250-NEXT:    s_and_b32 s14, s2, 0x100
; GFX1250-NEXT:    s_mul_i32 s5, s3, s13
; GFX1250-NEXT:    s_xor_b32 s4, s4, s8
; GFX1250-NEXT:    s_and_b32 s15, s2, 0x200
; GFX1250-NEXT:    s_mul_i32 s8, s3, s14
; GFX1250-NEXT:    s_xor_b32 s4, s4, s5
; GFX1250-NEXT:    s_and_b32 s16, s2, 0x400
; GFX1250-NEXT:    s_mul_i32 s5, s3, s15
; GFX1250-NEXT:    s_xor_b32 s4, s4, s8
; GFX1250-NEXT:    s_and_b32 s17, s2, 0x800
; GFX1250-NEXT:    s_mul_i32 s8, s3, s16
; GFX1250-NEXT:    s_xor_b32 s4, s4, s5
; GFX1250-NEXT:    s_and_b32 s18, s2, 0x1000
; GFX1250-NEXT:    s_mul_i32 s5, s3, s17
; GFX1250-NEXT:    s_xor_b32 s4, s4, s8
; GFX1250-NEXT:    s_and_b32 s19, s2, 0x2000
; GFX1250-NEXT:    s_mul_i32 s8, s3, s18
; GFX1250-NEXT:    s_xor_b32 s4, s4, s5
; GFX1250-NEXT:    s_and_b32 s20, s2, 0x4000
; GFX1250-NEXT:    s_mul_i32 s5, s3, s19
; GFX1250-NEXT:    s_xor_b32 s4, s4, s8
; GFX1250-NEXT:    s_and_b32 s21, s2, 0x8000
; GFX1250-NEXT:    s_mul_i32 s8, s3, s20
; GFX1250-NEXT:    s_xor_b32 s4, s4, s5
; GFX1250-NEXT:    s_and_b32 s22, s2, 0x10000
; GFX1250-NEXT:    s_mul_i32 s5, s3, s21
; GFX1250-NEXT:    s_xor_b32 s4, s4, s8
; GFX1250-NEXT:    s_and_b32 s23, s2, 0x20000
; GFX1250-NEXT:    s_mul_i32 s8, s3, s22
; GFX1250-NEXT:    s_xor_b32 s4, s4, s5
; GFX1250-NEXT:    s_and_b32 s24, s2, 0x40000
; GFX1250-NEXT:    s_mul_i32 s5, s3, s23
; GFX1250-NEXT:    s_xor_b32 s4, s4, s8
; GFX1250-NEXT:    s_and_b32 s25, s2, 0x80000
; GFX1250-NEXT:    s_mul_i32 s8, s3, s24
; GFX1250-NEXT:    s_xor_b32 s4, s4, s5
; GFX1250-NEXT:    s_and_b32 s26, s2, 0x100000
; GFX1250-NEXT:    s_mul_i32 s5, s3, s25
; GFX1250-NEXT:    s_xor_b32 s4, s4, s8
; GFX1250-NEXT:    s_and_b32 s27, s2, 0x200000
; GFX1250-NEXT:    s_mul_i32 s8, s3, s26
; GFX1250-NEXT:    s_xor_b32 s4, s4, s5
; GFX1250-NEXT:    s_and_b32 s28, s2, 0x400000
; GFX1250-NEXT:    s_mul_i32 s5, s3, s27
; GFX1250-NEXT:    s_xor_b32 s4, s4, s8
; GFX1250-NEXT:    s_and_b32 s29, s2, 0x800000
; GFX1250-NEXT:    s_mul_i32 s8, s3, s28
; GFX1250-NEXT:    s_xor_b32 s4, s4, s5
; GFX1250-NEXT:    s_and_b32 s30, s2, 0x1000000
; GFX1250-NEXT:    s_mul_i32 s5, s3, s29
; GFX1250-NEXT:    s_xor_b32 s4, s4, s8
; GFX1250-NEXT:    s_and_b32 s31, s2, 0x2000000
; GFX1250-NEXT:    s_mul_i32 s8, s3, s30
; GFX1250-NEXT:    s_xor_b32 s4, s4, s5
; GFX1250-NEXT:    s_and_b32 s33, s2, 0x4000000
; GFX1250-NEXT:    s_mul_i32 s5, s3, s31
; GFX1250-NEXT:    s_xor_b32 s4, s4, s8
; GFX1250-NEXT:    s_and_b32 s34, s2, 0x8000000
; GFX1250-NEXT:    s_mul_i32 s8, s3, s33
; GFX1250-NEXT:    s_xor_b32 s4, s4, s5
; GFX1250-NEXT:    s_and_b32 s35, s2, 0x10000000
; GFX1250-NEXT:    s_mul_i32 s5, s3, s34
; GFX1250-NEXT:    s_xor_b32 s4, s4, s8
; GFX1250-NEXT:    s_and_b32 s36, s2, 0x20000000
; GFX1250-NEXT:    s_mul_i32 s8, s3, s35
; GFX1250-NEXT:    s_xor_b32 s4, s4, s5
; GFX1250-NEXT:    s_and_b32 s37, s2, 2.0
; GFX1250-NEXT:    s_mul_i32 s5, s3, s36
; GFX1250-NEXT:    s_xor_b32 s4, s4, s8
; GFX1250-NEXT:    s_mul_i32 s8, s3, s37
; GFX1250-NEXT:    s_xor_b32 s4, s4, s5
; GFX1250-NEXT:    s_and_b32 s2, s2, 0x80000000
; GFX1250-NEXT:    s_xor_b32 s4, s4, s8
; GFX1250-NEXT:    s_mul_i32 s3, s3, s2
; GFX1250-NEXT:    s_mov_b32 s5, s1
; GFX1250-NEXT:    s_xor_b32 s2, s4, s3
; GFX1250-NEXT:    s_mov_b32 s4, s0
; GFX1250-NEXT:    v_mov_b32_e32 v0, s2
; GFX1250-NEXT:    buffer_store_b32 v0, off, s[4:7], null
; GFX1250-NEXT:    s_endpgm
;
; EG-LABEL: test_clmul_i32:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 116, @9, KC0[], KC1[]
; EG-NEXT:    ALU 10, @126, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:     VTX_READ_64 T0.XY, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:     MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:     AND_INT T0.W, T0.Y, 1,
; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:     MULLO_INT * T0.Z, T0.X, PV.W,
; EG-NEXT:     AND_INT T0.W, T0.Y, literal.x,
; EG-NEXT:     MULLO_INT * T1.X, T0.X, T1.W,
; EG-NEXT:    4(5.605194e-45), 0(0.000000e+00)
; EG-NEXT:     AND_INT T1.Z, T0.Y, literal.x,
; EG-NEXT:     XOR_INT T1.W, T0.Z, PS,
; EG-NEXT:     MULLO_INT * T0.Z, T0.X, PV.W,
; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
; EG-NEXT:     AND_INT T2.Z, T0.Y, literal.x,
; EG-NEXT:     XOR_INT T0.W, PV.W, PS,
; EG-NEXT:     MULLO_INT * T0.Z, T0.X, PV.Z,
; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT:     AND_INT T1.Z, T0.Y, literal.x,
; EG-NEXT:     XOR_INT T0.W, PV.W, PS,
; EG-NEXT:     MULLO_INT * T0.Z, T0.X, PV.Z,
; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT:     AND_INT T2.Z, T0.Y, literal.x,
; EG-NEXT:     XOR_INT T0.W, PV.W, PS,
; EG-NEXT:     MULLO_INT * T0.Z, T0.X, PV.Z,
; EG-NEXT:    64(8.968310e-44), 0(0.000000e+00)
; EG-NEXT:     AND_INT T1.Z, T0.Y, literal.x,
; EG-NEXT:     XOR_INT T0.W, PV.W, PS,
; EG-NEXT:     MULLO_INT * T0.Z, T0.X, PV.Z,
; EG-NEXT:    128(1.793662e-43), 0(0.000000e+00)
; EG-NEXT:     AND_INT T2.Z, T0.Y, literal.x,
; EG-NEXT:     XOR_INT T0.W, PV.W, PS,
; EG-NEXT:     MULLO_INT * T0.Z, T0.X, PV.Z,
; EG-NEXT:    256(3.587324e-43), 0(0.000000e+00)
; EG-NEXT:     AND_INT T1.Z, T0.Y, literal.x,
; EG-NEXT:     XOR_INT T0.W, PV.W, PS,
; EG-NEXT:     MULLO_INT * T0.Z, T0.X, PV.Z,
; EG-NEXT:    512(7.174648e-43), 0(0.000000e+00)
; EG-NEXT:     AND_INT T2.Z, T0.Y, literal.x,
; EG-NEXT:     XOR_INT T0.W, PV.W, PS,
; EG-NEXT:     MULLO_INT * T0.Z, T0.X, PV.Z,
; EG-NEXT:    1024(1.434930e-42), 0(0.000000e+00)
; EG-NEXT:     AND_INT T1.Z, T0.Y, literal.x,
; EG-NEXT:     XOR_INT T0.W, PV.W, PS,
; EG-NEXT:     MULLO_INT * T0.Z, T0.X, PV.Z,
; EG-NEXT:    2048(2.869859e-42), 0(0.000000e+00)
; EG-NEXT:     AND_INT T2.Z, T0.Y, literal.x,
; EG-NEXT:     XOR_INT T0.W, PV.W, PS,
; EG-NEXT:     MULLO_INT * T0.Z, T0.X, PV.Z,
; EG-NEXT:    4096(5.739719e-42), 0(0.000000e+00)
; EG-NEXT:     AND_INT T1.Z, T0.Y, literal.x,
; EG-NEXT:     XOR_INT T0.W, PV.W, PS,
; EG-NEXT:     MULLO_INT * T0.Z, T0.X, PV.Z,
; EG-NEXT:    8192(1.147944e-41), 0(0.000000e+00)
; EG-NEXT:     AND_INT T2.Z, T0.Y, literal.x,
; EG-NEXT:     XOR_INT T0.W, PV.W, PS,
; EG-NEXT:     MULLO_INT * T0.Z, T0.X, PV.Z,
; EG-NEXT:    16384(2.295887e-41), 0(0.000000e+00)
; EG-NEXT:     AND_INT T1.Z, T0.Y, literal.x,
; EG-NEXT:     XOR_INT T0.W, PV.W, PS,
; EG-NEXT:     MULLO_INT * T0.Z, T0.X, PV.Z,
; EG-NEXT:    32768(4.591775e-41), 0(0.000000e+00)
; EG-NEXT:     AND_INT T2.Z, T0.Y, literal.x,
; EG-NEXT:     XOR_INT T0.W, PV.W, PS,
; EG-NEXT:     MULLO_INT * T0.Z, T0.X, PV.Z,
; EG-NEXT:    65536(9.183550e-41), 0(0.000000e+00)
; EG-NEXT:     AND_INT T1.Z, T0.Y, literal.x,
; EG-NEXT:     XOR_INT T0.W, PV.W, PS,
; EG-NEXT:     MULLO_INT * T0.Z, T0.X, PV.Z,
; EG-NEXT:    131072(1.836710e-40), 0(0.000000e+00)
; EG-NEXT:     AND_INT T2.Z, T0.Y, literal.x,
; EG-NEXT:     XOR_INT T0.W, PV.W, PS,
; EG-NEXT:     MULLO_INT * T0.Z, T0.X, PV.Z,
; EG-NEXT:    262144(3.673420e-40), 0(0.000000e+00)
; EG-NEXT:     AND_INT T1.Z, T0.Y, literal.x,
; EG-NEXT:     XOR_INT T0.W, PV.W, PS,
; EG-NEXT:     MULLO_INT * T0.Z, T0.X, PV.Z,
; EG-NEXT:    524288(7.346840e-40), 0(0.000000e+00)
; EG-NEXT:     AND_INT T2.Z, T0.Y, literal.x,
; EG-NEXT:     XOR_INT T0.W, PV.W, PS,
; EG-NEXT:     MULLO_INT * T0.Z, T0.X, PV.Z,
; EG-NEXT:    1048576(1.469368e-39), 0(0.000000e+00)
; EG-NEXT:     AND_INT T1.Z, T0.Y, literal.x,
; EG-NEXT:     XOR_INT T0.W, PV.W, PS,
; EG-NEXT:     MULLO_INT * T0.Z, T0.X, PV.Z,
; EG-NEXT:    2097152(2.938736e-39), 0(0.000000e+00)
; EG-NEXT:     AND_INT T2.Z, T0.Y, literal.x,
; EG-NEXT:     XOR_INT T0.W, PV.W, PS,
; EG-NEXT:     MULLO_INT * T0.Z, T0.X, PV.Z,
; EG-NEXT:    4194304(5.877472e-39), 0(0.000000e+00)
; EG-NEXT:     AND_INT T1.Z, T0.Y, literal.x,
; EG-NEXT:     XOR_INT T0.W, PV.W, PS,
; EG-NEXT:     MULLO_INT * T0.Z, T0.X, PV.Z,
; EG-NEXT:    8388608(1.175494e-38), 0(0.000000e+00)
; EG-NEXT:     AND_INT T2.Z, T0.Y, literal.x,
; EG-NEXT:     XOR_INT T0.W, PV.W, PS,
; EG-NEXT:     MULLO_INT * T0.Z, T0.X, PV.Z,
; EG-NEXT:    16777216(2.350989e-38), 0(0.000000e+00)
; EG-NEXT:     AND_INT T1.Z, T0.Y, literal.x,
; EG-NEXT:     XOR_INT T0.W, PV.W, PS,
; EG-NEXT:     MULLO_INT * T0.Z, T0.X, PV.Z,
; EG-NEXT:    33554432(9.403955e-38), 0(0.000000e+00)
; EG-NEXT:     AND_INT T2.Z, T0.Y, literal.x,
; EG-NEXT:     XOR_INT T0.W, PV.W, PS,
; EG-NEXT:     MULLO_INT * T0.Z, T0.X, PV.Z,
; EG-NEXT:    67108864(1.504633e-36), 0(0.000000e+00)
; EG-NEXT:     AND_INT T1.Z, T0.Y, literal.x,
; EG-NEXT:     XOR_INT T0.W, PV.W, PS,
; EG-NEXT:     MULLO_INT * T0.Z, T0.X, PV.Z,
; EG-NEXT:    134217728(3.851860e-34), 0(0.000000e+00)
; EG-NEXT:     AND_INT T2.Z, T0.Y, literal.x,
; EG-NEXT:     XOR_INT T0.W, PV.W, PS,
; EG-NEXT:     MULLO_INT * T0.Z, T0.X, PV.Z,
; EG-NEXT:    268435456(2.524355e-29), 0(0.000000e+00)
; EG-NEXT:     AND_INT T1.Z, T0.Y, literal.x,
; EG-NEXT:     XOR_INT T0.W, PV.W, PS,
; EG-NEXT:     MULLO_INT * T0.Z, T0.X, PV.Z,
; EG-NEXT:    536870912(1.084202e-19), 0(0.000000e+00)
; EG-NEXT:     AND_INT * T2.Z, T0.Y, literal.x,
; EG-NEXT:    1073741824(2.000000e+00), 0(0.000000e+00)
; EG-NEXT:    ALU clause starting at 126:
; EG-NEXT:     XOR_INT T0.W, T0.W, T0.Z, BS:VEC_021/SCL_122
; EG-NEXT:     MULLO_INT * T0.Z, T0.X, T1.Z,
; EG-NEXT:     AND_INT T1.Z, T0.Y, literal.x,
; EG-NEXT:     XOR_INT T0.W, PV.W, PS,
; EG-NEXT:     MULLO_INT * T0.Y, T0.X, T2.Z,
; EG-NEXT:    -2147483648(-0.000000e+00), 0(0.000000e+00)
; EG-NEXT:     XOR_INT T0.W, PV.W, PS,
; EG-NEXT:     MULLO_INT * T0.X, T0.X, PV.Z,
; EG-NEXT:     XOR_INT T0.X, PV.W, PS,
; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %b_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1
  %a = load i32, ptr addrspace(1) %in
  %b = load i32, ptr addrspace(1) %b_ptr
  %res = call i32 @llvm.clmul.i32(i32 %a, i32 %b)
  store i32 %res, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @test_clmulr_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: test_clmulr_i32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_mov_b32 s10, s2
; SI-NEXT:    s_mov_b32 s11, s3
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s6
; SI-NEXT:    s_mov_b32 s9, s7
; SI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s7, 0
; SI-NEXT:    s_mov_b32 s21, s7
; SI-NEXT:    s_mov_b32 s0, s4
; SI-NEXT:    s_mov_b32 s1, s5
; SI-NEXT:    s_mov_b32 s9, s7
; SI-NEXT:    s_mov_b32 s15, s7
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_mov_b32 s13, s7
; SI-NEXT:    s_mov_b32 s17, s7
; SI-NEXT:    s_mov_b32 s19, s7
; SI-NEXT:    s_mov_b32 s23, s7
; SI-NEXT:    s_mov_b32 s25, s7
; SI-NEXT:    s_mov_b32 s27, s7
; SI-NEXT:    s_mov_b32 s29, s7
; SI-NEXT:    s_mov_b32 s31, s7
; SI-NEXT:    s_mov_b32 s35, s7
; SI-NEXT:    s_mov_b32 s37, s7
; SI-NEXT:    s_mov_b32 s39, s7
; SI-NEXT:    s_mov_b32 s41, s7
; SI-NEXT:    s_mov_b32 s43, s7
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_readfirstlane_b32 s33, v1
; SI-NEXT:    s_and_b32 s20, s33, 2
; SI-NEXT:    v_readfirstlane_b32 s6, v0
; SI-NEXT:    s_bfe_i32 s8, s33, 0x10000
; SI-NEXT:    v_cmp_eq_u64_e64 s[20:21], s[20:21], 0
; SI-NEXT:    s_lshl_b64 s[4:5], s[6:7], 1
; SI-NEXT:    s_and_b32 s8, s8, s6
; SI-NEXT:    s_and_b64 s[20:21], s[20:21], exec
; SI-NEXT:    s_cselect_b32 s21, 0, s5
; SI-NEXT:    s_cselect_b32 s20, 0, s4
; SI-NEXT:    s_and_b32 s14, s33, 4
; SI-NEXT:    s_xor_b64 s[20:21], s[8:9], s[20:21]
; SI-NEXT:    v_cmp_eq_u64_e64 s[8:9], s[14:15], 0
; SI-NEXT:    s_lshl_b64 s[14:15], s[6:7], 2
; SI-NEXT:    s_and_b64 s[8:9], s[8:9], exec
; SI-NEXT:    s_cselect_b32
s15, 0, s15 ; SI-NEXT: s_cselect_b32 s14, 0, s14 ; SI-NEXT: s_and_b32 s10, s33, 8 ; SI-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], 0 ; SI-NEXT: s_xor_b64 s[14:15], s[20:21], s[14:15] ; SI-NEXT: s_lshl_b64 s[20:21], s[6:7], 3 ; SI-NEXT: s_and_b64 s[10:11], s[10:11], exec ; SI-NEXT: s_cselect_b32 s11, 0, s21 ; SI-NEXT: s_cselect_b32 s10, 0, s20 ; SI-NEXT: s_and_b32 s12, s33, 16 ; SI-NEXT: v_cmp_eq_u64_e64 s[12:13], s[12:13], 0 ; SI-NEXT: s_lshl_b64 s[20:21], s[6:7], 4 ; SI-NEXT: s_xor_b64 s[10:11], s[14:15], s[10:11] ; SI-NEXT: s_and_b64 s[12:13], s[12:13], exec ; SI-NEXT: s_cselect_b32 s13, 0, s21 ; SI-NEXT: s_cselect_b32 s12, 0, s20 ; SI-NEXT: s_and_b32 s16, s33, 32 ; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[16:17], 0 ; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 5 ; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec ; SI-NEXT: s_cselect_b32 s13, 0, s15 ; SI-NEXT: s_cselect_b32 s12, 0, s14 ; SI-NEXT: s_and_b32 s18, s33, 64 ; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[18:19], 0 ; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 6 ; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec ; SI-NEXT: s_cselect_b32 s13, 0, s15 ; SI-NEXT: s_cselect_b32 s12, 0, s14 ; SI-NEXT: s_and_b32 s22, s33, 0x80 ; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[22:23], 0 ; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 7 ; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec ; SI-NEXT: s_cselect_b32 s13, 0, s15 ; SI-NEXT: s_cselect_b32 s12, 0, s14 ; SI-NEXT: s_and_b32 s24, s33, 0x100 ; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[24:25], 0 ; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 8 ; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec ; SI-NEXT: s_cselect_b32 s13, 0, s15 ; SI-NEXT: s_cselect_b32 s12, 0, s14 ; SI-NEXT: s_and_b32 s26, s33, 0x200 ; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[26:27], 0 ; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 9 ; SI-NEXT: s_xor_b64 s[10:11], s[10:11], 
s[12:13] ; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec ; SI-NEXT: s_cselect_b32 s13, 0, s15 ; SI-NEXT: s_cselect_b32 s12, 0, s14 ; SI-NEXT: s_and_b32 s28, s33, 0x400 ; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[28:29], 0 ; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 10 ; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec ; SI-NEXT: s_cselect_b32 s13, 0, s15 ; SI-NEXT: s_cselect_b32 s12, 0, s14 ; SI-NEXT: s_and_b32 s30, s33, 0x800 ; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[30:31], 0 ; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 11 ; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec ; SI-NEXT: s_cselect_b32 s13, 0, s15 ; SI-NEXT: s_cselect_b32 s12, 0, s14 ; SI-NEXT: s_and_b32 s34, s33, 0x1000 ; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[34:35], 0 ; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 12 ; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec ; SI-NEXT: s_cselect_b32 s13, 0, s15 ; SI-NEXT: s_cselect_b32 s12, 0, s14 ; SI-NEXT: s_and_b32 s36, s33, 0x2000 ; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[36:37], 0 ; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 13 ; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec ; SI-NEXT: s_cselect_b32 s13, 0, s15 ; SI-NEXT: s_cselect_b32 s12, 0, s14 ; SI-NEXT: s_and_b32 s38, s33, 0x4000 ; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[38:39], 0 ; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 14 ; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec ; SI-NEXT: s_cselect_b32 s13, 0, s15 ; SI-NEXT: s_cselect_b32 s12, 0, s14 ; SI-NEXT: s_and_b32 s40, s33, 0x8000 ; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[40:41], 0 ; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 15 ; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec ; SI-NEXT: s_cselect_b32 s13, 0, s15 ; SI-NEXT: s_cselect_b32 s12, 0, s14 ; SI-NEXT: s_and_b32 s42, s33, 0x10000 ; SI-NEXT: v_cmp_eq_u64_e64 
s[16:17], s[42:43], 0 ; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 16 ; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec ; SI-NEXT: s_mov_b32 s5, s7 ; SI-NEXT: s_cselect_b32 s13, 0, s15 ; SI-NEXT: s_cselect_b32 s12, 0, s14 ; SI-NEXT: s_and_b32 s4, s33, 0x20000 ; SI-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], 0 ; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 17 ; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; SI-NEXT: s_and_b64 s[4:5], s[4:5], exec ; SI-NEXT: s_mov_b32 s9, s7 ; SI-NEXT: s_cselect_b32 s5, 0, s15 ; SI-NEXT: s_cselect_b32 s4, 0, s14 ; SI-NEXT: s_and_b32 s8, s33, 0x40000 ; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0 ; SI-NEXT: s_lshl_b64 s[12:13], s[6:7], 18 ; SI-NEXT: s_xor_b64 s[4:5], s[10:11], s[4:5] ; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec ; SI-NEXT: s_cselect_b32 s9, 0, s13 ; SI-NEXT: s_cselect_b32 s8, 0, s12 ; SI-NEXT: s_and_b32 s10, s33, 0x80000 ; SI-NEXT: s_mov_b32 s11, s7 ; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[10:11], 0 ; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 19 ; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec ; SI-NEXT: s_cselect_b32 s9, 0, s11 ; SI-NEXT: s_cselect_b32 s8, 0, s10 ; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; SI-NEXT: s_and_b32 s8, s33, 0x100000 ; SI-NEXT: s_mov_b32 s9, s7 ; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0 ; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 20 ; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec ; SI-NEXT: s_cselect_b32 s9, 0, s11 ; SI-NEXT: s_cselect_b32 s8, 0, s10 ; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; SI-NEXT: s_and_b32 s8, s33, 0x200000 ; SI-NEXT: s_mov_b32 s9, s7 ; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0 ; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 21 ; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec ; SI-NEXT: s_cselect_b32 s9, 0, s11 ; SI-NEXT: s_cselect_b32 s8, 0, s10 ; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; SI-NEXT: s_and_b32 s8, s33, 0x400000 ; SI-NEXT: s_mov_b32 s9, s7 ; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0 ; SI-NEXT: s_lshl_b64 
s[10:11], s[6:7], 22 ; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec ; SI-NEXT: s_cselect_b32 s9, 0, s11 ; SI-NEXT: s_cselect_b32 s8, 0, s10 ; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; SI-NEXT: s_and_b32 s8, s33, 0x800000 ; SI-NEXT: s_mov_b32 s9, s7 ; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0 ; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 23 ; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec ; SI-NEXT: s_cselect_b32 s9, 0, s11 ; SI-NEXT: s_cselect_b32 s8, 0, s10 ; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; SI-NEXT: s_and_b32 s8, s33, 0x1000000 ; SI-NEXT: s_mov_b32 s9, s7 ; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0 ; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 24 ; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec ; SI-NEXT: s_cselect_b32 s9, 0, s11 ; SI-NEXT: s_cselect_b32 s8, 0, s10 ; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; SI-NEXT: s_and_b32 s8, s33, 0x2000000 ; SI-NEXT: s_mov_b32 s9, s7 ; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0 ; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 25 ; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec ; SI-NEXT: s_cselect_b32 s9, 0, s11 ; SI-NEXT: s_cselect_b32 s8, 0, s10 ; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; SI-NEXT: s_and_b32 s8, s33, 0x4000000 ; SI-NEXT: s_mov_b32 s9, s7 ; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0 ; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 26 ; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec ; SI-NEXT: s_cselect_b32 s9, 0, s11 ; SI-NEXT: s_cselect_b32 s8, 0, s10 ; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; SI-NEXT: s_and_b32 s8, s33, 0x8000000 ; SI-NEXT: s_mov_b32 s9, s7 ; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0 ; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 27 ; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec ; SI-NEXT: s_cselect_b32 s9, 0, s11 ; SI-NEXT: s_cselect_b32 s8, 0, s10 ; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; SI-NEXT: s_and_b32 s8, s33, 0x10000000 ; SI-NEXT: s_mov_b32 s9, s7 ; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0 ; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 28 ; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec ; SI-NEXT: s_cselect_b32 s9, 0, s11 ; SI-NEXT: 
s_cselect_b32 s8, 0, s10 ; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; SI-NEXT: s_and_b32 s8, s33, 0x20000000 ; SI-NEXT: s_mov_b32 s9, s7 ; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0 ; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 29 ; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec ; SI-NEXT: s_cselect_b32 s9, 0, s11 ; SI-NEXT: s_cselect_b32 s8, 0, s10 ; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; SI-NEXT: s_and_b32 s8, s33, 2.0 ; SI-NEXT: s_mov_b32 s9, s7 ; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0 ; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 30 ; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec ; SI-NEXT: s_cselect_b32 s9, 0, s11 ; SI-NEXT: s_cselect_b32 s8, 0, s10 ; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; SI-NEXT: s_lshl_b64 s[6:7], s[6:7], 31 ; SI-NEXT: s_cmp_gt_i32 s33, -1 ; SI-NEXT: s_cselect_b32 s7, 0, s7 ; SI-NEXT: s_cselect_b32 s6, 0, s6 ; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[6:7] ; SI-NEXT: s_lshr_b64 s[4:5], s[4:5], 31 ; SI-NEXT: v_mov_b32_e32 v0, s4 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: test_clmulr_i32: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_mov_b32 s10, s2 ; VI-NEXT: s_mov_b32 s11, s3 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_mov_b32 s8, s6 ; VI-NEXT: s_mov_b32 s9, s7 ; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 ; VI-NEXT: s_mov_b32 s0, s4 ; VI-NEXT: s_mov_b32 s7, 0 ; VI-NEXT: s_mov_b32 s1, s5 ; VI-NEXT: s_mov_b32 s11, s7 ; VI-NEXT: s_mov_b32 s9, s7 ; VI-NEXT: s_mov_b32 s13, s7 ; VI-NEXT: s_mov_b32 s15, s7 ; VI-NEXT: s_mov_b32 s17, s7 ; VI-NEXT: s_mov_b32 s19, s7 ; VI-NEXT: s_mov_b32 s21, s7 ; VI-NEXT: s_mov_b32 s23, s7 ; VI-NEXT: s_mov_b32 s25, s7 ; VI-NEXT: s_mov_b32 s27, s7 ; VI-NEXT: s_mov_b32 s29, s7 ; VI-NEXT: s_mov_b32 s31, s7 ; VI-NEXT: s_mov_b32 s35, s7 ; VI-NEXT: s_mov_b32 s37, s7 ; VI-NEXT: s_mov_b32 s39, s7 ; VI-NEXT: s_mov_b32 s41, s7 ; VI-NEXT: s_mov_b32 s43, s7 ; VI-NEXT: s_mov_b32 s45, s7 ; VI-NEXT: 
s_mov_b32 s47, s7 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_readfirstlane_b32 s4, v1 ; VI-NEXT: v_readfirstlane_b32 s6, v0 ; VI-NEXT: s_bfe_i32 s5, s4, 0x10000 ; VI-NEXT: s_lshl_b64 s[48:49], s[6:7], 1 ; VI-NEXT: s_and_b32 s10, s4, 2 ; VI-NEXT: s_and_b32 s8, s5, s6 ; VI-NEXT: s_cmp_eq_u64 s[10:11], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s49 ; VI-NEXT: s_cselect_b32 s10, 0, s48 ; VI-NEXT: s_lshl_b64 s[48:49], s[6:7], 2 ; VI-NEXT: s_and_b32 s12, s4, 4 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[12:13], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s49 ; VI-NEXT: s_cselect_b32 s10, 0, s48 ; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 3 ; VI-NEXT: s_and_b32 s14, s4, 8 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[14:15], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 4 ; VI-NEXT: s_and_b32 s16, s4, 16 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[16:17], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 5 ; VI-NEXT: s_and_b32 s18, s4, 32 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[18:19], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 6 ; VI-NEXT: s_and_b32 s20, s4, 64 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[20:21], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 7 ; VI-NEXT: s_and_b32 s22, s4, 0x80 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[22:23], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 8 ; VI-NEXT: s_and_b32 s24, s4, 0x100 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[24:25], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; 
VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 9 ; VI-NEXT: s_and_b32 s26, s4, 0x200 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[26:27], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 10 ; VI-NEXT: s_and_b32 s28, s4, 0x400 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[28:29], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 11 ; VI-NEXT: s_and_b32 s30, s4, 0x800 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[30:31], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 12 ; VI-NEXT: s_and_b32 s34, s4, 0x1000 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[34:35], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 13 ; VI-NEXT: s_and_b32 s36, s4, 0x2000 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[36:37], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 14 ; VI-NEXT: s_and_b32 s38, s4, 0x4000 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[38:39], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 15 ; VI-NEXT: s_and_b32 s40, s4, 0x8000 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[40:41], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 16 ; VI-NEXT: s_and_b32 s42, s4, 0x10000 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[42:43], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 17 ; VI-NEXT: s_and_b32 s44, s4, 0x20000 ; VI-NEXT: s_xor_b64 s[8:9], 
s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[44:45], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 18 ; VI-NEXT: s_and_b32 s46, s4, 0x40000 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[46:47], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 19 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_and_b32 s10, s4, 0x80000 ; VI-NEXT: s_mov_b32 s11, s7 ; VI-NEXT: s_cmp_eq_u64 s[10:11], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 20 ; VI-NEXT: s_and_b32 s12, s4, 0x100000 ; VI-NEXT: s_mov_b32 s13, s7 ; VI-NEXT: s_cmp_eq_u64 s[12:13], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s11 ; VI-NEXT: s_cselect_b32 s10, 0, s10 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 21 ; VI-NEXT: s_and_b32 s12, s4, 0x200000 ; VI-NEXT: s_cmp_eq_u64 s[12:13], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s11 ; VI-NEXT: s_cselect_b32 s10, 0, s10 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 22 ; VI-NEXT: s_and_b32 s12, s4, 0x400000 ; VI-NEXT: s_cmp_eq_u64 s[12:13], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s11 ; VI-NEXT: s_cselect_b32 s10, 0, s10 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 23 ; VI-NEXT: s_and_b32 s12, s4, 0x800000 ; VI-NEXT: s_cmp_eq_u64 s[12:13], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s11 ; VI-NEXT: s_cselect_b32 s10, 0, s10 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 24 ; VI-NEXT: s_and_b32 s12, s4, 0x1000000 ; VI-NEXT: s_cmp_eq_u64 s[12:13], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s11 ; VI-NEXT: s_cselect_b32 s10, 0, s10 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 25 ; VI-NEXT: s_and_b32 s12, s4, 0x2000000 ; VI-NEXT: 
s_cmp_eq_u64 s[12:13], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s11 ; VI-NEXT: s_cselect_b32 s10, 0, s10 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 26 ; VI-NEXT: s_and_b32 s12, s4, 0x4000000 ; VI-NEXT: s_cmp_eq_u64 s[12:13], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s11 ; VI-NEXT: s_cselect_b32 s10, 0, s10 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 27 ; VI-NEXT: s_and_b32 s12, s4, 0x8000000 ; VI-NEXT: s_cmp_eq_u64 s[12:13], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s11 ; VI-NEXT: s_cselect_b32 s10, 0, s10 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 28 ; VI-NEXT: s_and_b32 s12, s4, 0x10000000 ; VI-NEXT: s_cmp_eq_u64 s[12:13], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s11 ; VI-NEXT: s_cselect_b32 s10, 0, s10 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 29 ; VI-NEXT: s_and_b32 s12, s4, 0x20000000 ; VI-NEXT: s_cmp_eq_u64 s[12:13], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s11 ; VI-NEXT: s_cselect_b32 s10, 0, s10 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 30 ; VI-NEXT: s_and_b32 s12, s4, 2.0 ; VI-NEXT: s_cmp_eq_u64 s[12:13], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s11 ; VI-NEXT: s_cselect_b32 s10, 0, s10 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_lshl_b64 s[6:7], s[6:7], 31 ; VI-NEXT: s_cmp_gt_i32 s4, -1 ; VI-NEXT: s_cselect_b32 s5, 0, s7 ; VI-NEXT: s_cselect_b32 s4, 0, s6 ; VI-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5] ; VI-NEXT: s_lshr_b64 s[4:5], s[4:5], 31 ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: test_clmulr_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: s_mov_b32 s6, s2 ; GFX9-NEXT: s_mov_b32 s7, s3 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_mov_b32 s4, s10 ; GFX9-NEXT: s_mov_b32 s5, s11 ; GFX9-NEXT: 
buffer_load_dwordx2 v[0:1], off, s[4:7], 0 ; GFX9-NEXT: s_mov_b32 s0, s8 ; GFX9-NEXT: s_mov_b32 s5, 0 ; GFX9-NEXT: s_mov_b32 s11, s5 ; GFX9-NEXT: s_mov_b32 s7, s5 ; GFX9-NEXT: s_mov_b32 s13, s5 ; GFX9-NEXT: s_mov_b32 s15, s5 ; GFX9-NEXT: s_mov_b32 s17, s5 ; GFX9-NEXT: s_mov_b32 s19, s5 ; GFX9-NEXT: s_mov_b32 s21, s5 ; GFX9-NEXT: s_mov_b32 s23, s5 ; GFX9-NEXT: s_mov_b32 s25, s5 ; GFX9-NEXT: s_mov_b32 s27, s5 ; GFX9-NEXT: s_mov_b32 s29, s5 ; GFX9-NEXT: s_mov_b32 s31, s5 ; GFX9-NEXT: s_mov_b32 s35, s5 ; GFX9-NEXT: s_mov_b32 s37, s5 ; GFX9-NEXT: s_mov_b32 s39, s5 ; GFX9-NEXT: s_mov_b32 s41, s5 ; GFX9-NEXT: s_mov_b32 s43, s5 ; GFX9-NEXT: s_mov_b32 s45, s5 ; GFX9-NEXT: s_mov_b32 s47, s5 ; GFX9-NEXT: s_mov_b32 s1, s9 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s8, v1 ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: s_bfe_i32 s6, s8, 0x10000 ; GFX9-NEXT: s_lshl_b64 s[48:49], s[4:5], 1 ; GFX9-NEXT: s_and_b32 s10, s8, 2 ; GFX9-NEXT: s_and_b32 s6, s6, s4 ; GFX9-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s49 ; GFX9-NEXT: s_cselect_b32 s10, 0, s48 ; GFX9-NEXT: s_lshl_b64 s[48:49], s[4:5], 2 ; GFX9-NEXT: s_and_b32 s12, s8, 4 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s49 ; GFX9-NEXT: s_cselect_b32 s10, 0, s48 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 3 ; GFX9-NEXT: s_and_b32 s14, s8, 8 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[14:15], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 4 ; GFX9-NEXT: s_and_b32 s16, s8, 16 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[16:17], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 5 ; GFX9-NEXT: s_and_b32 s18, s8, 32 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[18:19], 0 ; 
GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 6 ; GFX9-NEXT: s_and_b32 s20, s8, 64 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[20:21], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 7 ; GFX9-NEXT: s_and_b32 s22, s8, 0x80 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[22:23], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 8 ; GFX9-NEXT: s_and_b32 s24, s8, 0x100 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[24:25], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 9 ; GFX9-NEXT: s_and_b32 s26, s8, 0x200 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[26:27], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 10 ; GFX9-NEXT: s_and_b32 s28, s8, 0x400 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[28:29], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 11 ; GFX9-NEXT: s_and_b32 s30, s8, 0x800 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[30:31], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 12 ; GFX9-NEXT: s_and_b32 s34, s8, 0x1000 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[34:35], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 13 ; GFX9-NEXT: s_and_b32 s36, s8, 0x2000 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[36:37], 0 ; GFX9-NEXT: s_cselect_b32 s11, 
0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 14 ; GFX9-NEXT: s_and_b32 s38, s8, 0x4000 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[38:39], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 15 ; GFX9-NEXT: s_and_b32 s40, s8, 0x8000 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[40:41], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 16 ; GFX9-NEXT: s_and_b32 s42, s8, 0x10000 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[42:43], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 17 ; GFX9-NEXT: s_and_b32 s44, s8, 0x20000 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[44:45], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 18 ; GFX9-NEXT: s_and_b32 s46, s8, 0x40000 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[46:47], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 19 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_and_b32 s10, s8, 0x80000 ; GFX9-NEXT: s_mov_b32 s11, s5 ; GFX9-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 20 ; GFX9-NEXT: s_and_b32 s12, s8, 0x100000 ; GFX9-NEXT: s_mov_b32 s13, s5 ; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s11 ; GFX9-NEXT: s_cselect_b32 s10, 0, s10 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 21 ; GFX9-NEXT: s_and_b32 s12, s8, 0x200000 ; GFX9-NEXT: 
s_cmp_eq_u64 s[12:13], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s11 ; GFX9-NEXT: s_cselect_b32 s10, 0, s10 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 22 ; GFX9-NEXT: s_and_b32 s12, s8, 0x400000 ; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s11 ; GFX9-NEXT: s_cselect_b32 s10, 0, s10 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 23 ; GFX9-NEXT: s_and_b32 s12, s8, 0x800000 ; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s11 ; GFX9-NEXT: s_cselect_b32 s10, 0, s10 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 24 ; GFX9-NEXT: s_and_b32 s12, s8, 0x1000000 ; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s11 ; GFX9-NEXT: s_cselect_b32 s10, 0, s10 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 25 ; GFX9-NEXT: s_and_b32 s12, s8, 0x2000000 ; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s11 ; GFX9-NEXT: s_cselect_b32 s10, 0, s10 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 26 ; GFX9-NEXT: s_and_b32 s12, s8, 0x4000000 ; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s11 ; GFX9-NEXT: s_cselect_b32 s10, 0, s10 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 27 ; GFX9-NEXT: s_and_b32 s12, s8, 0x8000000 ; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s11 ; GFX9-NEXT: s_cselect_b32 s10, 0, s10 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 28 ; GFX9-NEXT: s_and_b32 s12, s8, 0x10000000 ; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s11 ; GFX9-NEXT: s_cselect_b32 s10, 0, s10 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 29 ; GFX9-NEXT: s_and_b32 s12, s8, 0x20000000 ; 
GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s11 ; GFX9-NEXT: s_cselect_b32 s10, 0, s10 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 30 ; GFX9-NEXT: s_and_b32 s12, s8, 2.0 ; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s11 ; GFX9-NEXT: s_cselect_b32 s10, 0, s10 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_lshl_b64 s[4:5], s[4:5], 31 ; GFX9-NEXT: s_cmp_gt_i32 s8, -1 ; GFX9-NEXT: s_cselect_b32 s5, 0, s5 ; GFX9-NEXT: s_cselect_b32 s4, 0, s4 ; GFX9-NEXT: s_xor_b64 s[4:5], s[6:7], s[4:5] ; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], 31 ; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: test_clmulr_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX10-NEXT: s_mov_b32 s6, -1 ; GFX10-NEXT: s_mov_b32 s7, 0x31016000 ; GFX10-NEXT: s_mov_b32 s10, s6 ; GFX10-NEXT: s_mov_b32 s11, s7 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_mov_b32 s8, s2 ; GFX10-NEXT: s_mov_b32 s9, s3 ; GFX10-NEXT: s_mov_b32 s3, 0 ; GFX10-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 ; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) ; GFX10-NEXT: s_mov_b32 s11, s3 ; GFX10-NEXT: s_mov_b32 s9, s3 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_readfirstlane_b32 s4, v1 ; GFX10-NEXT: v_readfirstlane_b32 s2, v0 ; GFX10-NEXT: s_bfe_i32 s5, s4, 0x10000 ; GFX10-NEXT: s_and_b32 s10, s4, 2 ; GFX10-NEXT: s_lshl_b64 s[12:13], s[2:3], 1 ; GFX10-NEXT: s_and_b32 s8, s5, s2 ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_mov_b32 s5, s1 ; GFX10-NEXT: s_cselect_b32 s13, 0, s13 ; GFX10-NEXT: s_cselect_b32 s12, 0, s12 ; GFX10-NEXT: s_and_b32 s10, s4, 4 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 2 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 8 ; 
GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 3 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 16 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 4 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 32 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 5 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 64 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 6 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x80 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 7 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x100 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 8 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x200 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 9 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x400 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 10 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 
0x800 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 11 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x1000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 12 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x2000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 13 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x4000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 14 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x8000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 15 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x10000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 16 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x20000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 17 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x40000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 18 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, 
s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x80000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 19 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x100000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 20 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x200000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 21 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x400000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 22 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x800000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 23 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x1000000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 24 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x2000000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 25 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x4000000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 26 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: 
s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x8000000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 27 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x10000000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 28 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x20000000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 29 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 2.0 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 30 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s11, 0, s15 ; GFX10-NEXT: s_cselect_b32 s10, 0, s14 ; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 31 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; GFX10-NEXT: s_cmp_gt_i32 s4, -1 ; GFX10-NEXT: s_mov_b32 s4, s0 ; GFX10-NEXT: s_cselect_b32 s3, 0, s3 ; GFX10-NEXT: s_cselect_b32 s2, 0, s2 ; GFX10-NEXT: s_xor_b64 s[2:3], s[8:9], s[2:3] ; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], 31 ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: test_clmulr_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-NEXT: s_mov_b32 s6, -1 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; GFX11-NEXT: s_mov_b32 s10, s6 ; GFX11-NEXT: s_mov_b32 s11, s7 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s8, s2 ; GFX11-NEXT: s_mov_b32 s9, s3 ; GFX11-NEXT: s_mov_b32 s3, 0 ; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[8:11], 0 ; GFX11-NEXT: s_mov_b32 s11, s3 ; GFX11-NEXT: s_mov_b32 s9, 
s3 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_readfirstlane_b32 s4, v1 ; GFX11-NEXT: v_readfirstlane_b32 s2, v0 ; GFX11-NEXT: s_bfe_i32 s5, s4, 0x10000 ; GFX11-NEXT: s_and_b32 s10, s4, 2 ; GFX11-NEXT: s_lshl_b64 s[12:13], s[2:3], 1 ; GFX11-NEXT: s_and_b32 s8, s5, s2 ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_mov_b32 s5, s1 ; GFX11-NEXT: s_cselect_b32 s13, 0, s13 ; GFX11-NEXT: s_cselect_b32 s12, 0, s12 ; GFX11-NEXT: s_and_b32 s10, s4, 4 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 2 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 8 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 3 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 16 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 4 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 32 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 5 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 64 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 6 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x80 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 7 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x100 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 
8 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x200 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 9 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x400 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 10 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x800 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 11 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x1000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 12 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x2000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 13 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x4000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 14 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x8000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 15 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x10000 ; GFX11-NEXT: 
s_lshl_b64 s[14:15], s[2:3], 16 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x20000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 17 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x40000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 18 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x80000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 19 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x100000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 20 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x200000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 21 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x400000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 22 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x800000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 23 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; 
GFX11-NEXT: s_and_b32 s10, s4, 0x1000000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 24 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x2000000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 25 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x4000000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 26 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x8000000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 27 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x10000000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 28 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x20000000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 29 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 2.0 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 30 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s11, 0, s15 ; GFX11-NEXT: s_cselect_b32 s10, 0, s14 ; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], 31 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; GFX11-NEXT: s_cmp_gt_i32 s4, -1 ; GFX11-NEXT: s_mov_b32 s4, s0 ; GFX11-NEXT: s_cselect_b32 s3, 0, s3 ; 
GFX11-NEXT: s_cselect_b32 s2, 0, s2 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_xor_b64 s[2:3], s[8:9], s[2:3] ; GFX11-NEXT: s_lshr_b64 s[2:3], s[2:3], 31 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0 ; GFX11-NEXT: s_endpgm ; ; GFX12-LABEL: test_clmulr_i32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX12-NEXT: s_mov_b32 s6, -1 ; GFX12-NEXT: s_mov_b32 s7, 0x31016000 ; GFX12-NEXT: s_mov_b32 s10, s6 ; GFX12-NEXT: s_mov_b32 s11, s7 ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_mov_b32 s8, s2 ; GFX12-NEXT: s_mov_b32 s9, s3 ; GFX12-NEXT: s_mov_b32 s3, 0 ; GFX12-NEXT: buffer_load_b64 v[0:1], off, s[8:11], null ; GFX12-NEXT: s_mov_b32 s5, s3 ; GFX12-NEXT: s_mov_b32 s9, s3 ; GFX12-NEXT: s_mov_b32 s11, s3 ; GFX12-NEXT: s_mov_b32 s13, s3 ; GFX12-NEXT: s_mov_b32 s15, s3 ; GFX12-NEXT: s_mov_b32 s17, s3 ; GFX12-NEXT: s_mov_b32 s19, s3 ; GFX12-NEXT: s_mov_b32 s21, s3 ; GFX12-NEXT: s_mov_b32 s23, s3 ; GFX12-NEXT: s_mov_b32 s25, s3 ; GFX12-NEXT: s_mov_b32 s27, s3 ; GFX12-NEXT: s_mov_b32 s29, s3 ; GFX12-NEXT: s_mov_b32 s31, s3 ; GFX12-NEXT: s_mov_b32 s35, s3 ; GFX12-NEXT: s_mov_b32 s37, s3 ; GFX12-NEXT: s_mov_b32 s39, s3 ; GFX12-NEXT: s_mov_b32 s41, s3 ; GFX12-NEXT: s_mov_b32 s43, s3 ; GFX12-NEXT: s_mov_b32 s45, s3 ; GFX12-NEXT: s_mov_b32 s47, s3 ; GFX12-NEXT: s_mov_b32 s49, s3 ; GFX12-NEXT: s_mov_b32 s51, s3 ; GFX12-NEXT: s_mov_b32 s53, s3 ; GFX12-NEXT: s_mov_b32 s55, s3 ; GFX12-NEXT: s_mov_b32 s57, s3 ; GFX12-NEXT: s_mov_b32 s59, s3 ; GFX12-NEXT: s_mov_b32 s61, s3 ; GFX12-NEXT: s_mov_b32 s63, s3 ; GFX12-NEXT: s_mov_b32 s65, s3 ; GFX12-NEXT: s_mov_b32 s67, s3 ; GFX12-NEXT: s_mov_b32 s69, s3 ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: v_readfirstlane_b32 s33, v1 ; GFX12-NEXT: v_readfirstlane_b32 s2, v0 ; GFX12-NEXT: s_and_b32 s4, s33, 2 ; GFX12-NEXT: s_and_b32 s8, s33, 1 ; GFX12-NEXT: s_and_b32 s10, s33, 4 ; 
GFX12-NEXT: s_mul_u64 s[4:5], s[2:3], s[4:5] ; GFX12-NEXT: s_mul_u64 s[8:9], s[2:3], s[8:9] ; GFX12-NEXT: s_and_b32 s12, s33, 8 ; GFX12-NEXT: s_mul_u64 s[10:11], s[2:3], s[10:11] ; GFX12-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5] ; GFX12-NEXT: s_and_b32 s14, s33, 16 ; GFX12-NEXT: s_mul_u64 s[12:13], s[2:3], s[12:13] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11] ; GFX12-NEXT: s_and_b32 s16, s33, 32 ; GFX12-NEXT: s_mul_u64 s[14:15], s[2:3], s[14:15] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[12:13] ; GFX12-NEXT: s_and_b32 s18, s33, 64 ; GFX12-NEXT: s_mul_u64 s[8:9], s[2:3], s[16:17] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15] ; GFX12-NEXT: s_and_b32 s20, s33, 0x80 ; GFX12-NEXT: s_mul_u64 s[10:11], s[2:3], s[18:19] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; GFX12-NEXT: s_and_b32 s22, s33, 0x100 ; GFX12-NEXT: s_mul_u64 s[12:13], s[2:3], s[20:21] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11] ; GFX12-NEXT: s_and_b32 s24, s33, 0x200 ; GFX12-NEXT: s_mul_u64 s[14:15], s[2:3], s[22:23] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[12:13] ; GFX12-NEXT: s_and_b32 s26, s33, 0x400 ; GFX12-NEXT: s_mul_u64 s[8:9], s[2:3], s[24:25] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15] ; GFX12-NEXT: s_and_b32 s28, s33, 0x800 ; GFX12-NEXT: s_mul_u64 s[10:11], s[2:3], s[26:27] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; GFX12-NEXT: s_and_b32 s30, s33, 0x1000 ; GFX12-NEXT: s_mul_u64 s[12:13], s[2:3], s[28:29] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11] ; GFX12-NEXT: s_and_b32 s34, s33, 0x2000 ; GFX12-NEXT: s_mul_u64 s[14:15], s[2:3], s[30:31] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[12:13] ; GFX12-NEXT: s_and_b32 s36, s33, 0x4000 ; GFX12-NEXT: s_mul_u64 s[8:9], s[2:3], s[34:35] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15] ; GFX12-NEXT: s_and_b32 s38, s33, 0x8000 ; GFX12-NEXT: s_mul_u64 s[10:11], s[2:3], s[36:37] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; GFX12-NEXT: s_and_b32 s40, s33, 0x10000 ; GFX12-NEXT: s_mul_u64 s[12:13], s[2:3], s[38:39] ; GFX12-NEXT: 
s_xor_b64 s[4:5], s[4:5], s[10:11] ; GFX12-NEXT: s_and_b32 s42, s33, 0x20000 ; GFX12-NEXT: s_mul_u64 s[14:15], s[2:3], s[40:41] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[12:13] ; GFX12-NEXT: s_and_b32 s44, s33, 0x40000 ; GFX12-NEXT: s_mul_u64 s[8:9], s[2:3], s[42:43] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15] ; GFX12-NEXT: s_and_b32 s46, s33, 0x80000 ; GFX12-NEXT: s_mul_u64 s[10:11], s[2:3], s[44:45] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; GFX12-NEXT: s_and_b32 s48, s33, 0x100000 ; GFX12-NEXT: s_mul_u64 s[12:13], s[2:3], s[46:47] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11] ; GFX12-NEXT: s_and_b32 s50, s33, 0x200000 ; GFX12-NEXT: s_mul_u64 s[14:15], s[2:3], s[48:49] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[12:13] ; GFX12-NEXT: s_and_b32 s52, s33, 0x400000 ; GFX12-NEXT: s_mul_u64 s[8:9], s[2:3], s[50:51] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15] ; GFX12-NEXT: s_and_b32 s54, s33, 0x800000 ; GFX12-NEXT: s_mul_u64 s[10:11], s[2:3], s[52:53] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; GFX12-NEXT: s_and_b32 s56, s33, 0x1000000 ; GFX12-NEXT: s_mul_u64 s[12:13], s[2:3], s[54:55] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11] ; GFX12-NEXT: s_and_b32 s58, s33, 0x2000000 ; GFX12-NEXT: s_mul_u64 s[14:15], s[2:3], s[56:57] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[12:13] ; GFX12-NEXT: s_and_b32 s60, s33, 0x4000000 ; GFX12-NEXT: s_mul_u64 s[8:9], s[2:3], s[58:59] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15] ; GFX12-NEXT: s_and_b32 s62, s33, 0x8000000 ; GFX12-NEXT: s_mul_u64 s[10:11], s[2:3], s[60:61] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; GFX12-NEXT: s_and_b32 s64, s33, 0x10000000 ; GFX12-NEXT: s_mul_u64 s[12:13], s[2:3], s[62:63] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11] ; GFX12-NEXT: s_and_b32 s66, s33, 0x20000000 ; GFX12-NEXT: s_mul_u64 s[14:15], s[2:3], s[64:65] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[12:13] ; GFX12-NEXT: s_and_b32 s68, s33, 2.0 ; GFX12-NEXT: s_mul_u64 s[8:9], s[2:3], s[66:67] ; GFX12-NEXT: s_xor_b64 
s[4:5], s[4:5], s[14:15] ; GFX12-NEXT: s_mul_u64 s[10:11], s[2:3], s[68:69] ; GFX12-NEXT: s_and_b32 s12, s33, 0x80000000 ; GFX12-NEXT: s_mov_b32 s13, s3 ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; GFX12-NEXT: s_mul_u64 s[12:13], s[2:3], s[12:13] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11] ; GFX12-NEXT: s_mul_u64 s[2:3], s[2:3], 0 ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[12:13] ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3] ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3] ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3] ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3] ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3] ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3] ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3] ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3] ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3] ; 
GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3] ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3] ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3] ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3] ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3] ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3] ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3] ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3] ; GFX12-NEXT: s_xor_b64 s[2:3], s[4:5], s[2:3] ; GFX12-NEXT: s_mov_b32 s4, s0 ; GFX12-NEXT: s_lshr_b64 s[2:3], s[2:3], 31 ; GFX12-NEXT: s_mov_b32 s5, s1 ; GFX12-NEXT: v_mov_b32_e32 v0, s2 ; GFX12-NEXT: buffer_store_b32 v0, off, s[4:7], null ; GFX12-NEXT: s_endpgm ; ; GFX1250-LABEL: test_clmulr_i32: ; GFX1250: ; %bb.0: ; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv ; GFX1250-NEXT: s_mov_b32 s6, -1 ; GFX1250-NEXT: s_mov_b32 s7, 0x31016000 ; GFX1250-NEXT: s_mov_b32 s10, s6 ; GFX1250-NEXT: s_mov_b32 s11, s7 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: s_mov_b32 s8, s2 ; GFX1250-NEXT: s_mov_b32 s9, s3 ; 
GFX1250-NEXT: s_mov_b32 s3, 0 ; GFX1250-NEXT: buffer_load_b64 v[0:1], off, s[8:11], null ; GFX1250-NEXT: s_mov_b32 s5, s3 ; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_mov_b64 s[8:9], 0x80000000 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: v_readfirstlane_b32 s4, v1 ; GFX1250-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1250-NEXT: s_and_b64 s[10:11], s[4:5], 2 ; GFX1250-NEXT: s_and_b64 s[12:13], s[4:5], 1 ; GFX1250-NEXT: s_and_b64 s[14:15], s[4:5], 4 ; GFX1250-NEXT: s_mul_u64 s[10:11], s[2:3], s[10:11] ; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[12:13] ; GFX1250-NEXT: s_and_b64 s[16:17], s[4:5], 8 ; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[14:15] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[12:13], s[10:11] ; GFX1250-NEXT: s_and_b64 s[18:19], s[4:5], 16 ; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[16:17] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15] ; GFX1250-NEXT: s_and_b64 s[20:21], s[4:5], 32 ; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[18:19] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; GFX1250-NEXT: s_and_b64 s[22:23], s[4:5], 64 ; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[20:21] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15] ; GFX1250-NEXT: s_and_b64 s[24:25], s[4:5], 0x80 ; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[22:23] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; GFX1250-NEXT: s_and_b64 s[26:27], s[4:5], 0x100 ; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[24:25] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15] ; GFX1250-NEXT: s_and_b64 s[28:29], s[4:5], 0x200 ; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[26:27] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; GFX1250-NEXT: s_and_b64 s[30:31], s[4:5], 0x400 ; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[28:29] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15] ; GFX1250-NEXT: s_and_b64 s[34:35], s[4:5], 0x800 ; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[30:31] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; GFX1250-NEXT: s_and_b64 
s[36:37], s[4:5], 0x1000 ; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[34:35] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15] ; GFX1250-NEXT: s_and_b64 s[38:39], s[4:5], 0x2000 ; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[36:37] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; GFX1250-NEXT: s_and_b64 s[40:41], s[4:5], 0x4000 ; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[38:39] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15] ; GFX1250-NEXT: s_and_b64 s[42:43], s[4:5], 0x8000 ; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[40:41] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; GFX1250-NEXT: s_and_b64 s[44:45], s[4:5], 0x10000 ; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[42:43] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15] ; GFX1250-NEXT: s_and_b64 s[46:47], s[4:5], 0x20000 ; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[44:45] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; GFX1250-NEXT: s_and_b64 s[48:49], s[4:5], 0x40000 ; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[46:47] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15] ; GFX1250-NEXT: s_and_b64 s[50:51], s[4:5], 0x80000 ; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[48:49] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; GFX1250-NEXT: s_and_b64 s[52:53], s[4:5], 0x100000 ; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[50:51] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15] ; GFX1250-NEXT: s_and_b64 s[54:55], s[4:5], 0x200000 ; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[52:53] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; GFX1250-NEXT: s_and_b64 s[56:57], s[4:5], 0x400000 ; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[54:55] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15] ; GFX1250-NEXT: s_and_b64 s[58:59], s[4:5], 0x800000 ; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[56:57] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; GFX1250-NEXT: s_and_b64 s[60:61], s[4:5], 0x1000000 ; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], 
s[58:59] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15] ; GFX1250-NEXT: s_and_b64 s[62:63], s[4:5], 0x2000000 ; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[60:61] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; GFX1250-NEXT: s_and_b64 s[64:65], s[4:5], 0x4000000 ; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[62:63] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15] ; GFX1250-NEXT: s_and_b64 s[66:67], s[4:5], 0x8000000 ; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[64:65] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; GFX1250-NEXT: s_and_b64 s[68:69], s[4:5], 0x10000000 ; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[66:67] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15] ; GFX1250-NEXT: s_and_b64 s[70:71], s[4:5], 0x20000000 ; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[68:69] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; GFX1250-NEXT: s_and_b64 s[72:73], s[4:5], 0x40000000 ; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[70:71] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15] ; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[72:73] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; GFX1250-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9] ; GFX1250-NEXT: s_xor_b64 s[8:9], s[10:11], s[14:15] ; GFX1250-NEXT: s_mul_u64 s[2:3], s[2:3], s[4:5] ; GFX1250-NEXT: s_mov_b32 s4, s0 ; GFX1250-NEXT: s_xor_b64 s[2:3], s[8:9], s[2:3] ; GFX1250-NEXT: s_mov_b32 s5, s1 ; GFX1250-NEXT: s_lshr_b64 s[2:3], s[2:3], 31 ; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1250-NEXT: v_mov_b32_e32 v0, s2 ; GFX1250-NEXT: buffer_store_b32 v0, off, s[4:7], null ; GFX1250-NEXT: s_endpgm ; ; EG-LABEL: test_clmulr_i32: ; EG: ; %bb.0: ; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] ; EG-NEXT: TEX 0 @8 ; EG-NEXT: ALU 98, @11, KC0[], KC1[] ; EG-NEXT: ALU 110, @110, KC0[], KC1[] ; EG-NEXT: ALU 12, @221, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD ; EG-NEXT: Fetch clause starting at 8: ; EG-NEXT: 
VTX_READ_64 T0.XY, T0.X, 0, #1 ; EG-NEXT: ALU clause starting at 10: ; EG-NEXT: MOV * T0.X, KC0[2].Z, ; EG-NEXT: ALU clause starting at 11: ; EG-NEXT: LSHR * T0.W, T0.X, literal.x, ; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Z, PV.W, literal.x, ; EG-NEXT: AND_INT T0.W, T0.Y, literal.x, ; EG-NEXT: LSHR * T1.W, T0.Y, literal.y, ; EG-NEXT: 65280(9.147676e-41), 8(1.121039e-44) ; EG-NEXT: AND_INT T1.X, PS, literal.x, ; EG-NEXT: LSHR T1.Y, T0.Y, literal.y, ; EG-NEXT: LSHL T1.Z, PV.W, literal.z, ; EG-NEXT: LSHL T0.W, T0.Y, literal.y, ; EG-NEXT: AND_INT * T1.W, T0.X, literal.x, ; EG-NEXT: 65280(9.147676e-41), 24(3.363116e-44) ; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) ; EG-NEXT: LSHR T2.X, T0.X, literal.x, ; EG-NEXT: LSHL T0.Y, PS, literal.y, ; EG-NEXT: LSHL T2.Z, T0.X, literal.x, ; EG-NEXT: OR_INT T0.W, PV.W, PV.Z, ; EG-NEXT: OR_INT * T1.W, PV.X, PV.Y, ; EG-NEXT: 24(3.363116e-44), 8(1.121039e-44) ; EG-NEXT: OR_INT T1.Z, PV.W, PS, ; EG-NEXT: OR_INT T0.W, PV.Z, PV.Y, ; EG-NEXT: OR_INT * T1.W, T0.Z, PV.X, ; EG-NEXT: OR_INT T0.Z, PV.W, PS, ; EG-NEXT: AND_INT T0.W, PV.Z, literal.x, ; EG-NEXT: LSHR * T1.W, PV.Z, literal.y, ; EG-NEXT: 252645135(7.053345e-30), 4(5.605194e-45) ; EG-NEXT: AND_INT T0.Y, PS, literal.x, ; EG-NEXT: LSHL T1.Z, PV.W, literal.y, ; EG-NEXT: AND_INT T0.W, PV.Z, literal.x, ; EG-NEXT: LSHR * T1.W, PV.Z, literal.y, ; EG-NEXT: 252645135(7.053345e-30), 4(5.605194e-45) ; EG-NEXT: AND_INT T0.Z, PS, literal.x, ; EG-NEXT: LSHL T0.W, PV.W, literal.y, ; EG-NEXT: OR_INT * T1.W, PV.Y, PV.Z, ; EG-NEXT: 252645135(7.053345e-30), 4(5.605194e-45) ; EG-NEXT: AND_INT T1.Z, PS, literal.x, ; EG-NEXT: LSHR T1.W, PS, literal.y, ; EG-NEXT: OR_INT * T0.W, PV.Z, PV.W, ; EG-NEXT: 858993459(4.172325e-08), 2(2.802597e-45) ; EG-NEXT: AND_INT T0.Y, PS, literal.x, ; EG-NEXT: LSHR T0.Z, PS, literal.y, ; EG-NEXT: AND_INT T0.W, PV.W, literal.x, ; EG-NEXT: LSHL * T1.W, PV.Z, literal.y, ; EG-NEXT: 858993459(4.172325e-08), 2(2.802597e-45) ; EG-NEXT: OR_INT T1.Z, PV.W, PS, 
; EG-NEXT: AND_INT T0.W, PV.Z, literal.x, ; EG-NEXT: LSHL * T1.W, PV.Y, literal.y, ; EG-NEXT: 858993459(4.172325e-08), 2(2.802597e-45) ; EG-NEXT: OR_INT T0.Z, PV.W, PS, ; EG-NEXT: AND_INT T0.W, PV.Z, literal.x, ; EG-NEXT: LSHR * T1.W, PV.Z, 1, ; EG-NEXT: 1431655765(1.466015e+13), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Y, PS, literal.x, ; EG-NEXT: LSHL T1.Z, PV.W, 1, ; EG-NEXT: AND_INT T0.W, PV.Z, literal.x, ; EG-NEXT: LSHR * T1.W, PV.Z, 1, ; EG-NEXT: 1431655765(1.466015e+13), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Z, PS, literal.x, ; EG-NEXT: LSHL T0.W, PV.W, 1, ; EG-NEXT: OR_INT * T1.W, PV.Y, PV.Z, ; EG-NEXT: 1431655765(1.466015e+13), 0(0.000000e+00) ; EG-NEXT: AND_INT T2.W, PS, 1, ; EG-NEXT: OR_INT * T0.W, PV.Z, PV.W, ; EG-NEXT: AND_INT T3.W, T1.W, literal.x, ; EG-NEXT: MULLO_INT * T0.X, PS, PV.W, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) ; EG-NEXT: AND_INT T2.W, T1.W, literal.x, ; EG-NEXT: MULLO_INT * T0.Y, T0.W, PV.W, ; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T3.W, T0.X, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.W, ; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) ; EG-NEXT: AND_INT T1.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T2.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T2.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) ; EG-NEXT: AND_INT T1.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T2.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 64(8.968310e-44), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T2.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 128(1.793662e-43), 0(0.000000e+00) ; EG-NEXT: AND_INT T1.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T2.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 256(3.587324e-43), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Z, T1.W, 
literal.x, ; EG-NEXT: XOR_INT T2.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 512(7.174648e-43), 0(0.000000e+00) ; EG-NEXT: AND_INT * T1.Z, T1.W, literal.x, ; EG-NEXT: 1024(1.434930e-42), 0(0.000000e+00) ; EG-NEXT: ALU clause starting at 110: ; EG-NEXT: XOR_INT T3.W, T2.W, T0.X, ; EG-NEXT: MULLO_INT * T0.X, T0.W, T0.Z, ; EG-NEXT: AND_INT T0.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T3.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, T1.Z, ; EG-NEXT: 2048(2.869859e-42), 0(0.000000e+00) ; EG-NEXT: AND_INT T1.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T3.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 4096(5.739719e-42), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T3.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 8192(1.147944e-41), 0(0.000000e+00) ; EG-NEXT: AND_INT T1.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T3.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 16384(2.295887e-41), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T3.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 32768(4.591775e-41), 0(0.000000e+00) ; EG-NEXT: AND_INT T1.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T3.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 65536(9.183550e-41), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T3.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 131072(1.836710e-40), 0(0.000000e+00) ; EG-NEXT: AND_INT T1.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T3.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 262144(3.673420e-40), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T3.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 524288(7.346840e-40), 0(0.000000e+00) ; EG-NEXT: AND_INT T1.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T3.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 1048576(1.469368e-39), 
0(0.000000e+00) ; EG-NEXT: AND_INT T0.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T3.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 2097152(2.938736e-39), 0(0.000000e+00) ; EG-NEXT: AND_INT T1.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T3.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 4194304(5.877472e-39), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T3.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 8388608(1.175494e-38), 0(0.000000e+00) ; EG-NEXT: AND_INT T1.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T3.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 16777216(2.350989e-38), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T3.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 33554432(9.403955e-38), 0(0.000000e+00) ; EG-NEXT: AND_INT T1.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T4.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 67108864(1.504633e-36), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T4.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 134217728(3.851860e-34), 0(0.000000e+00) ; EG-NEXT: AND_INT T1.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T4.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 268435456(2.524355e-29), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T4.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 536870912(1.084202e-19), 0(0.000000e+00) ; EG-NEXT: AND_INT T1.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T4.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 1073741824(2.000000e+00), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T1.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: -2147483648(-0.000000e+00), 0(0.000000e+00) ; EG-NEXT: XOR_INT T1.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: LSHR T0.Z, T3.W, literal.x, ; 
EG-NEXT: XOR_INT T0.W, PV.W, PS, ; EG-NEXT: AND_INT * T1.W, T3.W, literal.y, ; EG-NEXT: 8(1.121039e-44), 65280(9.147676e-41) ; EG-NEXT: LSHL T0.Y, PS, literal.x, ; EG-NEXT: LSHL T1.Z, T2.W, literal.y, ; EG-NEXT: LSHR T0.W, PV.W, literal.y, ; EG-NEXT: AND_INT * T1.W, PV.Z, literal.z, ; EG-NEXT: 8(1.121039e-44), 24(3.363116e-44) ; EG-NEXT: 65280(9.147676e-41), 0(0.000000e+00) ; EG-NEXT: OR_INT T0.W, PS, PV.W, ; EG-NEXT: OR_INT * T1.W, PV.Z, PV.Y, ; EG-NEXT: OR_INT * T0.W, PS, PV.W, ; EG-NEXT: AND_INT T1.W, PV.W, literal.x, ; EG-NEXT: LSHR * T0.W, PV.W, literal.y, ; EG-NEXT: 252645135(7.053345e-30), 4(5.605194e-45) ; EG-NEXT: AND_INT T0.W, PS, literal.x, ; EG-NEXT: LSHL * T1.W, PV.W, literal.y, ; EG-NEXT: 252645135(7.053345e-30), 4(5.605194e-45) ; EG-NEXT: OR_INT * T0.W, PV.W, PS, ; EG-NEXT: AND_INT T1.W, PV.W, literal.x, ; EG-NEXT: LSHR * T0.W, PV.W, literal.y, ; EG-NEXT: 858993459(4.172325e-08), 2(2.802597e-45) ; EG-NEXT: ALU clause starting at 221: ; EG-NEXT: AND_INT T0.W, T0.W, literal.x, ; EG-NEXT: LSHL * T1.W, T1.W, literal.y, ; EG-NEXT: 858993459(4.172325e-08), 2(2.802597e-45) ; EG-NEXT: OR_INT * T0.W, PV.W, PS, ; EG-NEXT: AND_INT T1.W, PV.W, literal.x, ; EG-NEXT: LSHR * T0.W, PV.W, 1, ; EG-NEXT: 1431655765(1.466015e+13), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.W, PS, literal.x, ; EG-NEXT: LSHL * T1.W, PV.W, 1, ; EG-NEXT: 1431655765(1.466015e+13), 0(0.000000e+00) ; EG-NEXT: OR_INT T0.X, PV.W, PS, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %b_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1 %a = load i32, ptr addrspace(1) %in %b = load i32, ptr addrspace(1) %b_ptr %a.ext = zext i32 %a to i64 %b.ext = zext i32 %b to i64 %clmul = call i64 @llvm.clmul.i64(i64 %a.ext, i64 %b.ext) %res.ext = lshr i64 %clmul, 31 %res = trunc i64 %res.ext to i32 store i32 %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @test_clmulh_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; SI-LABEL: test_clmulh_i32: ; SI: ; 
%bb.0: ; SI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 ; SI-NEXT: s_mov_b32 s3, 0xf000 ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_mov_b32 s10, s2 ; SI-NEXT: s_mov_b32 s11, s3 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_mov_b32 s8, s6 ; SI-NEXT: s_mov_b32 s9, s7 ; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 ; SI-NEXT: s_mov_b32 s7, 0 ; SI-NEXT: s_mov_b32 s21, s7 ; SI-NEXT: s_mov_b32 s0, s4 ; SI-NEXT: s_mov_b32 s1, s5 ; SI-NEXT: s_mov_b32 s9, s7 ; SI-NEXT: s_mov_b32 s15, s7 ; SI-NEXT: s_mov_b32 s11, s7 ; SI-NEXT: s_mov_b32 s13, s7 ; SI-NEXT: s_mov_b32 s17, s7 ; SI-NEXT: s_mov_b32 s19, s7 ; SI-NEXT: s_mov_b32 s23, s7 ; SI-NEXT: s_mov_b32 s25, s7 ; SI-NEXT: s_mov_b32 s27, s7 ; SI-NEXT: s_mov_b32 s29, s7 ; SI-NEXT: s_mov_b32 s31, s7 ; SI-NEXT: s_mov_b32 s35, s7 ; SI-NEXT: s_mov_b32 s37, s7 ; SI-NEXT: s_mov_b32 s39, s7 ; SI-NEXT: s_mov_b32 s41, s7 ; SI-NEXT: s_mov_b32 s43, s7 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_readfirstlane_b32 s33, v1 ; SI-NEXT: s_and_b32 s20, s33, 2 ; SI-NEXT: v_readfirstlane_b32 s6, v0 ; SI-NEXT: s_bfe_i32 s8, s33, 0x10000 ; SI-NEXT: v_cmp_eq_u64_e64 s[20:21], s[20:21], 0 ; SI-NEXT: s_lshl_b64 s[4:5], s[6:7], 1 ; SI-NEXT: s_and_b32 s8, s8, s6 ; SI-NEXT: s_and_b64 s[20:21], s[20:21], exec ; SI-NEXT: s_cselect_b32 s21, 0, s5 ; SI-NEXT: s_cselect_b32 s20, 0, s4 ; SI-NEXT: s_and_b32 s14, s33, 4 ; SI-NEXT: s_xor_b64 s[20:21], s[8:9], s[20:21] ; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[14:15], 0 ; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 2 ; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec ; SI-NEXT: s_cselect_b32 s15, 0, s15 ; SI-NEXT: s_cselect_b32 s14, 0, s14 ; SI-NEXT: s_and_b32 s10, s33, 8 ; SI-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], 0 ; SI-NEXT: s_xor_b64 s[14:15], s[20:21], s[14:15] ; SI-NEXT: s_lshl_b64 s[20:21], s[6:7], 3 ; SI-NEXT: s_and_b64 s[10:11], s[10:11], exec ; SI-NEXT: s_cselect_b32 s11, 0, s21 ; SI-NEXT: s_cselect_b32 s10, 0, s20 ; SI-NEXT: s_and_b32 s12, s33, 16 ; SI-NEXT: v_cmp_eq_u64_e64 s[12:13], s[12:13], 0 ; SI-NEXT: s_lshl_b64 
s[20:21], s[6:7], 4 ; SI-NEXT: s_xor_b64 s[10:11], s[14:15], s[10:11] ; SI-NEXT: s_and_b64 s[12:13], s[12:13], exec ; SI-NEXT: s_cselect_b32 s13, 0, s21 ; SI-NEXT: s_cselect_b32 s12, 0, s20 ; SI-NEXT: s_and_b32 s16, s33, 32 ; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[16:17], 0 ; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 5 ; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec ; SI-NEXT: s_cselect_b32 s13, 0, s15 ; SI-NEXT: s_cselect_b32 s12, 0, s14 ; SI-NEXT: s_and_b32 s18, s33, 64 ; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[18:19], 0 ; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 6 ; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec ; SI-NEXT: s_cselect_b32 s13, 0, s15 ; SI-NEXT: s_cselect_b32 s12, 0, s14 ; SI-NEXT: s_and_b32 s22, s33, 0x80 ; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[22:23], 0 ; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 7 ; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec ; SI-NEXT: s_cselect_b32 s13, 0, s15 ; SI-NEXT: s_cselect_b32 s12, 0, s14 ; SI-NEXT: s_and_b32 s24, s33, 0x100 ; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[24:25], 0 ; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 8 ; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec ; SI-NEXT: s_cselect_b32 s13, 0, s15 ; SI-NEXT: s_cselect_b32 s12, 0, s14 ; SI-NEXT: s_and_b32 s26, s33, 0x200 ; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[26:27], 0 ; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 9 ; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec ; SI-NEXT: s_cselect_b32 s13, 0, s15 ; SI-NEXT: s_cselect_b32 s12, 0, s14 ; SI-NEXT: s_and_b32 s28, s33, 0x400 ; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[28:29], 0 ; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 10 ; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec ; SI-NEXT: s_cselect_b32 s13, 0, s15 ; SI-NEXT: s_cselect_b32 s12, 0, s14 ; SI-NEXT: s_and_b32 
s30, s33, 0x800 ; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[30:31], 0 ; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 11 ; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec ; SI-NEXT: s_cselect_b32 s13, 0, s15 ; SI-NEXT: s_cselect_b32 s12, 0, s14 ; SI-NEXT: s_and_b32 s34, s33, 0x1000 ; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[34:35], 0 ; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 12 ; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec ; SI-NEXT: s_cselect_b32 s13, 0, s15 ; SI-NEXT: s_cselect_b32 s12, 0, s14 ; SI-NEXT: s_and_b32 s36, s33, 0x2000 ; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[36:37], 0 ; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 13 ; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec ; SI-NEXT: s_cselect_b32 s13, 0, s15 ; SI-NEXT: s_cselect_b32 s12, 0, s14 ; SI-NEXT: s_and_b32 s38, s33, 0x4000 ; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[38:39], 0 ; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 14 ; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec ; SI-NEXT: s_cselect_b32 s13, 0, s15 ; SI-NEXT: s_cselect_b32 s12, 0, s14 ; SI-NEXT: s_and_b32 s40, s33, 0x8000 ; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[40:41], 0 ; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 15 ; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec ; SI-NEXT: s_cselect_b32 s13, 0, s15 ; SI-NEXT: s_cselect_b32 s12, 0, s14 ; SI-NEXT: s_and_b32 s42, s33, 0x10000 ; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[42:43], 0 ; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 16 ; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec ; SI-NEXT: s_mov_b32 s5, s7 ; SI-NEXT: s_cselect_b32 s13, 0, s15 ; SI-NEXT: s_cselect_b32 s12, 0, s14 ; SI-NEXT: s_and_b32 s4, s33, 0x20000 ; SI-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], 0 ; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 17 ; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; 
SI-NEXT: s_and_b64 s[4:5], s[4:5], exec ; SI-NEXT: s_mov_b32 s9, s7 ; SI-NEXT: s_cselect_b32 s5, 0, s15 ; SI-NEXT: s_cselect_b32 s4, 0, s14 ; SI-NEXT: s_and_b32 s8, s33, 0x40000 ; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0 ; SI-NEXT: s_lshl_b64 s[12:13], s[6:7], 18 ; SI-NEXT: s_xor_b64 s[4:5], s[10:11], s[4:5] ; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec ; SI-NEXT: s_cselect_b32 s9, 0, s13 ; SI-NEXT: s_cselect_b32 s8, 0, s12 ; SI-NEXT: s_and_b32 s10, s33, 0x80000 ; SI-NEXT: s_mov_b32 s11, s7 ; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[10:11], 0 ; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 19 ; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec ; SI-NEXT: s_cselect_b32 s9, 0, s11 ; SI-NEXT: s_cselect_b32 s8, 0, s10 ; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; SI-NEXT: s_and_b32 s8, s33, 0x100000 ; SI-NEXT: s_mov_b32 s9, s7 ; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0 ; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 20 ; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec ; SI-NEXT: s_cselect_b32 s9, 0, s11 ; SI-NEXT: s_cselect_b32 s8, 0, s10 ; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; SI-NEXT: s_and_b32 s8, s33, 0x200000 ; SI-NEXT: s_mov_b32 s9, s7 ; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0 ; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 21 ; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec ; SI-NEXT: s_cselect_b32 s9, 0, s11 ; SI-NEXT: s_cselect_b32 s8, 0, s10 ; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; SI-NEXT: s_and_b32 s8, s33, 0x400000 ; SI-NEXT: s_mov_b32 s9, s7 ; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0 ; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 22 ; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec ; SI-NEXT: s_cselect_b32 s9, 0, s11 ; SI-NEXT: s_cselect_b32 s8, 0, s10 ; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; SI-NEXT: s_and_b32 s8, s33, 0x800000 ; SI-NEXT: s_mov_b32 s9, s7 ; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0 ; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 23 ; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec ; SI-NEXT: s_cselect_b32 s9, 0, s11 ; SI-NEXT: s_cselect_b32 s8, 0, s10 
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; SI-NEXT: s_and_b32 s8, s33, 0x1000000 ; SI-NEXT: s_mov_b32 s9, s7 ; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0 ; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 24 ; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec ; SI-NEXT: s_cselect_b32 s9, 0, s11 ; SI-NEXT: s_cselect_b32 s8, 0, s10 ; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; SI-NEXT: s_and_b32 s8, s33, 0x2000000 ; SI-NEXT: s_mov_b32 s9, s7 ; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0 ; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 25 ; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec ; SI-NEXT: s_cselect_b32 s9, 0, s11 ; SI-NEXT: s_cselect_b32 s8, 0, s10 ; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; SI-NEXT: s_and_b32 s8, s33, 0x4000000 ; SI-NEXT: s_mov_b32 s9, s7 ; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0 ; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 26 ; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec ; SI-NEXT: s_cselect_b32 s9, 0, s11 ; SI-NEXT: s_cselect_b32 s8, 0, s10 ; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; SI-NEXT: s_and_b32 s8, s33, 0x8000000 ; SI-NEXT: s_mov_b32 s9, s7 ; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0 ; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 27 ; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec ; SI-NEXT: s_cselect_b32 s9, 0, s11 ; SI-NEXT: s_cselect_b32 s8, 0, s10 ; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; SI-NEXT: s_and_b32 s8, s33, 0x10000000 ; SI-NEXT: s_mov_b32 s9, s7 ; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0 ; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 28 ; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec ; SI-NEXT: s_cselect_b32 s9, 0, s11 ; SI-NEXT: s_cselect_b32 s8, 0, s10 ; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; SI-NEXT: s_and_b32 s8, s33, 0x20000000 ; SI-NEXT: s_mov_b32 s9, s7 ; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0 ; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 29 ; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec ; SI-NEXT: s_cselect_b32 s9, 0, s11 ; SI-NEXT: s_cselect_b32 s8, 0, s10 ; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; SI-NEXT: s_and_b32 s8, s33, 2.0 ; SI-NEXT: s_mov_b32 s9, s7 ; 
SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0 ; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 30 ; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec ; SI-NEXT: s_cselect_b32 s9, 0, s11 ; SI-NEXT: s_cselect_b32 s8, 0, s10 ; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; SI-NEXT: s_lshl_b64 s[6:7], s[6:7], 31 ; SI-NEXT: s_cmp_gt_i32 s33, -1 ; SI-NEXT: s_cselect_b32 s7, 0, s7 ; SI-NEXT: s_cselect_b32 s6, 0, s6 ; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[6:7] ; SI-NEXT: v_mov_b32_e32 v0, s5 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: test_clmulh_i32: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_mov_b32 s10, s2 ; VI-NEXT: s_mov_b32 s11, s3 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_mov_b32 s8, s6 ; VI-NEXT: s_mov_b32 s9, s7 ; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 ; VI-NEXT: s_mov_b32 s0, s4 ; VI-NEXT: s_mov_b32 s7, 0 ; VI-NEXT: s_mov_b32 s1, s5 ; VI-NEXT: s_mov_b32 s11, s7 ; VI-NEXT: s_mov_b32 s9, s7 ; VI-NEXT: s_mov_b32 s13, s7 ; VI-NEXT: s_mov_b32 s15, s7 ; VI-NEXT: s_mov_b32 s17, s7 ; VI-NEXT: s_mov_b32 s19, s7 ; VI-NEXT: s_mov_b32 s21, s7 ; VI-NEXT: s_mov_b32 s23, s7 ; VI-NEXT: s_mov_b32 s25, s7 ; VI-NEXT: s_mov_b32 s27, s7 ; VI-NEXT: s_mov_b32 s29, s7 ; VI-NEXT: s_mov_b32 s31, s7 ; VI-NEXT: s_mov_b32 s35, s7 ; VI-NEXT: s_mov_b32 s37, s7 ; VI-NEXT: s_mov_b32 s39, s7 ; VI-NEXT: s_mov_b32 s41, s7 ; VI-NEXT: s_mov_b32 s43, s7 ; VI-NEXT: s_mov_b32 s45, s7 ; VI-NEXT: s_mov_b32 s47, s7 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_readfirstlane_b32 s4, v1 ; VI-NEXT: v_readfirstlane_b32 s6, v0 ; VI-NEXT: s_bfe_i32 s5, s4, 0x10000 ; VI-NEXT: s_lshl_b64 s[48:49], s[6:7], 1 ; VI-NEXT: s_and_b32 s10, s4, 2 ; VI-NEXT: s_and_b32 s8, s5, s6 ; VI-NEXT: s_cmp_eq_u64 s[10:11], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s49 ; VI-NEXT: s_cselect_b32 s10, 0, s48 ; VI-NEXT: s_lshl_b64 s[48:49], s[6:7], 2 ; VI-NEXT: s_and_b32 s12, s4, 4 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] 
; VI-NEXT: s_cmp_eq_u64 s[12:13], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s49 ; VI-NEXT: s_cselect_b32 s10, 0, s48 ; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 3 ; VI-NEXT: s_and_b32 s14, s4, 8 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[14:15], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 4 ; VI-NEXT: s_and_b32 s16, s4, 16 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[16:17], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 5 ; VI-NEXT: s_and_b32 s18, s4, 32 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[18:19], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 6 ; VI-NEXT: s_and_b32 s20, s4, 64 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[20:21], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 7 ; VI-NEXT: s_and_b32 s22, s4, 0x80 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[22:23], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 8 ; VI-NEXT: s_and_b32 s24, s4, 0x100 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[24:25], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 9 ; VI-NEXT: s_and_b32 s26, s4, 0x200 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[26:27], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 10 ; VI-NEXT: s_and_b32 s28, s4, 0x400 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[28:29], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_lshl_b64 
s[12:13], s[6:7], 11 ; VI-NEXT: s_and_b32 s30, s4, 0x800 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[30:31], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 12 ; VI-NEXT: s_and_b32 s34, s4, 0x1000 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[34:35], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 13 ; VI-NEXT: s_and_b32 s36, s4, 0x2000 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[36:37], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 14 ; VI-NEXT: s_and_b32 s38, s4, 0x4000 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[38:39], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 15 ; VI-NEXT: s_and_b32 s40, s4, 0x8000 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[40:41], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 16 ; VI-NEXT: s_and_b32 s42, s4, 0x10000 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[42:43], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 17 ; VI-NEXT: s_and_b32 s44, s4, 0x20000 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[44:45], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 18 ; VI-NEXT: s_and_b32 s46, s4, 0x40000 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_cmp_eq_u64 s[46:47], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 19 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_and_b32 s10, s4, 0x80000 ; 
VI-NEXT: s_mov_b32 s11, s7 ; VI-NEXT: s_cmp_eq_u64 s[10:11], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s13 ; VI-NEXT: s_cselect_b32 s10, 0, s12 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 20 ; VI-NEXT: s_and_b32 s12, s4, 0x100000 ; VI-NEXT: s_mov_b32 s13, s7 ; VI-NEXT: s_cmp_eq_u64 s[12:13], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s11 ; VI-NEXT: s_cselect_b32 s10, 0, s10 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 21 ; VI-NEXT: s_and_b32 s12, s4, 0x200000 ; VI-NEXT: s_cmp_eq_u64 s[12:13], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s11 ; VI-NEXT: s_cselect_b32 s10, 0, s10 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 22 ; VI-NEXT: s_and_b32 s12, s4, 0x400000 ; VI-NEXT: s_cmp_eq_u64 s[12:13], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s11 ; VI-NEXT: s_cselect_b32 s10, 0, s10 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 23 ; VI-NEXT: s_and_b32 s12, s4, 0x800000 ; VI-NEXT: s_cmp_eq_u64 s[12:13], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s11 ; VI-NEXT: s_cselect_b32 s10, 0, s10 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 24 ; VI-NEXT: s_and_b32 s12, s4, 0x1000000 ; VI-NEXT: s_cmp_eq_u64 s[12:13], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s11 ; VI-NEXT: s_cselect_b32 s10, 0, s10 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 25 ; VI-NEXT: s_and_b32 s12, s4, 0x2000000 ; VI-NEXT: s_cmp_eq_u64 s[12:13], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s11 ; VI-NEXT: s_cselect_b32 s10, 0, s10 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 26 ; VI-NEXT: s_and_b32 s12, s4, 0x4000000 ; VI-NEXT: s_cmp_eq_u64 s[12:13], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s11 ; VI-NEXT: s_cselect_b32 s10, 0, s10 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 27 ; VI-NEXT: s_and_b32 s12, s4, 0x8000000 ; VI-NEXT: s_cmp_eq_u64 s[12:13], 0 
; VI-NEXT: s_cselect_b32 s11, 0, s11 ; VI-NEXT: s_cselect_b32 s10, 0, s10 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 28 ; VI-NEXT: s_and_b32 s12, s4, 0x10000000 ; VI-NEXT: s_cmp_eq_u64 s[12:13], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s11 ; VI-NEXT: s_cselect_b32 s10, 0, s10 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 29 ; VI-NEXT: s_and_b32 s12, s4, 0x20000000 ; VI-NEXT: s_cmp_eq_u64 s[12:13], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s11 ; VI-NEXT: s_cselect_b32 s10, 0, s10 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 30 ; VI-NEXT: s_and_b32 s12, s4, 2.0 ; VI-NEXT: s_cmp_eq_u64 s[12:13], 0 ; VI-NEXT: s_cselect_b32 s11, 0, s11 ; VI-NEXT: s_cselect_b32 s10, 0, s10 ; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; VI-NEXT: s_lshl_b64 s[6:7], s[6:7], 31 ; VI-NEXT: s_cmp_gt_i32 s4, -1 ; VI-NEXT: s_cselect_b32 s5, 0, s7 ; VI-NEXT: s_cselect_b32 s4, 0, s6 ; VI-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5] ; VI-NEXT: v_mov_b32_e32 v0, s5 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: test_clmulh_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: s_mov_b32 s6, s2 ; GFX9-NEXT: s_mov_b32 s7, s3 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_mov_b32 s4, s10 ; GFX9-NEXT: s_mov_b32 s5, s11 ; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 ; GFX9-NEXT: s_mov_b32 s0, s8 ; GFX9-NEXT: s_mov_b32 s5, 0 ; GFX9-NEXT: s_mov_b32 s11, s5 ; GFX9-NEXT: s_mov_b32 s7, s5 ; GFX9-NEXT: s_mov_b32 s13, s5 ; GFX9-NEXT: s_mov_b32 s15, s5 ; GFX9-NEXT: s_mov_b32 s17, s5 ; GFX9-NEXT: s_mov_b32 s19, s5 ; GFX9-NEXT: s_mov_b32 s21, s5 ; GFX9-NEXT: s_mov_b32 s23, s5 ; GFX9-NEXT: s_mov_b32 s25, s5 ; GFX9-NEXT: s_mov_b32 s27, s5 ; GFX9-NEXT: s_mov_b32 s29, s5 ; GFX9-NEXT: s_mov_b32 s31, s5 ; GFX9-NEXT: s_mov_b32 s35, s5 ; GFX9-NEXT: s_mov_b32 s37, s5 ; 
GFX9-NEXT: s_mov_b32 s39, s5 ; GFX9-NEXT: s_mov_b32 s41, s5 ; GFX9-NEXT: s_mov_b32 s43, s5 ; GFX9-NEXT: s_mov_b32 s45, s5 ; GFX9-NEXT: s_mov_b32 s47, s5 ; GFX9-NEXT: s_mov_b32 s1, s9 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s8, v1 ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: s_bfe_i32 s6, s8, 0x10000 ; GFX9-NEXT: s_lshl_b64 s[48:49], s[4:5], 1 ; GFX9-NEXT: s_and_b32 s10, s8, 2 ; GFX9-NEXT: s_and_b32 s6, s6, s4 ; GFX9-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s49 ; GFX9-NEXT: s_cselect_b32 s10, 0, s48 ; GFX9-NEXT: s_lshl_b64 s[48:49], s[4:5], 2 ; GFX9-NEXT: s_and_b32 s12, s8, 4 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s49 ; GFX9-NEXT: s_cselect_b32 s10, 0, s48 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 3 ; GFX9-NEXT: s_and_b32 s14, s8, 8 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[14:15], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 4 ; GFX9-NEXT: s_and_b32 s16, s8, 16 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[16:17], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 5 ; GFX9-NEXT: s_and_b32 s18, s8, 32 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[18:19], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 6 ; GFX9-NEXT: s_and_b32 s20, s8, 64 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[20:21], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 7 ; GFX9-NEXT: s_and_b32 s22, s8, 0x80 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[22:23], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: 
s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 8 ; GFX9-NEXT: s_and_b32 s24, s8, 0x100 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[24:25], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 9 ; GFX9-NEXT: s_and_b32 s26, s8, 0x200 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[26:27], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 10 ; GFX9-NEXT: s_and_b32 s28, s8, 0x400 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[28:29], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 11 ; GFX9-NEXT: s_and_b32 s30, s8, 0x800 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[30:31], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 12 ; GFX9-NEXT: s_and_b32 s34, s8, 0x1000 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[34:35], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 13 ; GFX9-NEXT: s_and_b32 s36, s8, 0x2000 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[36:37], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 14 ; GFX9-NEXT: s_and_b32 s38, s8, 0x4000 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[38:39], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 15 ; GFX9-NEXT: s_and_b32 s40, s8, 0x8000 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[40:41], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, 
s12 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 16 ; GFX9-NEXT: s_and_b32 s42, s8, 0x10000 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[42:43], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 17 ; GFX9-NEXT: s_and_b32 s44, s8, 0x20000 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[44:45], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 18 ; GFX9-NEXT: s_and_b32 s46, s8, 0x40000 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_cmp_eq_u64 s[46:47], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 19 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_and_b32 s10, s8, 0x80000 ; GFX9-NEXT: s_mov_b32 s11, s5 ; GFX9-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s13 ; GFX9-NEXT: s_cselect_b32 s10, 0, s12 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 20 ; GFX9-NEXT: s_and_b32 s12, s8, 0x100000 ; GFX9-NEXT: s_mov_b32 s13, s5 ; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s11 ; GFX9-NEXT: s_cselect_b32 s10, 0, s10 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 21 ; GFX9-NEXT: s_and_b32 s12, s8, 0x200000 ; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s11 ; GFX9-NEXT: s_cselect_b32 s10, 0, s10 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 22 ; GFX9-NEXT: s_and_b32 s12, s8, 0x400000 ; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s11 ; GFX9-NEXT: s_cselect_b32 s10, 0, s10 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 23 ; GFX9-NEXT: s_and_b32 s12, s8, 0x800000 ; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0 ; GFX9-NEXT: 
s_cselect_b32 s11, 0, s11 ; GFX9-NEXT: s_cselect_b32 s10, 0, s10 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 24 ; GFX9-NEXT: s_and_b32 s12, s8, 0x1000000 ; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s11 ; GFX9-NEXT: s_cselect_b32 s10, 0, s10 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 25 ; GFX9-NEXT: s_and_b32 s12, s8, 0x2000000 ; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s11 ; GFX9-NEXT: s_cselect_b32 s10, 0, s10 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 26 ; GFX9-NEXT: s_and_b32 s12, s8, 0x4000000 ; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s11 ; GFX9-NEXT: s_cselect_b32 s10, 0, s10 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 27 ; GFX9-NEXT: s_and_b32 s12, s8, 0x8000000 ; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s11 ; GFX9-NEXT: s_cselect_b32 s10, 0, s10 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 28 ; GFX9-NEXT: s_and_b32 s12, s8, 0x10000000 ; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s11 ; GFX9-NEXT: s_cselect_b32 s10, 0, s10 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 29 ; GFX9-NEXT: s_and_b32 s12, s8, 0x20000000 ; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s11 ; GFX9-NEXT: s_cselect_b32 s10, 0, s10 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 30 ; GFX9-NEXT: s_and_b32 s12, s8, 2.0 ; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0 ; GFX9-NEXT: s_cselect_b32 s11, 0, s11 ; GFX9-NEXT: s_cselect_b32 s10, 0, s10 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GFX9-NEXT: s_lshl_b64 s[4:5], s[4:5], 31 ; GFX9-NEXT: s_cmp_gt_i32 s8, -1 ; GFX9-NEXT: s_cselect_b32 s5, 0, s5 ; GFX9-NEXT: 
s_cselect_b32 s4, 0, s4 ; GFX9-NEXT: s_xor_b64 s[4:5], s[6:7], s[4:5] ; GFX9-NEXT: v_mov_b32_e32 v0, s5 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: test_clmulh_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX10-NEXT: s_mov_b32 s6, -1 ; GFX10-NEXT: s_mov_b32 s7, 0x31016000 ; GFX10-NEXT: s_mov_b32 s10, s6 ; GFX10-NEXT: s_mov_b32 s11, s7 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_mov_b32 s8, s2 ; GFX10-NEXT: s_mov_b32 s9, s3 ; GFX10-NEXT: s_mov_b32 s3, 0 ; GFX10-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 ; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) ; GFX10-NEXT: s_mov_b32 s11, s3 ; GFX10-NEXT: s_mov_b32 s9, s3 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_readfirstlane_b32 s4, v1 ; GFX10-NEXT: v_readfirstlane_b32 s2, v0 ; GFX10-NEXT: s_bfe_i32 s5, s4, 0x10000 ; GFX10-NEXT: s_and_b32 s10, s4, 2 ; GFX10-NEXT: s_lshl_b64 s[12:13], s[2:3], 1 ; GFX10-NEXT: s_and_b32 s8, s5, s2 ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_mov_b32 s5, s1 ; GFX10-NEXT: s_cselect_b32 s13, 0, s13 ; GFX10-NEXT: s_cselect_b32 s12, 0, s12 ; GFX10-NEXT: s_and_b32 s10, s4, 4 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 2 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 8 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 3 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 16 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 4 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 32 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 5 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], 
s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 64 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 6 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x80 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 7 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x100 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 8 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x200 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 9 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x400 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 10 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x800 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 11 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x1000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 12 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x2000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 13 ; GFX10-NEXT: 
s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x4000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 14 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x8000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 15 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x10000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 16 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x20000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 17 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x40000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 18 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x80000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 19 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x100000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 20 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x200000 ; GFX10-NEXT: 
s_lshl_b64 s[14:15], s[2:3], 21 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x400000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 22 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x800000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 23 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x1000000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 24 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x2000000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 25 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x4000000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 26 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x8000000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 27 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 0x10000000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 28 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; 
GFX10-NEXT: s_and_b32 s10, s4, 0x20000000 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 29 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s13, 0, s15 ; GFX10-NEXT: s_cselect_b32 s12, 0, s14 ; GFX10-NEXT: s_and_b32 s10, s4, 2.0 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 30 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX10-NEXT: s_cselect_b32 s11, 0, s15 ; GFX10-NEXT: s_cselect_b32 s10, 0, s14 ; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 31 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; GFX10-NEXT: s_cmp_gt_i32 s4, -1 ; GFX10-NEXT: s_mov_b32 s4, s0 ; GFX10-NEXT: s_cselect_b32 s3, 0, s3 ; GFX10-NEXT: s_cselect_b32 s2, 0, s2 ; GFX10-NEXT: s_xor_b64 s[2:3], s[8:9], s[2:3] ; GFX10-NEXT: v_mov_b32_e32 v0, s3 ; GFX10-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: test_clmulh_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-NEXT: s_mov_b32 s6, -1 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; GFX11-NEXT: s_mov_b32 s10, s6 ; GFX11-NEXT: s_mov_b32 s11, s7 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s8, s2 ; GFX11-NEXT: s_mov_b32 s9, s3 ; GFX11-NEXT: s_mov_b32 s3, 0 ; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[8:11], 0 ; GFX11-NEXT: s_mov_b32 s11, s3 ; GFX11-NEXT: s_mov_b32 s9, s3 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_readfirstlane_b32 s4, v1 ; GFX11-NEXT: v_readfirstlane_b32 s2, v0 ; GFX11-NEXT: s_bfe_i32 s5, s4, 0x10000 ; GFX11-NEXT: s_and_b32 s10, s4, 2 ; GFX11-NEXT: s_lshl_b64 s[12:13], s[2:3], 1 ; GFX11-NEXT: s_and_b32 s8, s5, s2 ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_mov_b32 s5, s1 ; GFX11-NEXT: s_cselect_b32 s13, 0, s13 ; GFX11-NEXT: s_cselect_b32 s12, 0, s12 ; GFX11-NEXT: s_and_b32 s10, s4, 4 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 2 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, 
s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 8 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 3 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 16 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 4 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 32 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 5 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 64 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 6 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x80 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 7 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x100 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 8 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x200 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 9 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x400 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 10 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 
0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x800 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 11 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x1000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 12 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x2000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 13 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x4000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 14 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x8000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 15 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x10000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 16 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x20000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 17 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x40000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 18 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 
0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x80000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 19 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x100000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 20 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x200000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 21 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x400000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 22 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x800000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 23 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x1000000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 24 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x2000000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 25 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x4000000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 26 ; GFX11-NEXT: s_xor_b64 s[8:9], 
s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x8000000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 27 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x10000000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 28 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 0x20000000 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 29 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s13, 0, s15 ; GFX11-NEXT: s_cselect_b32 s12, 0, s14 ; GFX11-NEXT: s_and_b32 s10, s4, 2.0 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 30 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0 ; GFX11-NEXT: s_cselect_b32 s11, 0, s15 ; GFX11-NEXT: s_cselect_b32 s10, 0, s14 ; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], 31 ; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] ; GFX11-NEXT: s_cmp_gt_i32 s4, -1 ; GFX11-NEXT: s_mov_b32 s4, s0 ; GFX11-NEXT: s_cselect_b32 s3, 0, s3 ; GFX11-NEXT: s_cselect_b32 s2, 0, s2 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_xor_b64 s[2:3], s[8:9], s[2:3] ; GFX11-NEXT: v_mov_b32_e32 v0, s3 ; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0 ; GFX11-NEXT: s_endpgm ; ; GFX12-LABEL: test_clmulh_i32: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX12-NEXT: s_mov_b32 s6, -1 ; GFX12-NEXT: s_mov_b32 s7, 0x31016000 ; GFX12-NEXT: s_mov_b32 s10, s6 ; GFX12-NEXT: s_mov_b32 s11, s7 ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_mov_b32 s8, s2 ; GFX12-NEXT: s_mov_b32 s9, s3 ; GFX12-NEXT: s_mov_b32 s3, 0 ; 
GFX12-NEXT: buffer_load_b64 v[0:1], off, s[8:11], null ; GFX12-NEXT: s_mov_b32 s5, s3 ; GFX12-NEXT: s_mov_b32 s9, s3 ; GFX12-NEXT: s_mov_b32 s11, s3 ; GFX12-NEXT: s_mov_b32 s13, s3 ; GFX12-NEXT: s_mov_b32 s15, s3 ; GFX12-NEXT: s_mov_b32 s17, s3 ; GFX12-NEXT: s_mov_b32 s19, s3 ; GFX12-NEXT: s_mov_b32 s21, s3 ; GFX12-NEXT: s_mov_b32 s23, s3 ; GFX12-NEXT: s_mov_b32 s25, s3 ; GFX12-NEXT: s_mov_b32 s27, s3 ; GFX12-NEXT: s_mov_b32 s29, s3 ; GFX12-NEXT: s_mov_b32 s31, s3 ; GFX12-NEXT: s_mov_b32 s35, s3 ; GFX12-NEXT: s_mov_b32 s37, s3 ; GFX12-NEXT: s_mov_b32 s39, s3 ; GFX12-NEXT: s_mov_b32 s41, s3 ; GFX12-NEXT: s_mov_b32 s43, s3 ; GFX12-NEXT: s_mov_b32 s45, s3 ; GFX12-NEXT: s_mov_b32 s47, s3 ; GFX12-NEXT: s_mov_b32 s49, s3 ; GFX12-NEXT: s_mov_b32 s51, s3 ; GFX12-NEXT: s_mov_b32 s53, s3 ; GFX12-NEXT: s_mov_b32 s55, s3 ; GFX12-NEXT: s_mov_b32 s57, s3 ; GFX12-NEXT: s_mov_b32 s59, s3 ; GFX12-NEXT: s_mov_b32 s61, s3 ; GFX12-NEXT: s_mov_b32 s63, s3 ; GFX12-NEXT: s_mov_b32 s65, s3 ; GFX12-NEXT: s_mov_b32 s67, s3 ; GFX12-NEXT: s_mov_b32 s69, s3 ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: v_readfirstlane_b32 s33, v1 ; GFX12-NEXT: v_readfirstlane_b32 s2, v0 ; GFX12-NEXT: s_and_b32 s4, s33, 2 ; GFX12-NEXT: s_and_b32 s8, s33, 1 ; GFX12-NEXT: s_and_b32 s10, s33, 4 ; GFX12-NEXT: s_mul_u64 s[4:5], s[2:3], s[4:5] ; GFX12-NEXT: s_mul_u64 s[8:9], s[2:3], s[8:9] ; GFX12-NEXT: s_and_b32 s12, s33, 8 ; GFX12-NEXT: s_mul_u64 s[10:11], s[2:3], s[10:11] ; GFX12-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5] ; GFX12-NEXT: s_and_b32 s14, s33, 16 ; GFX12-NEXT: s_mul_u64 s[12:13], s[2:3], s[12:13] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11] ; GFX12-NEXT: s_and_b32 s16, s33, 32 ; GFX12-NEXT: s_mul_u64 s[14:15], s[2:3], s[14:15] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[12:13] ; GFX12-NEXT: s_and_b32 s18, s33, 64 ; GFX12-NEXT: s_mul_u64 s[8:9], s[2:3], s[16:17] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15] ; GFX12-NEXT: s_and_b32 s20, s33, 0x80 ; GFX12-NEXT: s_mul_u64 s[10:11], s[2:3], s[18:19] ; 
GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; GFX12-NEXT: s_and_b32 s22, s33, 0x100 ; GFX12-NEXT: s_mul_u64 s[12:13], s[2:3], s[20:21] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11] ; GFX12-NEXT: s_and_b32 s24, s33, 0x200 ; GFX12-NEXT: s_mul_u64 s[14:15], s[2:3], s[22:23] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[12:13] ; GFX12-NEXT: s_and_b32 s26, s33, 0x400 ; GFX12-NEXT: s_mul_u64 s[8:9], s[2:3], s[24:25] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15] ; GFX12-NEXT: s_and_b32 s28, s33, 0x800 ; GFX12-NEXT: s_mul_u64 s[10:11], s[2:3], s[26:27] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; GFX12-NEXT: s_and_b32 s30, s33, 0x1000 ; GFX12-NEXT: s_mul_u64 s[12:13], s[2:3], s[28:29] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11] ; GFX12-NEXT: s_and_b32 s34, s33, 0x2000 ; GFX12-NEXT: s_mul_u64 s[14:15], s[2:3], s[30:31] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[12:13] ; GFX12-NEXT: s_and_b32 s36, s33, 0x4000 ; GFX12-NEXT: s_mul_u64 s[8:9], s[2:3], s[34:35] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15] ; GFX12-NEXT: s_and_b32 s38, s33, 0x8000 ; GFX12-NEXT: s_mul_u64 s[10:11], s[2:3], s[36:37] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; GFX12-NEXT: s_and_b32 s40, s33, 0x10000 ; GFX12-NEXT: s_mul_u64 s[12:13], s[2:3], s[38:39] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11] ; GFX12-NEXT: s_and_b32 s42, s33, 0x20000 ; GFX12-NEXT: s_mul_u64 s[14:15], s[2:3], s[40:41] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[12:13] ; GFX12-NEXT: s_and_b32 s44, s33, 0x40000 ; GFX12-NEXT: s_mul_u64 s[8:9], s[2:3], s[42:43] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15] ; GFX12-NEXT: s_and_b32 s46, s33, 0x80000 ; GFX12-NEXT: s_mul_u64 s[10:11], s[2:3], s[44:45] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; GFX12-NEXT: s_and_b32 s48, s33, 0x100000 ; GFX12-NEXT: s_mul_u64 s[12:13], s[2:3], s[46:47] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11] ; GFX12-NEXT: s_and_b32 s50, s33, 0x200000 ; GFX12-NEXT: s_mul_u64 s[14:15], s[2:3], s[48:49] ; GFX12-NEXT: s_xor_b64 s[4:5], 
s[4:5], s[12:13] ; GFX12-NEXT: s_and_b32 s52, s33, 0x400000 ; GFX12-NEXT: s_mul_u64 s[8:9], s[2:3], s[50:51] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15] ; GFX12-NEXT: s_and_b32 s54, s33, 0x800000 ; GFX12-NEXT: s_mul_u64 s[10:11], s[2:3], s[52:53] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; GFX12-NEXT: s_and_b32 s56, s33, 0x1000000 ; GFX12-NEXT: s_mul_u64 s[12:13], s[2:3], s[54:55] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11] ; GFX12-NEXT: s_and_b32 s58, s33, 0x2000000 ; GFX12-NEXT: s_mul_u64 s[14:15], s[2:3], s[56:57] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[12:13] ; GFX12-NEXT: s_and_b32 s60, s33, 0x4000000 ; GFX12-NEXT: s_mul_u64 s[8:9], s[2:3], s[58:59] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15] ; GFX12-NEXT: s_and_b32 s62, s33, 0x8000000 ; GFX12-NEXT: s_mul_u64 s[10:11], s[2:3], s[60:61] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; GFX12-NEXT: s_and_b32 s64, s33, 0x10000000 ; GFX12-NEXT: s_mul_u64 s[12:13], s[2:3], s[62:63] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11] ; GFX12-NEXT: s_and_b32 s66, s33, 0x20000000 ; GFX12-NEXT: s_mul_u64 s[14:15], s[2:3], s[64:65] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[12:13] ; GFX12-NEXT: s_and_b32 s68, s33, 2.0 ; GFX12-NEXT: s_mul_u64 s[8:9], s[2:3], s[66:67] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15] ; GFX12-NEXT: s_mul_u64 s[10:11], s[2:3], s[68:69] ; GFX12-NEXT: s_and_b32 s12, s33, 0x80000000 ; GFX12-NEXT: s_mov_b32 s13, s3 ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; GFX12-NEXT: s_mul_u64 s[2:3], s[2:3], s[12:13] ; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11] ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX12-NEXT: s_xor_b64 s[2:3], s[4:5], s[2:3] ; GFX12-NEXT: s_mov_b32 s4, s0 ; GFX12-NEXT: v_mov_b32_e32 v0, s3 ; GFX12-NEXT: s_mov_b32 s5, s1 ; GFX12-NEXT: buffer_store_b32 v0, off, s[4:7], null ; GFX12-NEXT: s_endpgm ; ; GFX1250-LABEL: test_clmulh_i32: ; GFX1250: ; %bb.0: ; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 ; 
GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv ; GFX1250-NEXT: s_mov_b32 s6, -1 ; GFX1250-NEXT: s_mov_b32 s7, 0x31016000 ; GFX1250-NEXT: s_mov_b32 s10, s6 ; GFX1250-NEXT: s_mov_b32 s11, s7 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: s_mov_b32 s8, s2 ; GFX1250-NEXT: s_mov_b32 s9, s3 ; GFX1250-NEXT: s_mov_b32 s3, 0 ; GFX1250-NEXT: buffer_load_b64 v[0:1], off, s[8:11], null ; GFX1250-NEXT: s_mov_b32 s5, s3 ; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_mov_b64 s[8:9], 0x80000000 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: v_readfirstlane_b32 s4, v1 ; GFX1250-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1250-NEXT: s_and_b64 s[10:11], s[4:5], 2 ; GFX1250-NEXT: s_and_b64 s[12:13], s[4:5], 1 ; GFX1250-NEXT: s_and_b64 s[14:15], s[4:5], 4 ; GFX1250-NEXT: s_mul_u64 s[10:11], s[2:3], s[10:11] ; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[12:13] ; GFX1250-NEXT: s_and_b64 s[16:17], s[4:5], 8 ; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[14:15] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[12:13], s[10:11] ; GFX1250-NEXT: s_and_b64 s[18:19], s[4:5], 16 ; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[16:17] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15] ; GFX1250-NEXT: s_and_b64 s[20:21], s[4:5], 32 ; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[18:19] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; GFX1250-NEXT: s_and_b64 s[22:23], s[4:5], 64 ; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[20:21] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15] ; GFX1250-NEXT: s_and_b64 s[24:25], s[4:5], 0x80 ; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[22:23] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; GFX1250-NEXT: s_and_b64 s[26:27], s[4:5], 0x100 ; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[24:25] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15] ; GFX1250-NEXT: s_and_b64 s[28:29], s[4:5], 0x200 ; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[26:27] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; GFX1250-NEXT: s_and_b64 s[30:31], s[4:5], 0x400 
; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[28:29] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15] ; GFX1250-NEXT: s_and_b64 s[34:35], s[4:5], 0x800 ; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[30:31] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; GFX1250-NEXT: s_and_b64 s[36:37], s[4:5], 0x1000 ; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[34:35] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15] ; GFX1250-NEXT: s_and_b64 s[38:39], s[4:5], 0x2000 ; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[36:37] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; GFX1250-NEXT: s_and_b64 s[40:41], s[4:5], 0x4000 ; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[38:39] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15] ; GFX1250-NEXT: s_and_b64 s[42:43], s[4:5], 0x8000 ; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[40:41] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; GFX1250-NEXT: s_and_b64 s[44:45], s[4:5], 0x10000 ; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[42:43] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15] ; GFX1250-NEXT: s_and_b64 s[46:47], s[4:5], 0x20000 ; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[44:45] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; GFX1250-NEXT: s_and_b64 s[48:49], s[4:5], 0x40000 ; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[46:47] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15] ; GFX1250-NEXT: s_and_b64 s[50:51], s[4:5], 0x80000 ; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[48:49] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; GFX1250-NEXT: s_and_b64 s[52:53], s[4:5], 0x100000 ; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[50:51] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15] ; GFX1250-NEXT: s_and_b64 s[54:55], s[4:5], 0x200000 ; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[52:53] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; GFX1250-NEXT: s_and_b64 s[56:57], s[4:5], 0x400000 ; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[54:55] ; GFX1250-NEXT: s_xor_b64 
s[10:11], s[10:11], s[14:15] ; GFX1250-NEXT: s_and_b64 s[58:59], s[4:5], 0x800000 ; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[56:57] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; GFX1250-NEXT: s_and_b64 s[60:61], s[4:5], 0x1000000 ; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[58:59] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15] ; GFX1250-NEXT: s_and_b64 s[62:63], s[4:5], 0x2000000 ; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[60:61] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; GFX1250-NEXT: s_and_b64 s[64:65], s[4:5], 0x4000000 ; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[62:63] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15] ; GFX1250-NEXT: s_and_b64 s[66:67], s[4:5], 0x8000000 ; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[64:65] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; GFX1250-NEXT: s_and_b64 s[68:69], s[4:5], 0x10000000 ; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[66:67] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15] ; GFX1250-NEXT: s_and_b64 s[70:71], s[4:5], 0x20000000 ; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[68:69] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; GFX1250-NEXT: s_and_b64 s[72:73], s[4:5], 0x40000000 ; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[70:71] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15] ; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[72:73] ; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] ; GFX1250-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9] ; GFX1250-NEXT: s_xor_b64 s[8:9], s[10:11], s[14:15] ; GFX1250-NEXT: s_mul_u64 s[2:3], s[2:3], s[4:5] ; GFX1250-NEXT: s_mov_b32 s4, s0 ; GFX1250-NEXT: s_xor_b64 s[2:3], s[8:9], s[2:3] ; GFX1250-NEXT: s_mov_b32 s5, s1 ; GFX1250-NEXT: v_mov_b32_e32 v0, s3 ; GFX1250-NEXT: buffer_store_b32 v0, off, s[4:7], null ; GFX1250-NEXT: s_endpgm ; ; EG-LABEL: test_clmulh_i32: ; EG: ; %bb.0: ; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] ; EG-NEXT: TEX 0 @8 ; EG-NEXT: ALU 98, @11, KC0[], KC1[] ; EG-NEXT: ALU 110, @110, KC0[], 
KC1[] ; EG-NEXT: ALU 13, @221, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD ; EG-NEXT: Fetch clause starting at 8: ; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1 ; EG-NEXT: ALU clause starting at 10: ; EG-NEXT: MOV * T0.X, KC0[2].Z, ; EG-NEXT: ALU clause starting at 11: ; EG-NEXT: LSHR * T0.W, T0.X, literal.x, ; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Z, PV.W, literal.x, ; EG-NEXT: AND_INT T0.W, T0.Y, literal.x, ; EG-NEXT: LSHR * T1.W, T0.Y, literal.y, ; EG-NEXT: 65280(9.147676e-41), 8(1.121039e-44) ; EG-NEXT: AND_INT T1.X, PS, literal.x, ; EG-NEXT: LSHR T1.Y, T0.Y, literal.y, ; EG-NEXT: LSHL T1.Z, PV.W, literal.z, ; EG-NEXT: LSHL T0.W, T0.Y, literal.y, ; EG-NEXT: AND_INT * T1.W, T0.X, literal.x, ; EG-NEXT: 65280(9.147676e-41), 24(3.363116e-44) ; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) ; EG-NEXT: LSHR T2.X, T0.X, literal.x, ; EG-NEXT: LSHL T0.Y, PS, literal.y, ; EG-NEXT: LSHL T2.Z, T0.X, literal.x, ; EG-NEXT: OR_INT T0.W, PV.W, PV.Z, ; EG-NEXT: OR_INT * T1.W, PV.X, PV.Y, ; EG-NEXT: 24(3.363116e-44), 8(1.121039e-44) ; EG-NEXT: OR_INT T1.Z, PV.W, PS, ; EG-NEXT: OR_INT T0.W, PV.Z, PV.Y, ; EG-NEXT: OR_INT * T1.W, T0.Z, PV.X, ; EG-NEXT: OR_INT T0.Z, PV.W, PS, ; EG-NEXT: AND_INT T0.W, PV.Z, literal.x, ; EG-NEXT: LSHR * T1.W, PV.Z, literal.y, ; EG-NEXT: 252645135(7.053345e-30), 4(5.605194e-45) ; EG-NEXT: AND_INT T0.Y, PS, literal.x, ; EG-NEXT: LSHL T1.Z, PV.W, literal.y, ; EG-NEXT: AND_INT T0.W, PV.Z, literal.x, ; EG-NEXT: LSHR * T1.W, PV.Z, literal.y, ; EG-NEXT: 252645135(7.053345e-30), 4(5.605194e-45) ; EG-NEXT: AND_INT T0.Z, PS, literal.x, ; EG-NEXT: LSHL T0.W, PV.W, literal.y, ; EG-NEXT: OR_INT * T1.W, PV.Y, PV.Z, ; EG-NEXT: 252645135(7.053345e-30), 4(5.605194e-45) ; EG-NEXT: AND_INT T1.Z, PS, literal.x, ; EG-NEXT: LSHR T1.W, PS, literal.y, ; EG-NEXT: OR_INT * T0.W, PV.Z, PV.W, ; EG-NEXT: 858993459(4.172325e-08), 2(2.802597e-45) ; EG-NEXT: AND_INT T0.Y, PS, literal.x, ; EG-NEXT: LSHR 
T0.Z, PS, literal.y, ; EG-NEXT: AND_INT T0.W, PV.W, literal.x, ; EG-NEXT: LSHL * T1.W, PV.Z, literal.y, ; EG-NEXT: 858993459(4.172325e-08), 2(2.802597e-45) ; EG-NEXT: OR_INT T1.Z, PV.W, PS, ; EG-NEXT: AND_INT T0.W, PV.Z, literal.x, ; EG-NEXT: LSHL * T1.W, PV.Y, literal.y, ; EG-NEXT: 858993459(4.172325e-08), 2(2.802597e-45) ; EG-NEXT: OR_INT T0.Z, PV.W, PS, ; EG-NEXT: AND_INT T0.W, PV.Z, literal.x, ; EG-NEXT: LSHR * T1.W, PV.Z, 1, ; EG-NEXT: 1431655765(1.466015e+13), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Y, PS, literal.x, ; EG-NEXT: LSHL T1.Z, PV.W, 1, ; EG-NEXT: AND_INT T0.W, PV.Z, literal.x, ; EG-NEXT: LSHR * T1.W, PV.Z, 1, ; EG-NEXT: 1431655765(1.466015e+13), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Z, PS, literal.x, ; EG-NEXT: LSHL T0.W, PV.W, 1, ; EG-NEXT: OR_INT * T1.W, PV.Y, PV.Z, ; EG-NEXT: 1431655765(1.466015e+13), 0(0.000000e+00) ; EG-NEXT: AND_INT T2.W, PS, 1, ; EG-NEXT: OR_INT * T0.W, PV.Z, PV.W, ; EG-NEXT: AND_INT T3.W, T1.W, literal.x, ; EG-NEXT: MULLO_INT * T0.X, PS, PV.W, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) ; EG-NEXT: AND_INT T2.W, T1.W, literal.x, ; EG-NEXT: MULLO_INT * T0.Y, T0.W, PV.W, ; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T3.W, T0.X, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.W, ; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) ; EG-NEXT: AND_INT T1.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T2.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T2.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) ; EG-NEXT: AND_INT T1.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T2.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 64(8.968310e-44), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T2.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 128(1.793662e-43), 0(0.000000e+00) ; EG-NEXT: 
AND_INT T1.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T2.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 256(3.587324e-43), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T2.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 512(7.174648e-43), 0(0.000000e+00) ; EG-NEXT: AND_INT * T1.Z, T1.W, literal.x, ; EG-NEXT: 1024(1.434930e-42), 0(0.000000e+00) ; EG-NEXT: ALU clause starting at 110: ; EG-NEXT: XOR_INT T3.W, T2.W, T0.X, ; EG-NEXT: MULLO_INT * T0.X, T0.W, T0.Z, ; EG-NEXT: AND_INT T0.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T3.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, T1.Z, ; EG-NEXT: 2048(2.869859e-42), 0(0.000000e+00) ; EG-NEXT: AND_INT T1.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T3.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 4096(5.739719e-42), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T3.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 8192(1.147944e-41), 0(0.000000e+00) ; EG-NEXT: AND_INT T1.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T3.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 16384(2.295887e-41), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T3.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 32768(4.591775e-41), 0(0.000000e+00) ; EG-NEXT: AND_INT T1.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T3.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 65536(9.183550e-41), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T3.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 131072(1.836710e-40), 0(0.000000e+00) ; EG-NEXT: AND_INT T1.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T3.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 262144(3.673420e-40), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T3.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 
524288(7.346840e-40), 0(0.000000e+00) ; EG-NEXT: AND_INT T1.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T3.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 1048576(1.469368e-39), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T3.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 2097152(2.938736e-39), 0(0.000000e+00) ; EG-NEXT: AND_INT T1.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T3.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 4194304(5.877472e-39), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T3.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 8388608(1.175494e-38), 0(0.000000e+00) ; EG-NEXT: AND_INT T1.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T3.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 16777216(2.350989e-38), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T3.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 33554432(9.403955e-38), 0(0.000000e+00) ; EG-NEXT: AND_INT T1.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T4.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 67108864(1.504633e-36), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T4.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 134217728(3.851860e-34), 0(0.000000e+00) ; EG-NEXT: AND_INT T1.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T4.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 268435456(2.524355e-29), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T4.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 536870912(1.084202e-19), 0(0.000000e+00) ; EG-NEXT: AND_INT T1.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T4.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: 1073741824(2.000000e+00), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.Z, T1.W, literal.x, ; EG-NEXT: XOR_INT T1.W, PV.W, PS, ; EG-NEXT: MULLO_INT * 
T0.X, T0.W, PV.Z, ; EG-NEXT: -2147483648(-0.000000e+00), 0(0.000000e+00) ; EG-NEXT: XOR_INT T1.W, PV.W, PS, ; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z, ; EG-NEXT: LSHR T0.Z, T3.W, literal.x, ; EG-NEXT: XOR_INT T0.W, PV.W, PS, ; EG-NEXT: AND_INT * T1.W, T3.W, literal.y, ; EG-NEXT: 8(1.121039e-44), 65280(9.147676e-41) ; EG-NEXT: LSHL T0.Y, PS, literal.x, ; EG-NEXT: LSHL T1.Z, T2.W, literal.y, ; EG-NEXT: LSHR T0.W, PV.W, literal.y, ; EG-NEXT: AND_INT * T1.W, PV.Z, literal.z, ; EG-NEXT: 8(1.121039e-44), 24(3.363116e-44) ; EG-NEXT: 65280(9.147676e-41), 0(0.000000e+00) ; EG-NEXT: OR_INT T0.W, PS, PV.W, ; EG-NEXT: OR_INT * T1.W, PV.Z, PV.Y, ; EG-NEXT: OR_INT * T0.W, PS, PV.W, ; EG-NEXT: AND_INT T1.W, PV.W, literal.x, ; EG-NEXT: LSHR * T0.W, PV.W, literal.y, ; EG-NEXT: 252645135(7.053345e-30), 4(5.605194e-45) ; EG-NEXT: AND_INT T0.W, PS, literal.x, ; EG-NEXT: LSHL * T1.W, PV.W, literal.y, ; EG-NEXT: 252645135(7.053345e-30), 4(5.605194e-45) ; EG-NEXT: OR_INT * T0.W, PV.W, PS, ; EG-NEXT: AND_INT T1.W, PV.W, literal.x, ; EG-NEXT: LSHR * T0.W, PV.W, literal.y, ; EG-NEXT: 858993459(4.172325e-08), 2(2.802597e-45) ; EG-NEXT: ALU clause starting at 221: ; EG-NEXT: AND_INT T0.W, T0.W, literal.x, ; EG-NEXT: LSHL * T1.W, T1.W, literal.y, ; EG-NEXT: 858993459(4.172325e-08), 2(2.802597e-45) ; EG-NEXT: OR_INT * T0.W, PV.W, PS, ; EG-NEXT: AND_INT T1.W, PV.W, literal.x, ; EG-NEXT: LSHR * T0.W, PV.W, 1, ; EG-NEXT: 1431655765(1.466015e+13), 0(0.000000e+00) ; EG-NEXT: AND_INT T0.W, PS, literal.x, ; EG-NEXT: LSHL * T1.W, PV.W, 1, ; EG-NEXT: 1431655764(1.466015e+13), 0(0.000000e+00) ; EG-NEXT: OR_INT * T0.W, PV.W, PS, ; EG-NEXT: LSHR T0.X, PV.W, 1, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %b_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1 %a = load i32, ptr addrspace(1) %in %b = load i32, ptr addrspace(1) %b_ptr %a.ext = zext i32 %a to i64 %b.ext = zext i32 %b to i64 %clmul = call i64 @llvm.clmul.i64(i64 %a.ext, i64 %b.ext) %res.ext = lshr i64 
%clmul, 32 %res = trunc i64 %res.ext to i32 store i32 %res, ptr addrspace(1) %out ret void } declare i32 @llvm.amdgcn.workitem.id.x() #1 attributes #0 = { nounwind } attributes #1 = { nounwind readnone}
; Module-level trailer (the declaration and attribute groups that follow the
; closing brace of the last test function):
;   * @llvm.amdgcn.workitem.id.x is declared as returning i32, taking no
;     arguments, and carrying attribute group #1.
;   * Attribute group #0 is { nounwind }.
;   * Attribute group #1 is { nounwind readnone }.
; NOTE(review): no call to @llvm.amdgcn.workitem.id.x and no use of #0 is
; visible in the part of this file that was reviewed -- confirm against the
; remaining test functions before treating either as dead.