diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/mul.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/mul.ll | 434 |
1 files changed, 428 insertions, 6 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/mul.ll b/llvm/test/CodeGen/AMDGPU/mul.ll index b5e7589..8d3716e 100644 --- a/llvm/test/CodeGen/AMDGPU/mul.ll +++ b/llvm/test/CodeGen/AMDGPU/mul.ll @@ -1,10 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1200 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=verde < %s | FileCheck -check-prefixes=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX10 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1200 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX12 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1250 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX1250 %s ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG %s ; mul24 and mad24 are affected @@ -124,6 +125,25 @@ define amdgpu_kernel void @test_mul_v2i32(ptr addrspace(1) %out, ptr addrspace(1 ; GFX12-NEXT: buffer_store_b64 v[0:1], off, s[4:7], null ; GFX12-NEXT: s_endpgm ; +; GFX1250-LABEL: test_mul_v2i32: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_mov_b32 s6, -1 +; GFX1250-NEXT: s_mov_b32 s7, 0x31016000 +; GFX1250-NEXT: s_mov_b32 s10, s6 +; GFX1250-NEXT: s_mov_b32 s11, s7 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_mov_b32 s8, s2 +; GFX1250-NEXT: s_mov_b32 s9, s3 +; GFX1250-NEXT: s_mov_b32 s4, s0 +; GFX1250-NEXT: buffer_load_b128 v[0:3], off, s[8:11], null +; GFX1250-NEXT: s_mov_b32 s5, s1 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: v_mul_lo_u32 v1, v1, v3 +; GFX1250-NEXT: v_mul_lo_u32 v0, v0, v2 +; GFX1250-NEXT: buffer_store_b64 v[0:1], off, s[4:7], null +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: test_mul_v2i32: ; EG: ; %bb.0: ; %entry ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] @@ -286,6 +306,29 @@ define amdgpu_kernel void @v_mul_v4i32(ptr addrspace(1) %out, ptr addrspace(1) % ; GFX12-NEXT: buffer_store_b128 v[0:3], off, s[4:7], null ; GFX12-NEXT: s_endpgm ; +; GFX1250-LABEL: v_mul_v4i32: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_mov_b32 s6, -1 +; GFX1250-NEXT: s_mov_b32 s7, 0x31016000 +; GFX1250-NEXT: s_mov_b32 s10, s6 +; GFX1250-NEXT: s_mov_b32 s11, s7 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_mov_b32 s8, s2 +; GFX1250-NEXT: s_mov_b32 s9, s3 +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: buffer_load_b128 v[0:3], off, s[8:11], null +; GFX1250-NEXT: buffer_load_b128 v[4:7], off, s[8:11], null offset:16 +; GFX1250-NEXT: s_mov_b32 s4, s0 +; GFX1250-NEXT: s_mov_b32 s5, s1 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: v_mul_lo_u32 v3, v3, v7 +; GFX1250-NEXT: v_mul_lo_u32 v2, v2, v6 +; GFX1250-NEXT: v_mul_lo_u32 v1, v1, v5 +; GFX1250-NEXT: v_mul_lo_u32 v0, v0, v4 +; GFX1250-NEXT: buffer_store_b128 v[0:3], off, s[4:7], null +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: v_mul_v4i32: ; EG: ; %bb.0: ; %entry ; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] @@ -402,6 +445,19 @@ define amdgpu_kernel void @s_trunc_i64_mul_to_i32(ptr addrspace(1) %out, i64 %a, ; GFX12-NEXT: buffer_store_b32 v0, off, s[0:3], null ; GFX12-NEXT: s_endpgm ; +; GFX1250-LABEL: s_trunc_i64_mul_to_i32: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x34 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_mul_i32 s2, s3, s2 +; GFX1250-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1250-NEXT: v_mov_b32_e32 v0, s2 +; GFX1250-NEXT: s_mov_b32 s2, -1 +; GFX1250-NEXT: buffer_store_b32 v0, off, s[0:3], null +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: s_trunc_i64_mul_to_i32: ; EG: ; %bb.0: ; %entry ; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] @@ -555,6 +611,29 @@ define amdgpu_kernel void @v_trunc_i64_mul_to_i32(ptr addrspace(1) %out, ptr add ; GFX12-NEXT: buffer_store_b32 v0, off, s[8:11], null ; GFX12-NEXT: s_endpgm ; +; GFX1250-LABEL: v_trunc_i64_mul_to_i32: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 +; GFX1250-NEXT: s_mov_b32 s10, -1 +; GFX1250-NEXT: s_mov_b32 s11, 0x31016000 +; GFX1250-NEXT: s_mov_b32 s14, s10 +; GFX1250-NEXT: s_mov_b32 s15, s11 +; GFX1250-NEXT: s_mov_b32 s6, s10 +; GFX1250-NEXT: s_mov_b32 s7, s11 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_mov_b32 s12, s2 +; GFX1250-NEXT: s_mov_b32 s13, s3 +; GFX1250-NEXT: buffer_load_b32 v0, off, s[12:15], null +; GFX1250-NEXT: buffer_load_b32 v1, off, s[4:7], null +; GFX1250-NEXT: s_mov_b32 s8, s0 +; GFX1250-NEXT: s_mov_b32 s9, s1 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: v_mul_lo_u32 v0, v1, v0 +; GFX1250-NEXT: buffer_store_b32 v0, off, s[8:11], null +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: v_trunc_i64_mul_to_i32: ; EG: ; %bb.0: ; %entry ; EG-NEXT: ALU 1, @10, KC0[CB0:0-32], KC1[] @@ -670,6 +749,19 @@ define amdgpu_kernel void @mul64_sext_c(ptr addrspace(1) %out, i32 %in) { ; GFX12-NEXT: buffer_store_b64 v[0:1], off, s[0:3], null ; GFX12-NEXT: s_endpgm ; +; GFX1250-LABEL: mul64_sext_c: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_ashr_i32 s3, s2, 31 +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: s_mul_u64 s[4:5], s[2:3], 0x50 +; GFX1250-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[4:5] +; GFX1250-NEXT: s_mov_b32 s2, -1 +; GFX1250-NEXT: buffer_store_b64 v[0:1], off, s[0:3], null +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: mul64_sext_c: ; EG: ; %bb.0: ; %entry ; EG-NEXT: ALU 4, @4, KC0[CB0:0-32], KC1[] @@ -773,6 +865,18 @@ define amdgpu_kernel void @mul64_zext_c(ptr addrspace(1) %out, i32 %in) { ; GFX12-NEXT: buffer_store_b64 v[0:1], off, s[0:3], null ; GFX12-NEXT: s_endpgm ; +; GFX1250-LABEL: mul64_zext_c: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 +; GFX1250-NEXT: s_mov_b32 s3, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_mul_u64 s[4:5], s[2:3], 0x50 +; GFX1250-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[4:5] +; GFX1250-NEXT: s_mov_b32 s2, -1 +; GFX1250-NEXT: buffer_store_b64 v[0:1], off, s[0:3], null +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: mul64_zext_c: ; EG: ; %bb.0: ; %entry ; EG-NEXT: ALU 4, @4, KC0[CB0:0-32], KC1[] @@ -909,6 +1013,26 @@ define amdgpu_kernel void @v_mul64_sext_c(ptr addrspace(1) %out, ptr addrspace(1 ; GFX12-NEXT: buffer_store_b64 v[0:1], off, s[4:7], null ; GFX12-NEXT: s_endpgm ; +; GFX1250-LABEL: v_mul64_sext_c: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_mov_b32 s6, -1 +; GFX1250-NEXT: s_mov_b32 s7, 0x31016000 +; GFX1250-NEXT: s_mov_b32 s10, s6 +; GFX1250-NEXT: s_mov_b32 s11, s7 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_mov_b32 s8, s2 +; GFX1250-NEXT: s_mov_b32 s9, s3 +; GFX1250-NEXT: s_mov_b32 s4, s0 +; GFX1250-NEXT: buffer_load_b32 v0, off, s[8:11], null +; GFX1250-NEXT: s_mov_b32 s5, s1 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mul_u64_e32 v[0:1], 0x50, v[0:1] +; GFX1250-NEXT: buffer_store_b64 v[0:1], off, s[4:7], null +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: v_mul64_sext_c: ; EG: ; %bb.0: ; %entry ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] @@ -1052,6 +1176,25 @@ define amdgpu_kernel void @v_mul64_zext_c(ptr addrspace(1) %out, ptr addrspace(1 ; GFX12-NEXT: buffer_store_b64 v[0:1], off, s[4:7], null ; GFX12-NEXT: s_endpgm ; +; GFX1250-LABEL: v_mul64_zext_c: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_mov_b32 s6, -1 +; GFX1250-NEXT: s_mov_b32 s7, 0x31016000 +; GFX1250-NEXT: s_mov_b32 s10, s6 +; GFX1250-NEXT: s_mov_b32 s11, s7 +; GFX1250-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_mov_b32 s8, s2 +; GFX1250-NEXT: s_mov_b32 s9, s3 +; GFX1250-NEXT: s_mov_b32 s4, s0 +; GFX1250-NEXT: buffer_load_b32 v0, off, s[8:11], null +; GFX1250-NEXT: s_mov_b32 s5, s1 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: v_mul_u64_e32 v[0:1], 0x50, v[0:1] +; GFX1250-NEXT: buffer_store_b64 v[0:1], off, s[4:7], null +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: v_mul64_zext_c: ; EG: ; %bb.0: ; %entry ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] @@ -1192,6 +1335,26 @@ define amdgpu_kernel void @v_mul64_sext_inline_imm(ptr addrspace(1) %out, ptr ad ; GFX12-NEXT: buffer_store_b64 v[0:1], off, s[4:7], null ; GFX12-NEXT: s_endpgm ; +; GFX1250-LABEL: v_mul64_sext_inline_imm: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_mov_b32 s6, -1 +; GFX1250-NEXT: s_mov_b32 s7, 0x31016000 +; GFX1250-NEXT: s_mov_b32 s10, s6 +; GFX1250-NEXT: s_mov_b32 s11, s7 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_mov_b32 s8, s2 +; GFX1250-NEXT: s_mov_b32 s9, s3 +; GFX1250-NEXT: s_mov_b32 s4, s0 +; GFX1250-NEXT: buffer_load_b32 v0, off, s[8:11], null +; GFX1250-NEXT: s_mov_b32 s5, s1 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mul_u64_e32 v[0:1], 9, v[0:1] +; GFX1250-NEXT: buffer_store_b64 v[0:1], off, s[4:7], null +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: v_mul64_sext_inline_imm: ; EG: ; %bb.0: ; %entry ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] @@ -1300,6 +1463,20 @@ define amdgpu_kernel void @s_mul_i32(ptr addrspace(1) %out, [8 x i32], i32 %a, [ ; GFX12-NEXT: buffer_store_b32 v0, off, s[0:3], null ; GFX12-NEXT: s_endpgm ; +; GFX1250-LABEL: s_mul_i32: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_clause 0x2 +; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4c +; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x70 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_mul_i32 s2, s2, s3 +; GFX1250-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1250-NEXT: v_mov_b32_e32 v0, s2 +; GFX1250-NEXT: s_mov_b32 s2, -1 +; GFX1250-NEXT: buffer_store_b32 v0, off, s[0:3], null +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: s_mul_i32: ; EG: ; %bb.0: ; %entry ; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] @@ -1425,6 +1602,24 @@ define amdgpu_kernel void @v_mul_i32(ptr addrspace(1) %out, ptr addrspace(1) %in ; GFX12-NEXT: buffer_store_b32 v0, off, s[4:7], null ; GFX12-NEXT: s_endpgm ; +; GFX1250-LABEL: v_mul_i32: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_mov_b32 s6, -1 +; GFX1250-NEXT: s_mov_b32 s7, 0x31016000 +; GFX1250-NEXT: s_mov_b32 s10, s6 +; GFX1250-NEXT: s_mov_b32 s11, s7 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_mov_b32 s8, s2 +; GFX1250-NEXT: s_mov_b32 s9, s3 +; GFX1250-NEXT: s_mov_b32 s4, s0 +; GFX1250-NEXT: buffer_load_b64 v[0:1], off, s[8:11], null +; GFX1250-NEXT: s_mov_b32 s5, s1 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: v_mul_lo_u32 v0, v0, v1 +; GFX1250-NEXT: buffer_store_b32 v0, off, s[4:7], null +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: v_mul_i32: ; EG: ; %bb.0: ; %entry ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] @@ -1540,6 +1735,22 @@ define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8 ; GFX12-NEXT: buffer_store_b8 v0, off, s[0:3], null ; GFX12-NEXT: s_endpgm ; +; GFX1250-LABEL: s_mul_i1: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_clause 0x2 +; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4c +; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x70 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_and_b32 s2, s2, s3 +; GFX1250-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1250-NEXT: s_and_b32 s2, s2, 1 +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: v_mov_b32_e32 v0, s2 +; GFX1250-NEXT: s_mov_b32 s2, -1 +; GFX1250-NEXT: buffer_store_b8 v0, off, s[0:3], null +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: s_mul_i1: ; EG: ; %bb.0: ; %entry ; EG-NEXT: ALU 0, @10, KC0[], KC1[] @@ -1699,6 +1910,28 @@ define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in) ; GFX12-NEXT: buffer_store_b8 v0, off, s[4:7], null ; GFX12-NEXT: s_endpgm ; +; GFX1250-LABEL: v_mul_i1: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_mov_b32 s6, -1 +; GFX1250-NEXT: s_mov_b32 s7, 0x31016000 +; GFX1250-NEXT: s_mov_b32 s10, s6 +; GFX1250-NEXT: s_mov_b32 s11, s7 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_mov_b32 s8, s2 +; GFX1250-NEXT: s_mov_b32 s9, s3 +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: buffer_load_u8 v0, off, s[8:11], null +; GFX1250-NEXT: buffer_load_u8 v1, off, s[8:11], null offset:4 +; GFX1250-NEXT: s_mov_b32 s4, s0 +; GFX1250-NEXT: s_mov_b32 s5, s1 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: v_and_b32_e32 v0, v0, v1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX1250-NEXT: buffer_store_b8 v0, off, s[4:7], null +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: v_mul_i1: ; EG: ; %bb.0: ; %entry ; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] @@ -1856,6 +2089,19 @@ define amdgpu_kernel void @s_mul_i64(ptr addrspace(1) %out, i64 %a, i64 %b) noun ; GFX12-NEXT: buffer_store_b64 v[0:1], off, s[0:3], null ; GFX12-NEXT: s_endpgm ; +; GFX1250-LABEL: s_mul_i64: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_mul_u64 s[4:5], s[2:3], s[4:5] +; GFX1250-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[4:5] +; GFX1250-NEXT: s_mov_b32 s2, -1 +; GFX1250-NEXT: buffer_store_b64 v[0:1], off, s[0:3], null +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: s_mul_i64: ; EG: ; %bb.0: ; %entry ; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] @@ -2044,6 +2290,29 @@ define amdgpu_kernel void @v_mul_i64(ptr addrspace(1) %out, ptr addrspace(1) %ap ; GFX12-NEXT: buffer_store_b64 v[0:1], off, s[8:11], null ; GFX12-NEXT: s_endpgm ; +; GFX1250-LABEL: v_mul_i64: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 +; GFX1250-NEXT: s_mov_b32 s10, -1 +; GFX1250-NEXT: s_mov_b32 s11, 0x31016000 +; GFX1250-NEXT: s_mov_b32 s14, s10 +; GFX1250-NEXT: s_mov_b32 s15, s11 +; GFX1250-NEXT: s_mov_b32 s6, s10 +; GFX1250-NEXT: s_mov_b32 s7, s11 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_mov_b32 s12, s2 +; GFX1250-NEXT: s_mov_b32 s13, s3 +; GFX1250-NEXT: buffer_load_b64 v[0:1], off, s[12:15], null +; GFX1250-NEXT: buffer_load_b64 v[2:3], off, s[4:7], null +; GFX1250-NEXT: s_mov_b32 s8, s0 +; GFX1250-NEXT: s_mov_b32 s9, s1 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: v_mul_u64_e32 v[0:1], v[0:1], v[2:3] +; GFX1250-NEXT: buffer_store_b64 v[0:1], off, s[8:11], null +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: v_mul_i64: ; EG: ; %bb.0: ; %entry ; EG-NEXT: ALU 1, @10, KC0[CB0:0-32], KC1[] @@ -2286,6 +2555,41 @@ define amdgpu_kernel void @mul32_in_branch(ptr addrspace(1) %out, ptr addrspace( ; GFX12-NEXT: buffer_store_b32 v0, off, s[0:3], null ; GFX12-NEXT: s_endpgm ; +; GFX1250-LABEL: mul32_in_branch: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x34 +; GFX1250-NEXT: s_mov_b32 s6, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_cmp_lg_u32 s0, 0 +; GFX1250-NEXT: s_cbranch_scc0 .LBB15_2 +; GFX1250-NEXT: ; %bb.1: ; %else +; GFX1250-NEXT: s_mul_i32 s7, s0, s1 +; GFX1250-NEXT: s_branch .LBB15_3 +; GFX1250-NEXT: .LBB15_2: +; GFX1250-NEXT: s_mov_b32 s6, -1 +; GFX1250-NEXT: ; implicit-def: $sgpr7 +; GFX1250-NEXT: .LBB15_3: ; %Flow +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s6 +; GFX1250-NEXT: s_cbranch_vccnz .LBB15_5 +; GFX1250-NEXT: ; %bb.4: ; %if +; GFX1250-NEXT: s_mov_b32 s7, 0x31016000 +; GFX1250-NEXT: s_mov_b32 s6, -1 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_mov_b32 s4, s2 +; GFX1250-NEXT: s_mov_b32 s5, s3 +; GFX1250-NEXT: buffer_load_b32 v0, off, s[4:7], null +; GFX1250-NEXT: s_branch .LBB15_6 +; GFX1250-NEXT: .LBB15_5: +; GFX1250-NEXT: v_mov_b32_e32 v0, s7 +; GFX1250-NEXT: .LBB15_6: ; %endif +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1250-NEXT: s_mov_b32 s2, -1 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: buffer_store_b32 v0, off, s[0:3], null +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: mul32_in_branch: ; EG: ; %bb.0: ; %entry ; EG-NEXT: ALU_PUSH_BEFORE 3, @14, KC0[CB0:0-32], KC1[] @@ -2539,6 +2843,34 @@ define amdgpu_kernel void @mul64_in_branch(ptr addrspace(1) %out, ptr addrspace( ; GFX12-NEXT: buffer_store_b64 v[0:1], off, s[0:3], null ; GFX12-NEXT: s_endpgm ; +; GFX1250-LABEL: mul64_in_branch: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_cmp_lg_u64 s[4:5], 0 +; GFX1250-NEXT: s_cbranch_scc0 .LBB16_3 +; GFX1250-NEXT: ; %bb.1: ; %else +; GFX1250-NEXT: s_mul_u64 s[4:5], s[4:5], s[6:7] +; GFX1250-NEXT: s_cbranch_execnz .LBB16_4 +; GFX1250-NEXT: .LBB16_2: ; %if +; GFX1250-NEXT: s_mov_b32 s7, 0x31016000 +; GFX1250-NEXT: s_mov_b32 s6, -1 +; GFX1250-NEXT: s_mov_b32 s4, s2 +; GFX1250-NEXT: s_mov_b32 s5, s3 +; GFX1250-NEXT: buffer_load_b64 v[0:1], off, s[4:7], null +; GFX1250-NEXT: s_branch .LBB16_5 +; GFX1250-NEXT: .LBB16_3: +; GFX1250-NEXT: ; implicit-def: $sgpr4_sgpr5 +; GFX1250-NEXT: s_branch .LBB16_2 +; GFX1250-NEXT: .LBB16_4: +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[4:5] +; GFX1250-NEXT: .LBB16_5: ; %endif +; GFX1250-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1250-NEXT: s_mov_b32 s2, -1 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: buffer_store_b64 v[0:1], off, s[0:3], null +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: mul64_in_branch: ; EG: ; %bb.0: ; %entry ; EG-NEXT: ALU_PUSH_BEFORE 4, @14, KC0[CB0:0-32], KC1[] @@ -2882,6 +3214,52 @@ define amdgpu_kernel void @s_mul_i128(ptr addrspace(1) %out, [8 x i32], i128 %a, ; GFX12-NEXT: buffer_store_b128 v[0:3], off, s[0:3], null ; GFX12-NEXT: s_endpgm ; +; GFX1250-LABEL: s_mul_i128: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_clause 0x2 +; GFX1250-NEXT: s_load_b128 s[8:11], s[4:5], 0x7c +; GFX1250-NEXT: s_load_b128 s[12:15], s[4:5], 0x4c +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_mov_b64 s[4:5], lit64(0xffffffff) +; GFX1250-NEXT: s_mov_b32 s3, 0 +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: s_mov_b32 s7, s3 +; GFX1250-NEXT: s_mov_b32 s17, s3 +; GFX1250-NEXT: s_mov_b32 s19, s3 +; GFX1250-NEXT: s_mov_b32 s20, s3 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_mov_b32 s2, s8 +; GFX1250-NEXT: s_and_b64 s[4:5], s[12:13], s[4:5] +; GFX1250-NEXT: s_mov_b32 s6, s13 +; GFX1250-NEXT: s_mul_u64 s[10:11], s[10:11], s[12:13] +; GFX1250-NEXT: s_mul_u64 s[12:13], s[4:5], s[2:3] +; GFX1250-NEXT: s_mov_b32 s16, s9 +; GFX1250-NEXT: s_mul_u64 s[8:9], s[8:9], s[14:15] +; GFX1250-NEXT: s_mul_u64 s[14:15], s[6:7], s[2:3] +; GFX1250-NEXT: s_mov_b32 s2, s13 +; GFX1250-NEXT: s_mul_u64 s[4:5], s[4:5], s[16:17] +; GFX1250-NEXT: s_add_nc_u64 s[14:15], s[14:15], s[2:3] +; GFX1250-NEXT: s_mul_u64 s[6:7], s[6:7], s[16:17] +; GFX1250-NEXT: s_mov_b32 s2, s15 +; GFX1250-NEXT: s_mov_b32 s15, s3 +; GFX1250-NEXT: s_mov_b32 s13, s3 +; GFX1250-NEXT: s_add_nc_u64 s[4:5], s[4:5], s[14:15] +; GFX1250-NEXT: s_add_nc_u64 s[8:9], s[10:11], s[8:9] +; GFX1250-NEXT: s_mov_b32 s18, s5 +; GFX1250-NEXT: s_mov_b32 s21, s4 +; GFX1250-NEXT: s_add_nc_u64 s[2:3], s[2:3], s[18:19] +; GFX1250-NEXT: s_or_b64 s[4:5], s[12:13], s[20:21] +; GFX1250-NEXT: s_add_nc_u64 s[2:3], s[6:7], s[2:3] +; GFX1250-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 +; GFX1250-NEXT: s_add_nc_u64 s[2:3], s[2:3], s[8:9] +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX1250-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1250-NEXT: s_mov_b32 s2, -1 +; GFX1250-NEXT: buffer_store_b128 v[0:3], off, s[0:3], null +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: s_mul_i128: ; EG: ; %bb.0: ; %entry ; EG-NEXT: ALU 41, @4, KC0[CB0:0-32], KC1[] @@ -3159,6 +3537,43 @@ define amdgpu_kernel void @v_mul_i128(ptr addrspace(1) %out, ptr addrspace(1) %a ; GFX12-NEXT: global_store_b128 v13, v[8:11], s[2:3] ; GFX12-NEXT: s_endpgm ; +; GFX1250-LABEL: v_mul_i128: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x2c +; GFX1250-NEXT: v_and_b32_e32 v16, 0x3ff, v0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: global_load_b128 v[0:3], v16, s[2:3] scale_offset +; GFX1250-NEXT: global_load_b128 v[4:7], v16, s[0:1] scale_offset +; GFX1250-NEXT: s_wait_loadcnt 0x1 +; GFX1250-NEXT: v_dual_mov_b32 v11, 0 :: v_dual_mov_b32 v10, v0 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX1250-NEXT: v_dual_mov_b32 v9, v11 :: v_dual_mov_b32 v8, v4 +; GFX1250-NEXT: v_mul_u64_e32 v[6:7], v[0:1], v[6:7] +; GFX1250-NEXT: v_mul_lo_u32 v3, v3, v4 +; GFX1250-NEXT: v_mul_u64_e32 v[8:9], v[8:9], v[10:11] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX1250-NEXT: v_mad_co_u64_u32 v[6:7], null, v2, v4, v[6:7] +; GFX1250-NEXT: v_mul_lo_u32 v2, v2, v5 +; GFX1250-NEXT: v_mov_b32_e32 v10, v9 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1250-NEXT: v_mad_co_u64_u32 v[12:13], null, v5, v0, v[10:11] +; GFX1250-NEXT: v_add3_u32 v7, v3, v7, v2 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_dual_mov_b32 v10, v13 :: v_dual_mov_b32 v13, v11 +; GFX1250-NEXT: v_mad_co_u64_u32 v[12:13], null, v4, v1, v[12:13] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_dual_mov_b32 v15, v11 :: v_dual_mov_b32 v9, v12 +; GFX1250-NEXT: v_mov_b32_e32 v14, v13 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_add_nc_u64_e32 v[10:11], v[10:11], v[14:15] +; GFX1250-NEXT: v_mad_co_u64_u32 v[0:1], null, v5, v1, v[10:11] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_add_nc_u64_e32 v[10:11], v[0:1], v[6:7] +; GFX1250-NEXT: global_store_b128 v16, v[8:11], s[2:3] scale_offset +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: v_mul_i128: ; EG: ; %bb.0: ; %entry ; EG-NEXT: ALU 3, @10, KC0[CB0:0-32], KC1[] @@ -3271,6 +3686,13 @@ define i32 @mul_pow2_plus_1(i32 %val) { ; GFX12-NEXT: v_lshl_add_u32 v0, v0, 3, v0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; +; GFX1250-LABEL: mul_pow2_plus_1: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_lshl_add_u32 v0, v0, 3, v0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +; ; EG-LABEL: mul_pow2_plus_1: ; EG: ; %bb.0: ; EG-NEXT: CF_END |