diff options
Diffstat (limited to 'llvm/test')
38 files changed, 2334 insertions, 317 deletions
diff --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt index 2a6135d..3426b6f 100644 --- a/llvm/test/CMakeLists.txt +++ b/llvm/test/CMakeLists.txt @@ -97,6 +97,7 @@ set(LLVM_TEST_DEPENDS llvm-exegesis llvm-extract llvm-gsymutil + llvm-ir2vec llvm-isel-fuzzer llvm-ifs llvm-install-name-tool diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir index cebdffc..eba64b8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir @@ -223,37 +223,37 @@ body: | ; GFX7-LABEL: name: load_atomic_flat_v2s32_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (<2 x s32>)) + ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX9-LABEL: name: load_atomic_flat_v2s32_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (<2 x s32>)) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX10-LABEL: name: load_atomic_flat_v2s32_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (<2 x s32>)) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX11-LABEL: name: load_atomic_flat_v2s32_seq_cst ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (<2 x s32>)) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX12-LABEL: name: load_atomic_flat_v2s32_seq_cst ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>)) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX12-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (<2 x s32>)) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst (<2 x s32>), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir index eafc96d..474f130 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir @@ -252,30 +252,30 @@ body: | ; GFX7-LABEL: name: load_atomic_global_v2s32_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (<2 x s32>), addrspace 1) + ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX7-FLAT-LABEL: name: load_atomic_global_v2s32_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (<2 x s32>), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX9-LABEL: name: load_atomic_global_v2s32_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (<2 x s32>), addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] ; ; GFX10-LABEL: name: load_atomic_global_v2s32_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (<2 x s32>), addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst (<2 x s32>), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir index 2675295..ae010a8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir @@ -22,6 +22,7 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (s32)) + ; ; GFX9-LABEL: name: atomic_store_flat_s32_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -51,6 +52,7 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2 ; GFX7-NEXT: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p0) :: (store seq_cst (<2 x s16>)) + ; ; GFX9-LABEL: name: atomic_store_flat_v2s16_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -80,6 +82,7 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2 ; GFX7-NEXT: G_STORE [[COPY]](p3), [[COPY1]](p0) :: (store seq_cst (p3)) + ; ; GFX9-LABEL: name: atomic_store_flat_p3_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -109,6 +112,7 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2 ; GFX7-NEXT: G_STORE [[COPY]](p5), [[COPY1]](p0) :: (store seq_cst (p5)) + ; ; GFX9-LABEL: name: atomic_store_flat_p5_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -138,6 +142,7 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p6) = COPY $vgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2 ; GFX7-NEXT: G_STORE [[COPY]](p6), [[COPY1]](p0) :: (store seq_cst (p6)) + ; ; GFX9-LABEL: name: atomic_store_flat_p6_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -167,6 +172,7 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (s64)) + ; ; GFX9-LABEL: name: atomic_store_flat_s64_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -193,15 +199,16 @@ body: | ; GFX7-LABEL: name: atomic_store_flat_v2s32_seq_cst ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p0) :: (store seq_cst (<2 x s32>)) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (<2 x s32>)) + ; ; GFX9-LABEL: name: atomic_store_flat_v2s32_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p0) :: (store seq_cst (<2 x s32>)) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (<2 x s32>)) %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 %1:vgpr(p0) = COPY $vgpr2_vgpr3 G_STORE %0, %1 :: (store seq_cst (<2 x s32>), align 8, addrspace 0) @@ -225,6 +232,7 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 ; GFX7-NEXT: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p0) :: (store seq_cst (<4 x s16>)) + ; ; GFX9-LABEL: name: atomic_store_flat_v4s16_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -254,6 +262,7 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 ; GFX7-NEXT: G_STORE [[COPY]](p0), [[COPY1]](p0) :: (store seq_cst (p0)) + ; ; GFX9-LABEL: name: atomic_store_flat_p0_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -282,6 +291,7 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 ; GFX7-NEXT: G_STORE [[COPY]](p1), [[COPY1]](p0) :: (store seq_cst (p1)) + ; ; GFX9-LABEL: name: atomic_store_flat_p1_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/flat-saddr-load.ll b/llvm/test/CodeGen/AMDGPU/flat-saddr-load.ll index f0988a1..f54fbba 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-saddr-load.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-saddr-load.ll @@ -998,24 +998,11 @@ define amdgpu_ps <2 x half> @flat_load_saddr_p3_immneg128(ptr inreg %sbase, i32 } define amdgpu_ps <2 x float> @flat_load_saddr_f64(ptr inreg %sbase, i32 %voffset) { -; GFX1250-SDAG-LABEL: flat_load_saddr_f64: -; GFX1250-SDAG: ; %bb.0: -; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 -; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] -; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] -; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-SDAG-NEXT: ; return to shader part epilog -; -; GFX1250-GISEL-LABEL: flat_load_saddr_f64: -; GFX1250-GISEL: ; %bb.0: -; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 -; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo -; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] -; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-GISEL-NEXT: ; return to shader part epilog +; GFX1250-LABEL: flat_load_saddr_f64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %load = load double, ptr %gep0 @@ -1024,24 +1011,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_f64(ptr inreg %sbase, i32 %voffset } define amdgpu_ps <2 x float> @flat_load_saddr_f64_immneg128(ptr inreg %sbase, i32 %voffset) { -; GFX1250-SDAG-LABEL: flat_load_saddr_f64_immneg128: -; GFX1250-SDAG: ; %bb.0: -; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 -; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] -; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 -; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-SDAG-NEXT: ; return to shader part epilog -; -; GFX1250-GISEL-LABEL: flat_load_saddr_f64_immneg128: -; GFX1250-GISEL: ; %bb.0: -; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 -; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo -; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 -; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-GISEL-NEXT: ; return to shader part epilog +; GFX1250-LABEL: flat_load_saddr_f64_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 @@ -1051,24 +1025,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_f64_immneg128(ptr inreg %sbase, i3 } define amdgpu_ps <2 x float> @flat_load_saddr_i64(ptr inreg %sbase, i32 %voffset) { -; GFX1250-SDAG-LABEL: flat_load_saddr_i64: -; GFX1250-SDAG: ; %bb.0: -; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 -; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] -; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] -; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-SDAG-NEXT: ; return to shader part epilog -; -; GFX1250-GISEL-LABEL: flat_load_saddr_i64: -; GFX1250-GISEL: ; %bb.0: -; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 -; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo -; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] -; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-GISEL-NEXT: ; return to shader part epilog +; GFX1250-LABEL: flat_load_saddr_i64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %load = load i64, ptr %gep0 @@ -1077,24 +1038,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_i64(ptr inreg %sbase, i32 %voffset } define amdgpu_ps <2 x float> @flat_load_saddr_i64_immneg128(ptr inreg %sbase, i32 %voffset) { -; GFX1250-SDAG-LABEL: flat_load_saddr_i64_immneg128: -; GFX1250-SDAG: ; %bb.0: -; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 -; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] -; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 -; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-SDAG-NEXT: ; return to shader part epilog -; -; GFX1250-GISEL-LABEL: flat_load_saddr_i64_immneg128: -; GFX1250-GISEL: ; %bb.0: -; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 -; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo -; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 -; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-GISEL-NEXT: ; return to shader part epilog +; GFX1250-LABEL: flat_load_saddr_i64_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 @@ -1104,24 +1052,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_i64_immneg128(ptr inreg %sbase, i3 } define amdgpu_ps <2 x float> @flat_load_saddr_v2f32(ptr inreg %sbase, i32 %voffset) { -; GFX1250-SDAG-LABEL: flat_load_saddr_v2f32: -; GFX1250-SDAG: ; %bb.0: -; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 -; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] -; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] -; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-SDAG-NEXT: ; return to shader part epilog -; -; GFX1250-GISEL-LABEL: flat_load_saddr_v2f32: -; GFX1250-GISEL: ; %bb.0: -; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 -; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo -; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] -; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-GISEL-NEXT: ; return to shader part epilog +; GFX1250-LABEL: flat_load_saddr_v2f32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %load = load <2 x float>, ptr %gep0 @@ -1129,24 +1064,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_v2f32(ptr inreg %sbase, i32 %voffs } define amdgpu_ps <2 x float> @flat_load_saddr_v2f32_immneg128(ptr inreg %sbase, i32 %voffset) { -; GFX1250-SDAG-LABEL: flat_load_saddr_v2f32_immneg128: -; GFX1250-SDAG: ; %bb.0: -; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 -; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] -; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 -; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-SDAG-NEXT: ; return to shader part epilog -; -; GFX1250-GISEL-LABEL: flat_load_saddr_v2f32_immneg128: -; GFX1250-GISEL: ; %bb.0: -; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 -; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo -; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 -; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-GISEL-NEXT: ; return to shader part epilog +; GFX1250-LABEL: flat_load_saddr_v2f32_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 @@ -1155,24 +1077,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_v2f32_immneg128(ptr inreg %sbase, } define amdgpu_ps <2 x float> @flat_load_saddr_v2i32(ptr inreg %sbase, i32 %voffset) { -; GFX1250-SDAG-LABEL: flat_load_saddr_v2i32: -; GFX1250-SDAG: ; %bb.0: -; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 -; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] -; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] -; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-SDAG-NEXT: ; return to shader part epilog -; -; GFX1250-GISEL-LABEL: flat_load_saddr_v2i32: -; GFX1250-GISEL: ; %bb.0: -; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 -; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo -; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] -; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-GISEL-NEXT: ; return to shader part epilog +; GFX1250-LABEL: flat_load_saddr_v2i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %load = load <2 x i32>, ptr %gep0 @@ -1181,24 +1090,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_v2i32(ptr inreg %sbase, i32 %voffs } define amdgpu_ps <2 x float> @flat_load_saddr_v2i32_immneg128(ptr inreg %sbase, i32 %voffset) { -; GFX1250-SDAG-LABEL: flat_load_saddr_v2i32_immneg128: -; GFX1250-SDAG: ; %bb.0: -; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 -; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] -; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 -; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-SDAG-NEXT: ; return to shader part epilog -; -; GFX1250-GISEL-LABEL: flat_load_saddr_v2i32_immneg128: -; GFX1250-GISEL: ; %bb.0: -; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 -; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo -; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 -; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-GISEL-NEXT: ; return to shader part epilog +; GFX1250-LABEL: flat_load_saddr_v2i32_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 @@ -1208,24 +1104,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_v2i32_immneg128(ptr inreg %sbase, } define amdgpu_ps <2 x float> @flat_load_saddr_v4i16(ptr inreg %sbase, i32 %voffset) { -; GFX1250-SDAG-LABEL: flat_load_saddr_v4i16: -; GFX1250-SDAG: ; %bb.0: -; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 -; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] -; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] -; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-SDAG-NEXT: ; return to shader part epilog -; -; GFX1250-GISEL-LABEL: flat_load_saddr_v4i16: -; GFX1250-GISEL: ; %bb.0: -; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 -; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo -; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] -; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-GISEL-NEXT: ; return to shader part epilog +; GFX1250-LABEL: flat_load_saddr_v4i16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %load = load <4 x i16>, ptr %gep0 @@ -1234,24 +1117,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_v4i16(ptr inreg %sbase, i32 %voffs } define amdgpu_ps <2 x float> @flat_load_saddr_v4i16_immneg128(ptr inreg %sbase, i32 %voffset) { -; GFX1250-SDAG-LABEL: flat_load_saddr_v4i16_immneg128: -; GFX1250-SDAG: ; %bb.0: -; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 -; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] -; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 -; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-SDAG-NEXT: ; return to shader part epilog -; -; GFX1250-GISEL-LABEL: flat_load_saddr_v4i16_immneg128: -; GFX1250-GISEL: ; %bb.0: -; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 -; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo -; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 -; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-GISEL-NEXT: ; return to shader part epilog +; GFX1250-LABEL: flat_load_saddr_v4i16_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 @@ -1261,24 +1131,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_v4i16_immneg128(ptr inreg %sbase, } define amdgpu_ps <2 x float> @flat_load_saddr_v4f16(ptr inreg %sbase, i32 %voffset) { -; GFX1250-SDAG-LABEL: flat_load_saddr_v4f16: -; GFX1250-SDAG: ; %bb.0: -; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 -; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] -; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] -; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-SDAG-NEXT: ; return to shader part epilog -; -; GFX1250-GISEL-LABEL: flat_load_saddr_v4f16: -; GFX1250-GISEL: ; %bb.0: -; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 -; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo -; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] -; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-GISEL-NEXT: ; return to shader part epilog +; GFX1250-LABEL: flat_load_saddr_v4f16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %load = load <4 x half>, ptr %gep0 @@ -1287,24 +1144,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_v4f16(ptr inreg %sbase, i32 %voffs } define amdgpu_ps <2 x float> @flat_load_saddr_v4f16_immneg128(ptr inreg %sbase, i32 %voffset) { -; GFX1250-SDAG-LABEL: flat_load_saddr_v4f16_immneg128: -; GFX1250-SDAG: ; %bb.0: -; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 -; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] -; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 -; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-SDAG-NEXT: ; return to shader part epilog -; -; GFX1250-GISEL-LABEL: flat_load_saddr_v4f16_immneg128: -; GFX1250-GISEL: ; %bb.0: -; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 -; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo -; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 -; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-GISEL-NEXT: ; return to shader part epilog +; GFX1250-LABEL: flat_load_saddr_v4f16_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 @@ -1314,24 +1158,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_v4f16_immneg128(ptr inreg %sbase, } define amdgpu_ps <2 x float> @flat_load_saddr_p1(ptr inreg %sbase, i32 %voffset) { -; GFX1250-SDAG-LABEL: flat_load_saddr_p1: -; GFX1250-SDAG: ; %bb.0: -; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 -; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] -; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] -; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-SDAG-NEXT: ; return to shader part epilog -; -; GFX1250-GISEL-LABEL: flat_load_saddr_p1: -; GFX1250-GISEL: ; %bb.0: -; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 -; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo -; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] -; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-GISEL-NEXT: ; return to shader part epilog +; GFX1250-LABEL: flat_load_saddr_p1: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %load = load ptr, ptr %gep0 @@ -1341,24 +1172,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_p1(ptr inreg %sbase, i32 %voffset) } define amdgpu_ps <2 x float> @flat_load_saddr_p1_immneg128(ptr inreg %sbase, i32 %voffset) { -; GFX1250-SDAG-LABEL: flat_load_saddr_p1_immneg128: -; GFX1250-SDAG: ; %bb.0: -; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 -; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] -; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 -; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-SDAG-NEXT: ; return to shader part epilog -; -; GFX1250-GISEL-LABEL: flat_load_saddr_p1_immneg128: -; GFX1250-GISEL: ; %bb.0: -; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 -; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo -; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 -; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-GISEL-NEXT: ; return to shader part epilog +; GFX1250-LABEL: flat_load_saddr_p1_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 diff --git a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-wmma-xdl.mir b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-wmma-xdl.mir index 7c3170d..0abf347 100644 --- a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-wmma-xdl.mir +++ b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-wmma-xdl.mir @@ -65,20 +65,3 @@ body: | $vgpr12 = V_EXP_F32_e32 $vgpr12, implicit $exec, implicit $mode $vgpr13 = V_ADD_U32_e32 $vgpr13, $vgpr8, implicit $exec ... - ---- -name: dot_xdl_dep_2 -tracksRegLiveness: true -body: | - bb.0: - ; CHECK-LABEL: {{^}}dot_xdl_dep_2: - ; CHECK: %bb.0: - ; CHECK-NEXT: v_dot4_i32_iu8 v0, s2, s3, v0 neg_lo:[1,1,0] - ; CHECK-NEXT: v_dot4_i32_iu8 v1, s2, s3, v2 neg_lo:[1,1,0] - ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) - ; CHECK-NEXT: v_add_nc_u32_e32 v2, v0, v0 - liveins: $vgpr0, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2 - $vgpr0 = V_DOT4_I32_IU8 9, $sgpr2, 9, $sgpr3, 8, $vgpr0, 0, 0, 0, implicit $exec - $vgpr1 = V_DOT4_I32_IU8 9, $sgpr2, 9, $sgpr3, 8, $vgpr2, 0, 0, 0, implicit $exec - $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec -... diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.bf16.ll new file mode 100644 index 0000000..091859f --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.bf16.ll @@ -0,0 +1,33 @@ +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN %s +; xUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GCN %s + +; FIXME: GlobalISel does not work with bf16 + +declare bfloat @llvm.amdgcn.cos.bf16(bfloat) #0 + +; GCN-LABEL: {{^}}cos_bf16: +; GCN: v_cos_bf16_e32 {{v[0-9]+}}, {{s[0-9]+}} +define amdgpu_kernel void @cos_bf16(ptr addrspace(1) %out, bfloat %src) #1 { + %cos = call bfloat @llvm.amdgcn.cos.bf16(bfloat %src) #0 + store bfloat %cos, ptr addrspace(1) %out, align 2 + ret void +} + +; GCN-LABEL: {{^}}cos_bf16_constant_4 +; GCN: v_cos_bf16_e32 v0, 4.0 +define amdgpu_kernel void @cos_bf16_constant_4(ptr addrspace(1) %out) #1 { + %cos = call bfloat @llvm.amdgcn.cos.bf16(bfloat 4.0) #0 + store bfloat %cos, ptr addrspace(1) %out, align 2 + ret void +} + +; GCN-LABEL: {{^}}cos_bf16_constant_100 +; GCN: v_cos_bf16_e32 {{v[0-9]+}}, 0x42c8 +define amdgpu_kernel void @cos_bf16_constant_100(ptr addrspace(1) %out) #1 { + %cos = call bfloat @llvm.amdgcn.cos.bf16(bfloat 100.0) #0 + store bfloat %cos, ptr addrspace(1) %out, align 2 + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll index 344c011..81db735 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll @@ -1,14 +1,97 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; xUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 %s -o - | FileCheck -check-prefix=SDAG-REAL16 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 %s -o - | FileCheck -check-prefix=SDAG-REAL16 %s ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 %s -o - | FileCheck -check-prefix=SDAG-FAKE16 %s ; xUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 %s -o - | FileCheck -check-prefix=GISEL-REAL16 %s ; xUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 %s -o - | FileCheck -check-prefix=GISEL-FAKE16 %s -; FIXME: t16 doesn't work at the moment because the store of s16 under t16 mode fails to select. ; FIXME: GlobalISel does not work with bf16 +declare float @llvm.amdgcn.tanh.f32(float) #0 declare bfloat @llvm.amdgcn.tanh.bf16(bfloat) #0 +define amdgpu_kernel void @tanh_f32(ptr addrspace(1) %out, float %src) #1 { +; SDAG-REAL16-LABEL: tanh_f32: +; SDAG-REAL16: ; %bb.0: +; SDAG-REAL16-NEXT: s_load_b96 s[0:2], s[4:5], 0x0 +; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0 +; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0 +; SDAG-REAL16-NEXT: v_tanh_f32_e32 v0, s2 +; SDAG-REAL16-NEXT: global_store_b32 v1, v0, s[0:1] +; SDAG-REAL16-NEXT: s_endpgm +; +; SDAG-FAKE16-LABEL: tanh_f32: +; SDAG-FAKE16: ; %bb.0: +; SDAG-FAKE16-NEXT: s_load_b96 s[0:2], s[4:5], 0x0 +; SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, 0 +; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; SDAG-FAKE16-NEXT: v_tanh_f32_e32 v0, s2 +; SDAG-FAKE16-NEXT: global_store_b32 v1, v0, s[0:1] +; SDAG-FAKE16-NEXT: s_endpgm + %tanh = call float @llvm.amdgcn.tanh.f32(float %src) #0 + store float %tanh, ptr addrspace(1) %out, align 4 + ret void +} + +; TODO: Really these should be constant folded +define amdgpu_kernel void @tanh_f32_constant_4.0(ptr addrspace(1) %out) #1 { +; SDAG-REAL16-LABEL: tanh_f32_constant_4.0: +; SDAG-REAL16: ; %bb.0: +; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; SDAG-REAL16-NEXT: v_tanh_f32_e32 v0, 4.0 +; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0 +; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0 +; SDAG-REAL16-NEXT: global_store_b32 v1, v0, s[0:1] +; SDAG-REAL16-NEXT: s_endpgm +; +; SDAG-FAKE16-LABEL: tanh_f32_constant_4.0: +; SDAG-FAKE16: ; %bb.0: +; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; SDAG-FAKE16-NEXT: v_tanh_f32_e32 v0, 4.0 +; SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, 0 +; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; SDAG-FAKE16-NEXT: global_store_b32 v1, v0, s[0:1] +; SDAG-FAKE16-NEXT: s_endpgm + %tanh = call float @llvm.amdgcn.tanh.f32(float 4.0) #0 + store float %tanh, ptr addrspace(1) %out, align 4 + ret void +} + +define amdgpu_kernel void @tanh_f32_constant_100.0(ptr addrspace(1) %out) #1 { +; SDAG-REAL16-LABEL: tanh_f32_constant_100.0: +; SDAG-REAL16: ; %bb.0: +; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; SDAG-REAL16-NEXT: v_tanh_f32_e32 v0, 0x42c80000 +; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0 +; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0 +; SDAG-REAL16-NEXT: global_store_b32 v1, v0, s[0:1] +; SDAG-REAL16-NEXT: s_endpgm +; +; SDAG-FAKE16-LABEL: tanh_f32_constant_100.0: +; SDAG-FAKE16: ; %bb.0: +; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; SDAG-FAKE16-NEXT: v_tanh_f32_e32 v0, 0x42c80000 +; SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, 0 +; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; SDAG-FAKE16-NEXT: global_store_b32 v1, v0, s[0:1] +; SDAG-FAKE16-NEXT: s_endpgm + %tanh = call float @llvm.amdgcn.tanh.f32(float 100.0) #0 + store float %tanh, ptr addrspace(1) %out, align 4 + ret void +} + +define amdgpu_kernel void @tanh_undef_f32(ptr addrspace(1) %out) #1 { +; SDAG-REAL16-LABEL: tanh_undef_f32: +; SDAG-REAL16: ; %bb.0: +; SDAG-REAL16-NEXT: s_endpgm +; +; SDAG-FAKE16-LABEL: tanh_undef_f32: +; SDAG-FAKE16: ; %bb.0: +; SDAG-FAKE16-NEXT: s_endpgm + %tanh = call float @llvm.amdgcn.tanh.f32(float undef) + store float %tanh, ptr addrspace(1) %out, align 4 + ret void +} + define amdgpu_kernel void @tanh_bf16(ptr addrspace(1) %out, bfloat %src) #1 { ; SDAG-REAL16-LABEL: tanh_bf16: ; SDAG-REAL16: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu-flat.mir b/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu-flat.mir new file mode 100644 index 0000000..95ccf6c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu-flat.mir @@ -0,0 +1,357 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=si-fix-sgpr-copies -o - %s | FileCheck --check-prefix=GCN %s + +--- +name: flat_load_saddr_to_valu +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: flat_load_saddr_to_valu + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.1(0x80000000) + ; GCN-NEXT: liveins: $vgpr0_vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[PHI:%[0-9]+]]:vreg_64_align2 = PHI [[COPY]], %bb.0, %7, %bb.1 + ; GCN-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PHI]], 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1 + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec + ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 + ; GCN-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U64_e64 [[REG_SEQUENCE]], 0, implicit $exec + ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]], implicit $exec + ; GCN-NEXT: $vcc_lo = S_AND_B32 $exec_lo, [[V_CMP_NE_U64_e64_]], implicit-def $scc + ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc_lo + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0_vgpr1 + %0:sreg_64 = COPY $vgpr0_vgpr1 + + bb.1: + %1:sreg_64_xexec_xnull = PHI %0, %bb.0, %2, %bb.1 + %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %4:vgpr_32 = FLAT_LOAD_DWORD_SADDR %1, %3, 0, 0, implicit $exec, implicit $flat_scr + %2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc + S_CMP_LG_U64 %2, 0, implicit-def $scc + S_CBRANCH_SCC1 %bb.1, implicit $scc + + bb.2: + S_ENDPGM 0 +... + +--- +name: flat_load_saddr_to_valu_non_zero_vaddr +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: flat_load_saddr_to_valu_non_zero_vaddr + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.1(0x80000000) + ; GCN-NEXT: liveins: $vgpr0_vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[PHI:%[0-9]+]]:vreg_64_align2 = PHI [[COPY]], %bb.0, %7, %bb.1 + ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub0, implicit $exec + ; GCN-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GCN-NEXT: [[FLAT_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD_SADDR [[REG_SEQUENCE]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1 + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec + ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 + ; GCN-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U64_e64 [[REG_SEQUENCE1]], 0, implicit $exec + ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]], implicit $exec + ; GCN-NEXT: $vcc_lo = S_AND_B32 $exec_lo, [[V_CMP_NE_U64_e64_]], implicit-def $scc + ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc_lo + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0_vgpr1 + %0:sreg_64 = COPY $vgpr0_vgpr1 + + bb.1: + %1:sreg_64_xexec_xnull = PHI %0, %bb.0, %2, %bb.1 + %3:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + %4:vgpr_32 = FLAT_LOAD_DWORD_SADDR %1, %3, 0, 0, implicit $exec, implicit $flat_scr + %2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc + S_CMP_LG_U64 %2, 0, implicit-def $scc + S_CBRANCH_SCC1 %bb.1, implicit $scc + + bb.2: + S_ENDPGM 0 +... + + +--- +name: flat_load_saddr_to_valu_undef_vaddr +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: flat_load_saddr_to_valu_undef_vaddr + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.1(0x80000000) + ; GCN-NEXT: liveins: $vgpr0_vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[PHI:%[0-9]+]]:vreg_64_align2 = PHI [[COPY]], %bb.0, %7, %bb.1 + ; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub0, implicit $exec + ; GCN-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GCN-NEXT: [[FLAT_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD_SADDR [[REG_SEQUENCE]], undef %4:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1 + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec + ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 + ; GCN-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U64_e64 [[REG_SEQUENCE1]], 0, implicit $exec + ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]], implicit $exec + ; GCN-NEXT: $vcc_lo = S_AND_B32 $exec_lo, [[V_CMP_NE_U64_e64_]], implicit-def $scc + ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc_lo + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0_vgpr1 + %0:sreg_64 = COPY $vgpr0_vgpr1 + + bb.1: + %1:sreg_64_xexec_xnull = PHI %0, %bb.0, %2, %bb.1 + %4:vgpr_32 = FLAT_LOAD_DWORD_SADDR %1, undef %3:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr + %2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc + S_CMP_LG_U64 %2, 0, implicit-def $scc + S_CBRANCH_SCC1 %bb.1, implicit $scc + + bb.2: + S_ENDPGM 0 +... + +--- +name: flat_store_saddr_to_valu +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: flat_store_saddr_to_valu + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.1(0x80000000) + ; GCN-NEXT: liveins: $vgpr0_vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[PHI:%[0-9]+]]:vreg_64_align2 = PHI [[COPY]], %bb.0, %7, %bb.1 + ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: FLAT_STORE_DWORD [[PHI]], [[DEF]], 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1 + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec + ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 + ; GCN-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U64_e64 [[REG_SEQUENCE]], 0, implicit $exec + ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]], implicit $exec + ; GCN-NEXT: $vcc_lo = S_AND_B32 $exec_lo, [[V_CMP_NE_U64_e64_]], implicit-def $scc + ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc_lo + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0_vgpr1 + %0:sreg_64 = COPY $vgpr0_vgpr1 + + bb.1: + %1:sreg_64_xexec_xnull = PHI %0, %bb.0, %2, %bb.1 + %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %4:vgpr_32 = IMPLICIT_DEF + FLAT_STORE_DWORD_SADDR %3, %4, %1, 0, 0, implicit $exec, implicit $flat_scr + %2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc + S_CMP_LG_U64 %2, 0, implicit-def $scc + S_CBRANCH_SCC1 %bb.1, implicit $scc + + bb.2: + S_ENDPGM 0 +... + +--- +name: flat_atomic_noret_saddr_to_valu +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: flat_atomic_noret_saddr_to_valu + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.1(0x80000000) + ; GCN-NEXT: liveins: $vgpr0_vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[PHI:%[0-9]+]]:vreg_64_align2 = PHI [[COPY]], %bb.0, %6, %bb.1 + ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GCN-NEXT: FLAT_ATOMIC_ADD [[PHI]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1 + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec + ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 + ; GCN-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U64_e64 [[REG_SEQUENCE]], 0, implicit $exec + ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]], implicit $exec + ; GCN-NEXT: $vcc_lo = S_AND_B32 $exec_lo, [[V_CMP_NE_U64_e64_]], implicit-def $scc + ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc_lo + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0_vgpr1 + %0:sreg_64 = COPY $vgpr0_vgpr1 + + bb.1: + %1:sreg_64_xexec_xnull = PHI %0, %bb.0, %2, %bb.1 + %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + FLAT_ATOMIC_ADD_SADDR %3, %3, %1, 0, 0, implicit $exec, implicit $flat_scr + %2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc + S_CMP_LG_U64 %2, 0, implicit-def $scc + S_CBRANCH_SCC1 %bb.1, implicit $scc + + bb.2: + S_ENDPGM 0 +... + +--- +name: flat_atomic_rtn_saddr_to_valu +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: flat_atomic_rtn_saddr_to_valu + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.1(0x80000000) + ; GCN-NEXT: liveins: $vgpr0_vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[PHI:%[0-9]+]]:vreg_64_align2 = PHI [[COPY]], %bb.0, %7, %bb.1 + ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GCN-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[PHI]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1 + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec + ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 + ; GCN-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U64_e64 [[REG_SEQUENCE]], 0, implicit $exec + ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]], implicit $exec + ; GCN-NEXT: $vcc_lo = S_AND_B32 $exec_lo, [[V_CMP_NE_U64_e64_]], implicit-def $scc + ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc_lo + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0_vgpr1 + %0:sreg_64 = COPY $vgpr0_vgpr1 + + bb.1: + %1:sreg_64_xexec_xnull = PHI %0, %bb.0, %2, %bb.1 + %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %4:vgpr_32 = FLAT_ATOMIC_ADD_SADDR_RTN %3, %3, %1, 0, 0, implicit $exec, implicit $flat_scr + %2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc + S_CMP_LG_U64 %2, 0, implicit-def $scc + S_CBRANCH_SCC1 %bb.1, implicit $scc + + bb.2: + S_ENDPGM 0 +... + +--- +name: scratch_load_saddr_to_valu +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: scratch_load_saddr_to_valu + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.1(0x80000000) + ; GCN-NEXT: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, %6, %bb.1 + ; GCN-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[PHI]], 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PHI]], 1, implicit $exec + ; GCN-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 [[V_AND_B32_e64_]], 0, implicit $exec + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_AND_B32_e64_]], implicit $exec + ; GCN-NEXT: $vcc_lo = S_AND_B32 $exec_lo, [[V_CMP_NE_U32_e64_]], implicit-def $scc + ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc_lo + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0 + %0:sgpr_32 = COPY $vgpr0 + + bb.1: + %1:sgpr_32 = PHI %0, %bb.0, %2, %bb.1 + %4:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %1, 0, 0, implicit $exec, implicit $flat_scr + %2:sgpr_32 = S_AND_B32 %1, 1, implicit-def $scc + S_CMP_LG_U32 %2, 0, implicit-def $scc + S_CBRANCH_SCC1 %bb.1, implicit $scc + + bb.2: + S_ENDPGM 0 +... + +--- +name: scratch_store_saddr_to_valu +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: scratch_store_saddr_to_valu + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.1(0x80000000) + ; GCN-NEXT: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, %6, %bb.1 + ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: SCRATCH_STORE_DWORD [[DEF]], [[PHI]], 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PHI]], 1, implicit $exec + ; GCN-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 [[V_AND_B32_e64_]], 0, implicit $exec + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_AND_B32_e64_]], implicit $exec + ; GCN-NEXT: $vcc_lo = S_AND_B32 $exec_lo, [[V_CMP_NE_U32_e64_]], implicit-def $scc + ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc_lo + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0 + %0:sgpr_32 = COPY $vgpr0 + + bb.1: + %1:sgpr_32 = PHI %0, %bb.0, %2, %bb.1 + %4:vgpr_32 = IMPLICIT_DEF + SCRATCH_STORE_DWORD_SADDR %4, %1, 0, 0, implicit $exec, implicit $flat_scr + %2:sgpr_32 = S_AND_B32 %1, 1, implicit-def $scc + S_CMP_LG_U32 %2, 0, implicit-def $scc + S_CBRANCH_SCC1 %bb.1, implicit $scc + + bb.2: + S_ENDPGM 0 +... diff --git a/llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir b/llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir index c2c5340..8145a1d7 100644 --- a/llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir +++ b/llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir @@ -167,3 +167,59 @@ body: | %1:sreg_32 = COPY %0 S_BRANCH %bb.2 ... + +--- + +name: phi_moveimm_av_pseudo_input +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: phi_moveimm_av_pseudo_input + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.1(0x80000000) + ; GCN-NEXT: liveins: $sgpr0, $sgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[AV_MOV_:%[0-9]+]]:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.2(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GCN-NEXT: S_BRANCH %bb.2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: successors: %bb.3(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI %5, %bb.3, [[S_ADD_U32_]], %bb.1 + ; GCN-NEXT: S_BRANCH %bb.3 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.3: + ; GCN-NEXT: successors: %bb.2(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GCN-NEXT: S_BRANCH %bb.2 + bb.0: + successors: %bb.1 + liveins: $sgpr0, $sgpr1 + + %0:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + + %4:sreg_32 = COPY $sgpr0 + %5:sreg_32 = COPY $sgpr1 + + bb.1: + successors: %bb.2 + %2:sreg_32 = S_ADD_U32 %4, %5, implicit-def $scc + S_BRANCH %bb.2 + + bb.2: + successors: %bb.3 + %3:sreg_32 = PHI %1, %bb.3, %2, %bb.1 + S_BRANCH %bb.3 + + bb.3: + successors: %bb.2 + %1:sreg_32 = COPY %0 + S_BRANCH %bb.2 +... diff --git a/llvm/test/CodeGen/DirectX/flatten-array.ll b/llvm/test/CodeGen/DirectX/flatten-array.ll index 1376a1d..a2e1055 100644 --- a/llvm/test/CodeGen/DirectX/flatten-array.ll +++ b/llvm/test/CodeGen/DirectX/flatten-array.ll @@ -218,6 +218,28 @@ define void @two_index_gep_const() { ret void } +define void @zero_index_global() { + ; CHECK-LABEL: define void @zero_index_global( + ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw [4 x float], ptr addrspace(3) @g.1dim, i32 0, i32 0 + ; CHECK-NEXT: load float, ptr addrspace(3) [[GEP]], align 4 + ; CHECK-NEXT: ret void + %1 = getelementptr inbounds nuw [2 x [2 x float]], ptr addrspace(3) @g, i32 0, i32 0, i32 0 + %2 = load float, ptr addrspace(3) %1, align 4 + ret void +} + +; Note: A ConstantExpr GEP with all 0 indices is equivalent to the pointer +; operand of the GEP. Therefore the visitLoadInst will not see the pointer operand +; as a ConstantExpr GEP and will not create a GEP instruction to be visited. +; The later dxil-legalize pass will insert a GEP in this instance. +define void @zero_index_global_const() { + ; CHECK-LABEL: define void @zero_index_global_const( + ; CHECK-NEXT: load float, ptr addrspace(3) @g.1dim, align 4 + ; CHECK-NEXT: ret void + %1 = load float, ptr addrspace(3) getelementptr inbounds nuw ([2 x [2 x float]], ptr addrspace(3) @g, i32 0, i32 0, i32 0), align 4 + ret void +} + define void @gep_4d_index_test() { ; CHECK-LABEL: gep_4d_index_test ; CHECK: [[a:%.*]] = alloca [16 x i32], align 4 diff --git a/llvm/test/CodeGen/DirectX/legalize-load-store-array-alloca.ll b/llvm/test/CodeGen/DirectX/legalize-load-store-array-alloca.ll index b25b3de..c6789ac 100644 --- a/llvm/test/CodeGen/DirectX/legalize-load-store-array-alloca.ll +++ b/llvm/test/CodeGen/DirectX/legalize-load-store-array-alloca.ll @@ -21,3 +21,21 @@ define void @store() { store i32 0, ptr %a, align 4
ret void
}
+
+@g = local_unnamed_addr addrspace(3) global [4 x i32] zeroinitializer, align 4
+define void @load_whole_global () {
+; CHECK-LABEL: define void @load_whole_global
+; CHECK-NEXT: load [4 x i32], ptr addrspace(3) @g, align 4
+; CHECK-NEXT: ret void
+ %l = load [4 x i32], ptr addrspace(3) @g, align 4
+ ret void
+}
+
+define void @load_global_index0 () {
+; CHECK-LABEL: define void @load_global_index0
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw [4 x i32], ptr addrspace(3) @g, i32 0, i32 0
+; CHECK-NEXT: load i32, ptr addrspace(3) [[GEP]], align 4
+; CHECK-NEXT: ret void
+ %l = load i32, ptr addrspace(3) @g, align 4
+ ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll b/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll index 27a8925..0c91c53 100644 --- a/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll +++ b/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll @@ -24,7 +24,8 @@ define <4 x i32> @load_array_vec_test() #0 { ; CHECK-LABEL: define <4 x i32> @load_array_vec_test( ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) @arrayofVecData.scalarized.1dim, align 4 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw [8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) [[GEP]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 1), align 4 ; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 2), align 4 ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 3), align 4 @@ -52,7 +53,8 @@ define <4 x i32> @load_array_vec_test() #0 { define <4 x i32> @load_vec_test() #0 { ; CHECK-LABEL: define <4 x i32> @load_vec_test( ; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) @vecData.scalarized, align 4 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw [4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) [[GEP]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) getelementptr ([4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 1), align 4 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) getelementptr ([4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 2), align 4 ; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) getelementptr ([4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 3), align 4 @@ -203,7 +205,8 @@ define <4 x i32> @load_static_array_of_vec_from_i8_gep_test(i32 %index) #0 { define <4 x i32> @multid_load_test() #0 { ; CHECK-LABEL: define <4 x i32> @multid_load_test( ; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, align 4 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw [36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) [[GEP]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) getelementptr ([36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 1), align 4 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) getelementptr ([36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 2), align 4 ; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) getelementptr ([36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 3), align 4 diff --git a/llvm/test/CodeGen/DirectX/scalar-store.ll b/llvm/test/CodeGen/DirectX/scalar-store.ll index a124c66..4394235 100644 --- a/llvm/test/CodeGen/DirectX/scalar-store.ll +++ b/llvm/test/CodeGen/DirectX/scalar-store.ll @@ -14,7 +14,8 @@ ; CHECK-LABEL: store_array_vec_test define void @store_array_vec_test () local_unnamed_addr #0 { -; CHECK-NEXT: store float 1.000000e+00, ptr addrspace(3) @arrayofVecData.scalarized.1dim, align 16 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw [6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 0 +; CHECK-NEXT: store float 1.000000e+00, ptr addrspace(3) [[GEP]], align 16 ; CHECK-NEXT: store float 2.000000e+00, ptr addrspace(3) getelementptr ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 1), align 4 ; CHECK-NEXT: store float 3.000000e+00, ptr addrspace(3) getelementptr ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 2), align 8 ; CHECK-NEXT: store float 2.000000e+00, ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 3), align 16 @@ -30,7 +31,8 @@ define void @store_array_vec_test () local_unnamed_addr #0 { ; CHECK-LABEL: store_vec_test define void @store_vec_test(<4 x i32> %inputVec) #0 { ; CHECK-NEXT: [[INPUTVEC_I01:%.*]] = extractelement <4 x i32> %inputVec, i32 0 -; CHECK-NEXT: store i32 [[INPUTVEC_I01]], ptr addrspace(3) @vecData.scalarized, align 4 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw [4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 0 +; CHECK-NEXT: store i32 [[INPUTVEC_I01]], ptr addrspace(3) [[GEP]], align 4 ; CHECK-NEXT: [[INPUTVEC_I12:%.*]] = extractelement <4 x i32> %inputVec, i32 1 ; CHECK-NEXT: store i32 [[INPUTVEC_I12]], ptr addrspace(3) getelementptr ([4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 1), align 4 ; CHECK-NEXT: [[INPUTVEC_I23:%.*]] = extractelement <4 x i32> %inputVec, i32 2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll index 019bbe2..dbc8e89 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll @@ -1721,8 +1721,9 @@ define void @load_factor4_one_active_storeback_full(ptr %ptr) { define <4 x i32> @vp_load_factor3_one_active(ptr %ptr) { ; CHECK-LABEL: vp_load_factor3_one_active: ; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 12 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vlseg3e32.v v8, (a0) +; CHECK-NEXT: vlse32.v v8, (a0), a1 ; CHECK-NEXT: ret %interleaved.vec = tail call <12 x i32> @llvm.vp.load.v12i32.p0(ptr %ptr, <12 x i1> splat (i1 true), i32 12) %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9> @@ -1732,8 +1733,9 @@ define <4 x i32> @vp_load_factor3_one_active(ptr %ptr) { define <4 x i32> @vp_load_factor5_one_active(ptr %ptr) { ; CHECK-LABEL: vp_load_factor5_one_active: ; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 20 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vlseg5e32.v v8, (a0) +; CHECK-NEXT: vlse32.v v8, (a0), a1 ; CHECK-NEXT: ret %interleaved.vec = tail call <20 x i32> @llvm.vp.load.v20i32.p0(ptr %ptr, <20 x i1> splat (i1 true), i32 20) %v0 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 0, i32 5, i32 10, i32 15> diff --git a/llvm/test/CodeGen/SPIRV/transcoding/builtin_calls.ll b/llvm/test/CodeGen/SPIRV/transcoding/builtin_calls.ll index 0a02a8b..b179732 100644 --- a/llvm/test/CodeGen/SPIRV/transcoding/builtin_calls.ll +++ b/llvm/test/CodeGen/SPIRV/transcoding/builtin_calls.ll @@ -1,17 +1,109 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} -; CHECK-SPIRV-DAG: OpDecorate %[[#Id:]] BuiltIn GlobalInvocationId -; CHECK-SPIRV-DAG: OpDecorate %[[#Id:]] BuiltIn GlobalLinearId -; CHECK-SPIRV: %[[#Id:]] = OpVariable %[[#]] -; CHECK-SPIRV: %[[#Id:]] = OpVariable %[[#]] +; CHECK-SPIRV-DAG: OpDecorate %[[#Id0:]] BuiltIn GlobalLinearId +; CHECK-SPIRV-DAG: OpDecorate %[[#Id1:]] BuiltIn GlobalInvocationId +; CHECK-SPIRV-DAG: OpDecorate %[[#Id2:]] BuiltIn LocalInvocationIndex +; CHECK-SPIRV-DAG: OpDecorate %[[#Id3:]] BuiltIn WorkDim +; CHECK-SPIRV-DAG: OpDecorate %[[#Id4:]] BuiltIn SubgroupSize +; CHECK-SPIRV-DAG: OpDecorate %[[#Id5:]] BuiltIn SubgroupMaxSize +; CHECK-SPIRV-DAG: OpDecorate %[[#Id6:]] BuiltIn NumSubgroups +; CHECK-SPIRV-DAG: OpDecorate %[[#Id7:]] BuiltIn NumEnqueuedSubgroups +; CHECK-SPIRV-DAG: OpDecorate %[[#Id8:]] BuiltIn SubgroupId +; CHECK-SPIRV-DAG: OpDecorate %[[#Id9:]] BuiltIn SubgroupLocalInvocationId +; CHECK-SPIRV-DAG: OpDecorate %[[#Id10:]] BuiltIn SubgroupEqMask +; CHECK-SPIRV-DAG: OpDecorate %[[#Id11:]] BuiltIn SubgroupGeMask +; CHECK-SPIRV-DAG: OpDecorate %[[#Id12:]] BuiltIn SubgroupGtMask +; CHECK-SPIRV-DAG: OpDecorate %[[#Id13:]] BuiltIn SubgroupLeMask +; CHECK-SPIRV-DAG: OpDecorate %[[#Id14:]] BuiltIn SubgroupLtMask +; CHECK-SPIRV-DAG: OpDecorate %[[#Id15:]] BuiltIn LocalInvocationId +; CHECK-SPIRV-DAG: OpDecorate %[[#Id16:]] BuiltIn WorkgroupSize +; CHECK-SPIRV-DAG: OpDecorate %[[#Id17:]] BuiltIn GlobalSize +; CHECK-SPIRV-DAG: OpDecorate %[[#Id18:]] BuiltIn WorkgroupId +; CHECK-SPIRV-DAG: OpDecorate %[[#Id19:]] BuiltIn EnqueuedWorkgroupSize +; CHECK-SPIRV-DAG: OpDecorate %[[#Id20:]] BuiltIn NumWorkgroups +; CHECK-SPIRV-DAG: OpDecorate %[[#Id21:]] BuiltIn GlobalOffset + +; CHECK-SPIRV: %[[#Id0:]] = OpVariable %[[#]] Input +; CHECK-SPIRV: %[[#Id1:]] = OpVariable %[[#]] Input +; CHECK-SPIRV: %[[#Id2:]] = OpVariable %[[#]] Input +; CHECK-SPIRV: %[[#Id3:]] = OpVariable %[[#]] Input +; CHECK-SPIRV: %[[#Id4:]] = OpVariable %[[#]] Input +; CHECK-SPIRV: %[[#Id5:]] = OpVariable %[[#]] Input +; CHECK-SPIRV: %[[#Id6:]] = OpVariable %[[#]] Input +; CHECK-SPIRV: %[[#Id7:]] = OpVariable %[[#]] Input +; CHECK-SPIRV: %[[#Id8:]] = OpVariable %[[#]] Input +; CHECK-SPIRV: %[[#Id9:]] = OpVariable %[[#]] Input +; CHECK-SPIRV: %[[#Id10:]] = OpVariable %[[#]] Input +; CHECK-SPIRV: %[[#Id11:]] = OpVariable %[[#]] Input +; CHECK-SPIRV: %[[#Id12:]] = OpVariable %[[#]] Input +; CHECK-SPIRV: %[[#Id13:]] = OpVariable %[[#]] Input +; CHECK-SPIRV: %[[#Id14:]] = OpVariable %[[#]] Input +; CHECK-SPIRV: %[[#Id15:]] = OpVariable %[[#]] Input +; CHECK-SPIRV: %[[#Id16:]] = OpVariable %[[#]] Input +; CHECK-SPIRV: %[[#Id17:]] = OpVariable %[[#]] Input +; CHECK-SPIRV: %[[#Id18:]] = OpVariable %[[#]] Input +; CHECK-SPIRV: %[[#Id19:]] = OpVariable %[[#]] Input +; CHECK-SPIRV: %[[#Id20:]] = OpVariable %[[#]] Input +; CHECK-SPIRV: %[[#Id21:]] = OpVariable %[[#]] Input define spir_kernel void @f() { entry: %0 = call spir_func i32 @_Z29__spirv_BuiltInGlobalLinearIdv() %1 = call spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 1) + %2 = call spir_func i64 @_Z35__spirv_BuiltInLocalInvocationIndexv() + %3 = call spir_func i32 @_Z22__spirv_BuiltInWorkDimv() + %4 = call spir_func i32 @_Z27__spirv_BuiltInSubgroupSizev() + %5 = call spir_func i32 @_Z30__spirv_BuiltInSubgroupMaxSizev() + %6 = call spir_func i32 @_Z27__spirv_BuiltInNumSubgroupsv() + %7 = call spir_func i32 @_Z35__spirv_BuiltInNumEnqueuedSubgroupsv() + %8 = call spir_func i32 @_Z25__spirv_BuiltInSubgroupIdv() + %9 = call spir_func i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() + %10 = call spir_func <4 x i32> @_Z29__spirv_BuiltInSubgroupEqMaskv() + %11 = call spir_func <4 x i32> @_Z32__spirv_BuiltInSubgroupEqMaskKHRv() + %12 = call spir_func <4 x i32> @_Z29__spirv_BuiltInSubgroupGeMaskv() + %13 = call spir_func <4 x i32> @_Z32__spirv_BuiltInSubgroupGeMaskKHRv() + %14 = call spir_func <4 x i32> @_Z29__spirv_BuiltInSubgroupGtMaskv() + %15 = call spir_func <4 x i32> @_Z32__spirv_BuiltInSubgroupGtMaskKHRv() + %16 = call spir_func <4 x i32> @_Z29__spirv_BuiltInSubgroupLeMaskv() + %17 = call spir_func <4 x i32> @_Z32__spirv_BuiltInSubgroupLeMaskKHRv() + %18 = call spir_func <4 x i32> @_Z29__spirv_BuiltInSubgroupLtMaskv() + %19 = call spir_func <4 x i32> @_Z32__spirv_BuiltInSubgroupLtMaskKHRv() + %20 = call spir_func i64 @_Z32__spirv_BuiltInLocalInvocationIdi(i32 0) + %21 = call spir_func i64 @_Z28__spirv_BuiltInWorkgroupSizei(i32 0) + %22 = call spir_func i64 @_Z25__spirv_BuiltInGlobalSizei(i32 0) + %23 = call spir_func i64 @_Z26__spirv_BuiltInWorkgroupIdi(i32 0) + %24 = call spir_func i64 @_Z36__spirv_BuiltInEnqueuedWorkgroupSizei(i32 0) + %25 = call spir_func i64 @_Z28__spirv_BuiltInNumWorkgroupsi(i32 0) + %26 = call spir_func i64 @_Z27__spirv_BuiltInGlobalOffseti(i32 0) + ret void } declare spir_func i32 @_Z29__spirv_BuiltInGlobalLinearIdv() declare spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32) +declare spir_func i64 @_Z35__spirv_BuiltInLocalInvocationIndexv() +declare spir_func i32 @_Z22__spirv_BuiltInWorkDimv() +declare spir_func i32 @_Z27__spirv_BuiltInSubgroupSizev() +declare spir_func i32 @_Z30__spirv_BuiltInSubgroupMaxSizev() +declare spir_func i32 @_Z27__spirv_BuiltInNumSubgroupsv() +declare spir_func i32 @_Z35__spirv_BuiltInNumEnqueuedSubgroupsv() +declare spir_func i32 @_Z25__spirv_BuiltInSubgroupIdv() +declare spir_func i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv() +declare spir_func <4 x i32> @_Z29__spirv_BuiltInSubgroupEqMaskv() +declare spir_func <4 x i32> @_Z32__spirv_BuiltInSubgroupEqMaskKHRv() +declare spir_func <4 x i32> @_Z29__spirv_BuiltInSubgroupGeMaskv() +declare spir_func <4 x i32> @_Z32__spirv_BuiltInSubgroupGeMaskKHRv() +declare spir_func <4 x i32> @_Z29__spirv_BuiltInSubgroupGtMaskv() +declare spir_func <4 x i32> @_Z32__spirv_BuiltInSubgroupGtMaskKHRv() +declare spir_func <4 x i32> @_Z29__spirv_BuiltInSubgroupLeMaskv() +declare spir_func <4 x i32> @_Z32__spirv_BuiltInSubgroupLeMaskKHRv() +declare spir_func <4 x i32> @_Z29__spirv_BuiltInSubgroupLtMaskv() +declare spir_func <4 x i32> @_Z32__spirv_BuiltInSubgroupLtMaskKHRv() +declare spir_func i64 @_Z32__spirv_BuiltInLocalInvocationIdi(i32) +declare spir_func i64 @_Z28__spirv_BuiltInWorkgroupSizei(i32) +declare spir_func i64 @_Z25__spirv_BuiltInGlobalSizei(i32) +declare spir_func i64 @_Z26__spirv_BuiltInWorkgroupIdi(i32) +declare spir_func i64 @_Z36__spirv_BuiltInEnqueuedWorkgroupSizei(i32) +declare spir_func i64 @_Z28__spirv_BuiltInNumWorkgroupsi(i32) +declare spir_func i64 @_Z27__spirv_BuiltInGlobalOffseti(i32) diff --git a/llvm/test/CodeGen/X86/invalid-operand-bundle-call.ll b/llvm/test/CodeGen/X86/invalid-operand-bundle-call.ll index ac4963f..17065a4 100644 --- a/llvm/test/CodeGen/X86/invalid-operand-bundle-call.ll +++ b/llvm/test/CodeGen/X86/invalid-operand-bundle-call.ll @@ -1,10 +1,10 @@ ; RUN: not llc -mtriple=x86_64-unknown-linux-gnu < %s 2>&1 | FileCheck %s -; CHECK: LLVM ERROR: cannot lower calls with arbitrary operand bundles: foo +; CHECK: LLVM ERROR: cannot lower calls with arbitrary operand bundles: foo, bar, baz declare void @g() define void @f(i32 %arg) { - call void @g() [ "foo"(i32 %arg) ] + call void @g() [ "foo"(i32 %arg), "bar"(i32 %arg), "baz"(i32 %arg) ] ret void } diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s index f51d709..f9e217d 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s @@ -28,6 +28,51 @@ v_mov_b64 v[4:5], 0.5 v_mov_b64 v[254:255], 0xaf123456 // GFX1250: v_mov_b64_e32 v[254:255], lit64(0xaf123456) ; encoding: [0xfe,0x3a,0xfc,0x7f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +v_tanh_f32 v5, v1 +// GFX1250: v_tanh_f32_e32 v5, v1 ; encoding: [0x01,0x3d,0x0a,0x7e] + +v_tanh_f32 v5, v255 +// GFX1250: v_tanh_f32_e32 v5, v255 ; encoding: [0xff,0x3d,0x0a,0x7e] + +v_tanh_f32 v5, s1 +// GFX1250: v_tanh_f32_e32 v5, s1 ; encoding: [0x01,0x3c,0x0a,0x7e] + +v_tanh_f32 v5, s105 +// GFX1250: v_tanh_f32_e32 v5, s105 ; encoding: [0x69,0x3c,0x0a,0x7e] + +v_tanh_f32 v5, vcc_lo +// GFX1250: v_tanh_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x3c,0x0a,0x7e] + +v_tanh_f32 v5, vcc_hi +// GFX1250: v_tanh_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x3c,0x0a,0x7e] + +v_tanh_f32 v5, ttmp15 +// GFX1250: v_tanh_f32_e32 v5, ttmp15 ; encoding: [0x7b,0x3c,0x0a,0x7e] + +v_tanh_f32 v5, m0 +// GFX1250: v_tanh_f32_e32 v5, m0 ; encoding: [0x7d,0x3c,0x0a,0x7e] + +v_tanh_f32 v5, exec_lo +// GFX1250: v_tanh_f32_e32 v5, exec_lo ; encoding: [0x7e,0x3c,0x0a,0x7e] + +v_tanh_f32 v5, exec_hi +// GFX1250: v_tanh_f32_e32 v5, exec_hi ; encoding: [0x7f,0x3c,0x0a,0x7e] + +v_tanh_f32 v5, null +// GFX1250: v_tanh_f32_e32 v5, null ; encoding: [0x7c,0x3c,0x0a,0x7e] + +v_tanh_f32 v5, -1 +// GFX1250: v_tanh_f32_e32 v5, -1 ; encoding: [0xc1,0x3c,0x0a,0x7e] + +v_tanh_f32 v5, 0.5 +// GFX1250: v_tanh_f32_e32 v5, 0.5 ; encoding: [0xf0,0x3c,0x0a,0x7e] + +v_tanh_f32 v5, src_scc +// GFX1250: v_tanh_f32_e32 v5, src_scc ; encoding: [0xfd,0x3c,0x0a,0x7e] + +v_tanh_f32 v255, 0xaf123456 +// GFX1250: v_tanh_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x3c,0xfe,0x7f,0x56,0x34,0x12,0xaf] + v_tanh_bf16 v5, v1 // GFX1250: v_tanh_bf16_e32 v5, v1 ; encoding: [0x01,0x95,0x0a,0x7e] @@ -343,6 +388,51 @@ v_sin_bf16 v5, src_scc v_sin_bf16 v127, 0x8000 // GFX1250: v_sin_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xfc,0xfe,0x7e,0x00,0x80,0x00,0x00] +v_cos_bf16 v5, v1 +// GFX1250: v_cos_bf16_e32 v5, v1 ; encoding: [0x01,0xff,0x0a,0x7e] + +v_cos_bf16 v5, v127 +// GFX1250: v_cos_bf16_e32 v5, v127 ; encoding: [0x7f,0xff,0x0a,0x7e] + +v_cos_bf16 v5, s1 +// GFX1250: v_cos_bf16_e32 v5, s1 ; encoding: [0x01,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, s105 +// GFX1250: v_cos_bf16_e32 v5, s105 ; encoding: [0x69,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, vcc_lo +// GFX1250: v_cos_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, vcc_hi +// GFX1250: v_cos_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, ttmp15 +// GFX1250: v_cos_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, m0 +// GFX1250: v_cos_bf16_e32 v5, m0 ; encoding: [0x7d,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, exec_lo +// GFX1250: v_cos_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, exec_hi +// GFX1250: v_cos_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, null +// GFX1250: v_cos_bf16_e32 v5, null ; encoding: [0x7c,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, -1 +// GFX1250: v_cos_bf16_e32 v5, -1 ; encoding: [0xc1,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, 0.5 +// GFX1250: v_cos_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, src_scc +// GFX1250: v_cos_bf16_e32 v5, src_scc ; encoding: [0xfd,0xfe,0x0a,0x7e] + +v_cos_bf16 v127, 0x8000 +// GFX1250: v_cos_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xfe,0xfe,0x7e,0x00,0x80,0x00,0x00] + v_cvt_f32_bf16 v5, v1 // GFX1250: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xe5,0x0a,0x7e] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s index 39fc73d..d51ef68b 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s @@ -28,6 +28,51 @@ v_mov_b64 v[4:5], 0.5 v_mov_b64 v[254:255], 0xaf123456 // GFX1250: v_mov_b64_e32 v[254:255], lit64(0xaf123456) ; encoding: [0xfe,0x3a,0xfc,0x7f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +v_tanh_f32 v5, v1 +// GFX1250: v_tanh_f32_e32 v5, v1 ; encoding: [0x01,0x3d,0x0a,0x7e] + +v_tanh_f32 v5, v255 +// GFX1250: v_tanh_f32_e32 v5, v255 ; encoding: [0xff,0x3d,0x0a,0x7e] + +v_tanh_f32 v5, s1 +// GFX1250: v_tanh_f32_e32 v5, s1 ; encoding: [0x01,0x3c,0x0a,0x7e] + +v_tanh_f32 v5, s105 +// GFX1250: v_tanh_f32_e32 v5, s105 ; encoding: [0x69,0x3c,0x0a,0x7e] + +v_tanh_f32 v5, vcc_lo +// GFX1250: v_tanh_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x3c,0x0a,0x7e] + +v_tanh_f32 v5, vcc_hi +// GFX1250: v_tanh_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x3c,0x0a,0x7e] + +v_tanh_f32 v5, ttmp15 +// GFX1250: v_tanh_f32_e32 v5, ttmp15 ; encoding: [0x7b,0x3c,0x0a,0x7e] + +v_tanh_f32 v5, m0 +// GFX1250: v_tanh_f32_e32 v5, m0 ; encoding: [0x7d,0x3c,0x0a,0x7e] + +v_tanh_f32 v5, exec_lo +// GFX1250: v_tanh_f32_e32 v5, exec_lo ; encoding: [0x7e,0x3c,0x0a,0x7e] + +v_tanh_f32 v5, exec_hi +// GFX1250: v_tanh_f32_e32 v5, exec_hi ; encoding: [0x7f,0x3c,0x0a,0x7e] + +v_tanh_f32 v5, null +// GFX1250: v_tanh_f32_e32 v5, null ; encoding: [0x7c,0x3c,0x0a,0x7e] + +v_tanh_f32 v5, -1 +// GFX1250: v_tanh_f32_e32 v5, -1 ; encoding: [0xc1,0x3c,0x0a,0x7e] + +v_tanh_f32 v5, 0.5 +// GFX1250: v_tanh_f32_e32 v5, 0.5 ; encoding: [0xf0,0x3c,0x0a,0x7e] + +v_tanh_f32 v5, src_scc +// GFX1250: v_tanh_f32_e32 v5, src_scc ; encoding: [0xfd,0x3c,0x0a,0x7e] + +v_tanh_f32 v255, 0xaf123456 +// GFX1250: v_tanh_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x3c,0xfe,0x7f,0x56,0x34,0x12,0xaf] + v_tanh_bf16 v5, v1 // GFX1250: v_tanh_bf16_e32 v5, v1 ; encoding: [0x01,0x95,0x0a,0x7e] @@ -364,6 +409,54 @@ v_sin_bf16 v127, 0x8000 v_sin_bf16 v5.h, v1.h // GFX1250: v_sin_bf16_e32 v5.h, v1.h ; encoding: [0x81,0xfd,0x0a,0x7f] +v_cos_bf16 v5, v1 +// GFX1250: v_cos_bf16_e32 v5, v1 ; encoding: [0x01,0xff,0x0a,0x7e] + +v_cos_bf16 v5, v127 +// GFX1250: v_cos_bf16_e32 v5, v127 ; encoding: [0x7f,0xff,0x0a,0x7e] + +v_cos_bf16 v5, s1 +// GFX1250: v_cos_bf16_e32 v5, s1 ; encoding: [0x01,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, s105 +// GFX1250: v_cos_bf16_e32 v5, s105 ; encoding: [0x69,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, vcc_lo +// GFX1250: v_cos_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, vcc_hi +// GFX1250: v_cos_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, ttmp15 +// GFX1250: v_cos_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, m0 +// GFX1250: v_cos_bf16_e32 v5, m0 ; encoding: [0x7d,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, exec_lo +// GFX1250: v_cos_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, exec_hi +// GFX1250: v_cos_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, null +// GFX1250: v_cos_bf16_e32 v5, null ; encoding: [0x7c,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, -1 +// GFX1250: v_cos_bf16_e32 v5, -1 ; encoding: [0xc1,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, 0.5 +// GFX1250: v_cos_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, src_scc +// GFX1250: v_cos_bf16_e32 v5, src_scc ; encoding: [0xfd,0xfe,0x0a,0x7e] + +v_cos_bf16 v127, 0x8000 +// GFX1250: v_cos_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xfe,0xfe,0x7e,0x00,0x80,0x00,0x00] + +v_cos_bf16 v5.h, v1.h +// GFX1250: v_cos_bf16_e32 v5.h, v1.h ; encoding: [0x81,0xff,0x0a,0x7f] + v_cvt_f32_bf16 v5, v1 // GFX1250: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xe5,0x0a,0x7e] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s index 97058eb..ae22f68 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s @@ -2,6 +2,62 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX1250 %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s +v_tanh_f32 v5, v1 quad_perm:[3,2,1,0] +// GFX1250: v_tanh_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32 v5, v1 quad_perm:[0,1,2,3] +// GFX1250: v_tanh_f32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32 v5, v1 row_mirror +// GFX1250: v_tanh_f32_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x40,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32 v5, v1 row_half_mirror +// GFX1250: v_tanh_f32_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x41,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32 v5, v1 row_shl:1 +// GFX1250: v_tanh_f32_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32 v5, v1 row_shl:15 +// GFX1250: v_tanh_f32_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32 v5, v1 row_shr:1 +// GFX1250: v_tanh_f32_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32 v5, v1 row_shr:15 +// GFX1250: v_tanh_f32_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x1f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32 v5, v1 row_ror:1 +// GFX1250: v_tanh_f32_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32 v5, v1 row_ror:15 +// GFX1250: v_tanh_f32_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_tanh_f32_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250: v_tanh_f32_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x5f,0x01,0x01] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX1250: v_tanh_f32_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x60,0x09,0x13] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1250: v_tanh_f32_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x3c,0xfe,0x7f,0xff,0x6f,0x35,0x30] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_tanh_bf16 v5, v1 quad_perm:[3,2,1,0] // GFX1250: v_tanh_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x94,0x0a,0x7e,0x01,0x1b,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU @@ -394,6 +450,62 @@ v_sin_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi // GFX1250: v_sin_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfc,0xfe,0x7e,0x7f,0x6f,0x35,0x30] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_cos_bf16 v5, v1 quad_perm:[3,2,1,0] +// GFX1250: v_cos_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 quad_perm:[0,1,2,3] +// GFX1250: v_cos_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_mirror +// GFX1250: v_cos_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x40,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_half_mirror +// GFX1250: v_cos_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x41,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_shl:1 +// GFX1250: v_cos_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_shl:15 +// GFX1250: v_cos_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_shr:1 +// GFX1250: v_cos_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_shr:15 +// GFX1250: v_cos_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x1f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_ror:1 +// GFX1250: v_cos_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_ror:15 +// GFX1250: v_cos_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_cos_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250: v_cos_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x5f,0x01,0x01] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX1250: v_cos_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x60,0x09,0x13] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1250: v_cos_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x7e,0x7f,0x6f,0x35,0x30] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_f32_bf16 v5, v1 quad_perm:[3,2,1,0] // GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s index 6a293c1..37ecb66 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s @@ -2,6 +2,62 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX1250 %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s +v_tanh_f32 v5, v1 quad_perm:[3,2,1,0] +// GFX1250: v_tanh_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32 v5, v1 quad_perm:[0,1,2,3] +// GFX1250: v_tanh_f32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32 v5, v1 row_mirror +// GFX1250: v_tanh_f32_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x40,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32 v5, v1 row_half_mirror +// GFX1250: v_tanh_f32_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x41,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32 v5, v1 row_shl:1 +// GFX1250: v_tanh_f32_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32 v5, v1 row_shl:15 +// GFX1250: v_tanh_f32_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32 v5, v1 row_shr:1 +// GFX1250: v_tanh_f32_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32 v5, v1 row_shr:15 +// GFX1250: v_tanh_f32_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x1f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32 v5, v1 row_ror:1 +// GFX1250: v_tanh_f32_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32 v5, v1 row_ror:15 +// GFX1250: v_tanh_f32_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_tanh_f32_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250: v_tanh_f32_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x5f,0x01,0x01] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX1250: v_tanh_f32_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x60,0x09,0x13] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1250: v_tanh_f32_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x3c,0xfe,0x7f,0xff,0x6f,0x35,0x30] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_tanh_bf16 v5, v1 quad_perm:[3,2,1,0] // GFX1250: v_tanh_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x94,0x0a,0x7e,0x01,0x1b,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU @@ -422,6 +478,66 @@ v_sin_bf16 v5.h, v1.h quad_perm:[3,2,1,0] // GFX1250: v_sin_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7f,0x81,0x1b,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_cos_bf16 v5, v1 quad_perm:[3,2,1,0] +// GFX1250: v_cos_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 quad_perm:[0,1,2,3] +// GFX1250: v_cos_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_mirror +// GFX1250: v_cos_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x40,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_half_mirror +// GFX1250: v_cos_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x41,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_shl:1 +// GFX1250: v_cos_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_shl:15 +// GFX1250: v_cos_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_shr:1 +// GFX1250: v_cos_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_shr:15 +// GFX1250: v_cos_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x1f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_ror:1 +// GFX1250: v_cos_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_ror:15 +// GFX1250: v_cos_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_cos_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250: v_cos_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x5f,0x01,0x01] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX1250: v_cos_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x60,0x09,0x13] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1250: v_cos_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x7e,0x7f,0x6f,0x35,0x30] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5.h, v1.h quad_perm:[3,2,1,0] +// GFX1250: v_cos_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7f,0x81,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_f32_bf16 v5, v1 quad_perm:[3,2,1,0] // GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s index d1f53c7..f24122e 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s @@ -2,6 +2,18 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX1250 %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s +v_tanh_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_tanh_f32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x3c,0x0a,0x7e,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_tanh_f32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x3c,0x0a,0x7e,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX1250: v_tanh_f32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x3c,0xfe,0x7f,0xff,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_tanh_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_tanh_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x94,0x0a,0x7e,0x01,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU @@ -86,6 +98,18 @@ v_sin_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX1250: v_sin_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfc,0xfe,0x7e,0x7f,0x00,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_cos_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cos_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_cos_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX1250: v_cos_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfe,0x7e,0x7f,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_f32_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_cvt_f32_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s index dbee9f3..34abc82 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s @@ -2,6 +2,18 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX1250 %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s +v_tanh_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_tanh_f32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x3c,0x0a,0x7e,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_tanh_f32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x3c,0x0a,0x7e,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX1250: v_tanh_f32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x3c,0xfe,0x7f,0xff,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_tanh_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_tanh_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x94,0x0a,0x7e,0x01,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU @@ -114,6 +126,22 @@ v_sin_bf16 v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_sin_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfc,0x0a,0x7f,0x81,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_cos_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cos_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_cos_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX1250: v_cos_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfe,0x7e,0x7f,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cos_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0a,0x7f,0x81,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_f32_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_cvt_f32_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s index 4257334..340a785 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s @@ -127,6 +127,51 @@ v_cvt_f32_fp8 v1, v3 byte_sel:1 clamp v_cvt_f32_fp8 v1, v3 byte_sel:2 clamp // GFX1250: v_cvt_f32_fp8_e64 v1, v3 byte_sel:2 clamp ; encoding: [0x01,0x88,0xec,0xd5,0x03,0x01,0x00,0x00] +v_tanh_f32_e64 v5, v1 +// GFX1250: v_tanh_f32_e64 v5, v1 ; encoding: [0x05,0x00,0x9e,0xd5,0x01,0x01,0x00,0x00] + +v_tanh_f32_e64 v5, v255 +// GFX1250: v_tanh_f32_e64 v5, v255 ; encoding: [0x05,0x00,0x9e,0xd5,0xff,0x01,0x00,0x00] + +v_tanh_f32_e64 v5, s1 +// GFX1250: v_tanh_f32_e64 v5, s1 ; encoding: [0x05,0x00,0x9e,0xd5,0x01,0x00,0x00,0x00] + +v_tanh_f32_e64 v5, s105 +// GFX1250: v_tanh_f32_e64 v5, s105 ; encoding: [0x05,0x00,0x9e,0xd5,0x69,0x00,0x00,0x00] + +v_tanh_f32_e64 v5, vcc_lo +// GFX1250: v_tanh_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x9e,0xd5,0x6a,0x00,0x00,0x00] + +v_tanh_f32_e64 v5, vcc_hi +// GFX1250: v_tanh_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x9e,0xd5,0x6b,0x00,0x00,0x00] + +v_tanh_f32_e64 v5, ttmp15 +// GFX1250: v_tanh_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x9e,0xd5,0x7b,0x00,0x00,0x00] + +v_tanh_f32_e64 v5, m0 +// GFX1250: v_tanh_f32_e64 v5, m0 ; encoding: [0x05,0x00,0x9e,0xd5,0x7d,0x00,0x00,0x00] + +v_tanh_f32_e64 v5, exec_lo +// GFX1250: v_tanh_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x9e,0xd5,0x7e,0x00,0x00,0x00] + +v_tanh_f32_e64 v5, exec_hi +// GFX1250: v_tanh_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x9e,0xd5,0x7f,0x00,0x00,0x00] + +v_tanh_f32_e64 v5, null +// GFX1250: v_tanh_f32_e64 v5, null ; encoding: [0x05,0x00,0x9e,0xd5,0x7c,0x00,0x00,0x00] + +v_tanh_f32_e64 v5, -1 +// GFX1250: v_tanh_f32_e64 v5, -1 ; encoding: [0x05,0x00,0x9e,0xd5,0xc1,0x00,0x00,0x00] + +v_tanh_f32_e64 v5, 0.5 mul:2 +// GFX1250: v_tanh_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x9e,0xd5,0xf0,0x00,0x00,0x08] + +v_tanh_f32_e64 v5, src_scc mul:4 +// GFX1250: v_tanh_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x9e,0xd5,0xfd,0x00,0x00,0x10] + +v_tanh_f32_e64 v255, -|0xaf123456| clamp div:2 +// GFX1250: v_tanh_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0x9e,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + v_rcp_bf16_e64 v5, v1 // GFX1250: v_rcp_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xf9,0xd5,0x01,0x01,0x00,0x00] @@ -397,6 +442,51 @@ v_sin_bf16_e64 v5, src_scc mul:4 v_sin_bf16_e64 v255, -|0x8000| clamp div:2 // GFX1250: v_sin_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfe,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00] +v_cos_bf16_e64 v5, v1 +// GFX1250: v_cos_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xff,0xd5,0x01,0x01,0x00,0x00] + +v_cos_bf16_e64 v5, v255 +// GFX1250: v_cos_bf16_e64 v5, v255 ; encoding: [0x05,0x00,0xff,0xd5,0xff,0x01,0x00,0x00] + +v_cos_bf16_e64 v5, s1 +// GFX1250: v_cos_bf16_e64 v5, s1 ; encoding: [0x05,0x00,0xff,0xd5,0x01,0x00,0x00,0x00] + +v_cos_bf16_e64 v5, s105 +// GFX1250: v_cos_bf16_e64 v5, s105 ; encoding: [0x05,0x00,0xff,0xd5,0x69,0x00,0x00,0x00] + +v_cos_bf16_e64 v5, vcc_lo +// GFX1250: v_cos_bf16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xff,0xd5,0x6a,0x00,0x00,0x00] + +v_cos_bf16_e64 v5, vcc_hi +// GFX1250: v_cos_bf16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xff,0xd5,0x6b,0x00,0x00,0x00] + +v_cos_bf16_e64 v5, ttmp15 +// GFX1250: v_cos_bf16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xff,0xd5,0x7b,0x00,0x00,0x00] + +v_cos_bf16_e64 v5, m0 +// GFX1250: v_cos_bf16_e64 v5, m0 ; encoding: [0x05,0x00,0xff,0xd5,0x7d,0x00,0x00,0x00] + +v_cos_bf16_e64 v5, exec_lo +// GFX1250: v_cos_bf16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xff,0xd5,0x7e,0x00,0x00,0x00] + +v_cos_bf16_e64 v5, exec_hi +// GFX1250: v_cos_bf16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xff,0xd5,0x7f,0x00,0x00,0x00] + +v_cos_bf16_e64 v5, null +// GFX1250: v_cos_bf16_e64 v5, null ; encoding: [0x05,0x00,0xff,0xd5,0x7c,0x00,0x00,0x00] + +v_cos_bf16_e64 v5, -1 +// GFX1250: v_cos_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xff,0xd5,0xc1,0x00,0x00,0x00] + +v_cos_bf16_e64 v5, 0.5 mul:2 +// GFX1250: v_cos_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xff,0xd5,0xf0,0x00,0x00,0x08] + +v_cos_bf16_e64 v5, src_scc mul:4 +// GFX1250: v_cos_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xff,0xd5,0xfd,0x00,0x00,0x10] + +v_cos_bf16_e64 v255, -|0x8000| clamp div:2 +// GFX1250: v_cos_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xff,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00] + v_cvt_f32_bf16_e64 v5, v1 // GFX1250: v_cvt_f32_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xf2,0xd5,0x01,0x01,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s index 83986a6..579a467 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s @@ -130,6 +130,51 @@ v_cvt_f32_fp8 v1, v3 byte_sel:1 clamp v_cvt_f32_fp8 v1, v3 byte_sel:2 clamp // GFX1250: v_cvt_f32_fp8_e64 v1, v3 byte_sel:2 clamp ; encoding: [0x01,0x88,0xec,0xd5,0x03,0x01,0x00,0x00] +v_tanh_f32_e64 v5, v1 +// GFX1250: v_tanh_f32_e64 v5, v1 ; encoding: [0x05,0x00,0x9e,0xd5,0x01,0x01,0x00,0x00] + +v_tanh_f32_e64 v5, v255 +// GFX1250: v_tanh_f32_e64 v5, v255 ; encoding: [0x05,0x00,0x9e,0xd5,0xff,0x01,0x00,0x00] + +v_tanh_f32_e64 v5, s1 +// GFX1250: v_tanh_f32_e64 v5, s1 ; encoding: [0x05,0x00,0x9e,0xd5,0x01,0x00,0x00,0x00] + +v_tanh_f32_e64 v5, s105 +// GFX1250: v_tanh_f32_e64 v5, s105 ; encoding: [0x05,0x00,0x9e,0xd5,0x69,0x00,0x00,0x00] + +v_tanh_f32_e64 v5, vcc_lo +// GFX1250: v_tanh_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x9e,0xd5,0x6a,0x00,0x00,0x00] + +v_tanh_f32_e64 v5, vcc_hi +// GFX1250: v_tanh_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x9e,0xd5,0x6b,0x00,0x00,0x00] + +v_tanh_f32_e64 v5, ttmp15 +// GFX1250: v_tanh_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x9e,0xd5,0x7b,0x00,0x00,0x00] + +v_tanh_f32_e64 v5, m0 +// GFX1250: v_tanh_f32_e64 v5, m0 ; encoding: [0x05,0x00,0x9e,0xd5,0x7d,0x00,0x00,0x00] + +v_tanh_f32_e64 v5, exec_lo +// GFX1250: v_tanh_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x9e,0xd5,0x7e,0x00,0x00,0x00] + +v_tanh_f32_e64 v5, exec_hi +// GFX1250: v_tanh_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x9e,0xd5,0x7f,0x00,0x00,0x00] + +v_tanh_f32_e64 v5, null +// GFX1250: v_tanh_f32_e64 v5, null ; encoding: [0x05,0x00,0x9e,0xd5,0x7c,0x00,0x00,0x00] + +v_tanh_f32_e64 v5, -1 +// GFX1250: v_tanh_f32_e64 v5, -1 ; encoding: [0x05,0x00,0x9e,0xd5,0xc1,0x00,0x00,0x00] + +v_tanh_f32_e64 v5, 0.5 mul:2 +// GFX1250: v_tanh_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x9e,0xd5,0xf0,0x00,0x00,0x08] + +v_tanh_f32_e64 v5, src_scc mul:4 +// GFX1250: v_tanh_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x9e,0xd5,0xfd,0x00,0x00,0x10] + +v_tanh_f32_e64 v255, -|0xaf123456| clamp div:2 +// GFX1250: v_tanh_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0x9e,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + v_rcp_bf16_e64 v5, v1 // GFX1250: v_rcp_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xf9,0xd5,0x01,0x01,0x00,0x00] @@ -418,6 +463,54 @@ v_sin_bf16_e64 v255, -|0x8000| clamp div:2 v_sin_bf16 v5.h, v128.h // GFX1250: v_sin_bf16_e64 v5.h, v128.h op_sel:[1,1] ; encoding: [0x05,0x48,0xfe,0xd5,0x80,0x01,0x00,0x00] +v_cos_bf16_e64 v5, v1 +// GFX1250: v_cos_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xff,0xd5,0x01,0x01,0x00,0x00] + +v_cos_bf16_e64 v5, v255 +// GFX1250: v_cos_bf16_e64 v5, v255 ; encoding: [0x05,0x00,0xff,0xd5,0xff,0x01,0x00,0x00] + +v_cos_bf16_e64 v5, s1 +// GFX1250: v_cos_bf16_e64 v5, s1 ; encoding: [0x05,0x00,0xff,0xd5,0x01,0x00,0x00,0x00] + +v_cos_bf16_e64 v5, s105 +// GFX1250: v_cos_bf16_e64 v5, s105 ; encoding: [0x05,0x00,0xff,0xd5,0x69,0x00,0x00,0x00] + +v_cos_bf16_e64 v5, vcc_lo +// GFX1250: v_cos_bf16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xff,0xd5,0x6a,0x00,0x00,0x00] + +v_cos_bf16_e64 v5, vcc_hi +// GFX1250: v_cos_bf16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xff,0xd5,0x6b,0x00,0x00,0x00] + +v_cos_bf16_e64 v5, ttmp15 +// GFX1250: v_cos_bf16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xff,0xd5,0x7b,0x00,0x00,0x00] + +v_cos_bf16_e64 v5, m0 +// GFX1250: v_cos_bf16_e64 v5, m0 ; encoding: [0x05,0x00,0xff,0xd5,0x7d,0x00,0x00,0x00] + +v_cos_bf16_e64 v5, exec_lo +// GFX1250: v_cos_bf16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xff,0xd5,0x7e,0x00,0x00,0x00] + +v_cos_bf16_e64 v5, exec_hi +// GFX1250: v_cos_bf16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xff,0xd5,0x7f,0x00,0x00,0x00] + +v_cos_bf16_e64 v5, null +// GFX1250: v_cos_bf16_e64 v5, null ; encoding: [0x05,0x00,0xff,0xd5,0x7c,0x00,0x00,0x00] + +v_cos_bf16_e64 v5, -1 +// GFX1250: v_cos_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xff,0xd5,0xc1,0x00,0x00,0x00] + +v_cos_bf16_e64 v5, 0.5 mul:2 +// GFX1250: v_cos_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xff,0xd5,0xf0,0x00,0x00,0x08] + +v_cos_bf16_e64 v5, src_scc mul:4 +// GFX1250: v_cos_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xff,0xd5,0xfd,0x00,0x00,0x10] + +v_cos_bf16_e64 v255, -|0x8000| clamp div:2 +// GFX1250: v_cos_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xff,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00] + +v_cos_bf16_e64 v5.h, v128.h +// GFX1250: v_cos_bf16_e64 v5.h, v128.h op_sel:[1,1] ; encoding: [0x05,0x48,0xff,0xd5,0x80,0x01,0x00,0x00] + v_cvt_f32_bf16_e64 v5, v1 // GFX1250: v_cvt_f32_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xf2,0xd5,0x01,0x01,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s index bb6739e..423340c 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s @@ -2,6 +2,62 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s +v_tanh_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX1250: v_tanh_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX1250: v_tanh_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32_e64_dpp v5, v1 row_mirror +// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32_e64_dpp v5, v1 row_half_mirror +// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32_e64_dpp v5, v1 row_shl:1 +// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32_e64_dpp v5, v1 row_shl:15 +// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32_e64_dpp v5, v1 row_shr:1 +// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32_e64_dpp v5, v1 row_shr:15 +// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32_e64_dpp v5, v1 row_ror:1 +// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32_e64_dpp v5, v1 row_ror:15 +// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250: v_tanh_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX1250: v_tanh_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1250: v_tanh_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0x9e,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_tanh_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] // GFX1250: v_tanh_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xca,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU @@ -394,6 +450,62 @@ v_sin_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask // GFX1250: v_sin_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xfe,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_cos_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX1250: v_cos_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX1250: v_cos_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v5, v1 row_mirror +// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v5, v1 row_half_mirror +// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v5, v1 row_shl:1 +// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v5, v1 row_shl:15 +// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v5, v1 row_shr:1 +// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v5, v1 row_shr:15 +// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v5, v1 row_ror:1 +// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v5, v1 row_ror:15 +// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250: v_cos_bf16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX1250: v_cos_bf16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1250: v_cos_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xff,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] // GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s index 5f6f28e..7968b39 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s @@ -2,6 +2,62 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s +v_tanh_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX1250: v_tanh_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX1250: v_tanh_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32_e64_dpp v5, v1 row_mirror +// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32_e64_dpp v5, v1 row_half_mirror +// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32_e64_dpp v5, v1 row_shl:1 +// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32_e64_dpp v5, v1 row_shl:15 +// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32_e64_dpp v5, v1 row_shr:1 +// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32_e64_dpp v5, v1 row_shr:15 +// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32_e64_dpp v5, v1 row_ror:1 +// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32_e64_dpp v5, v1 row_ror:15 +// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250: v_tanh_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX1250: v_tanh_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1250: v_tanh_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0x9e,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_tanh_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] // GFX1250: v_tanh_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xca,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU @@ -422,6 +478,66 @@ v_sin_bf16_e64_dpp v5.h, v128.h quad_perm:[3,2,1,0] // GFX1250: v_sin_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0xfe,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_cos_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX1250: v_cos_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX1250: v_cos_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v5, v1 row_mirror +// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v5, v1 row_half_mirror +// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v5, v1 row_shl:1 +// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v5, v1 row_shl:15 +// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v5, v1 row_shr:1 +// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v5, v1 row_shr:15 +// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v5, v1 row_ror:1 +// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v5, v1 row_ror:15 +// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250: v_cos_bf16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX1250: v_cos_bf16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1250: v_cos_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xff,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v5.h, v128.h quad_perm:[3,2,1,0] +// GFX1250: v_cos_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0xff,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] // GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s index 037e7d6..dd469c2 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s @@ -2,6 +2,22 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s +v_tanh_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_tanh_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x9e,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_tanh_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x9e,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_tanh_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x9e,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX1250: v_tanh_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0x9e,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_tanh_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_tanh_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xca,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU @@ -114,6 +130,22 @@ v_sin_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX1250: v_sin_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xfe,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_cos_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cos_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cos_bf16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_cos_bf16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0xff,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX1250: v_cos_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xff,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s index 53fb0eb..9fce779 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s @@ -2,6 +2,22 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s +v_tanh_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_tanh_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x9e,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_tanh_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x9e,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_tanh_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x9e,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX1250: v_tanh_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0x9e,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_tanh_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_tanh_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xca,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU @@ -142,6 +158,26 @@ v_sin_bf16_e64_dpp v5.h, v128.h dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_sin_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x48,0xfe,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_cos_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cos_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cos_bf16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_cos_bf16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0xff,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX1250: v_cos_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xff,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16_e64_dpp v5.h, v128.h dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cos_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x48,0xff,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt index fec2207..0a6fc39 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt @@ -29,6 +29,51 @@ 0x6a,0x3a,0x08,0x7e # GFX1250: v_mov_b64_e32 v[4:5], vcc ; encoding: [0x6a,0x3a,0x08,0x7e] +0xff,0x3c,0xfe,0x7f,0x56,0x34,0x12,0xaf +# GFX1250: v_tanh_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x3c,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +0xc1,0x3c,0x0a,0x7e +# GFX1250: v_tanh_f32_e32 v5, -1 ; encoding: [0xc1,0x3c,0x0a,0x7e] + +0xf0,0x3c,0x0a,0x7e +# GFX1250: v_tanh_f32_e32 v5, 0.5 ; encoding: [0xf0,0x3c,0x0a,0x7e] + +0x7f,0x3c,0x0a,0x7e +# GFX1250: v_tanh_f32_e32 v5, exec_hi ; encoding: [0x7f,0x3c,0x0a,0x7e] + +0x7e,0x3c,0x0a,0x7e +# GFX1250: v_tanh_f32_e32 v5, exec_lo ; encoding: [0x7e,0x3c,0x0a,0x7e] + +0x7d,0x3c,0x0a,0x7e +# GFX1250: v_tanh_f32_e32 v5, m0 ; encoding: [0x7d,0x3c,0x0a,0x7e] + +0x7c,0x3c,0x0a,0x7e +# GFX1250: v_tanh_f32_e32 v5, null ; encoding: [0x7c,0x3c,0x0a,0x7e] + +0x01,0x3c,0x0a,0x7e +# GFX1250: v_tanh_f32_e32 v5, s1 ; encoding: [0x01,0x3c,0x0a,0x7e] + +0x69,0x3c,0x0a,0x7e +# GFX1250: v_tanh_f32_e32 v5, s105 ; encoding: [0x69,0x3c,0x0a,0x7e] + +0xfd,0x3c,0x0a,0x7e +# GFX1250: v_tanh_f32_e32 v5, src_scc ; encoding: [0xfd,0x3c,0x0a,0x7e] + +0x7b,0x3c,0x0a,0x7e +# GFX1250: v_tanh_f32_e32 v5, ttmp15 ; encoding: [0x7b,0x3c,0x0a,0x7e] + +0x01,0x3d,0x0a,0x7e +# GFX1250: v_tanh_f32_e32 v5, v1 ; encoding: [0x01,0x3d,0x0a,0x7e] + +0xff,0x3d,0x0a,0x7e +# GFX1250: v_tanh_f32_e32 v5, v255 ; encoding: [0xff,0x3d,0x0a,0x7e] + +0x6b,0x3c,0x0a,0x7e +# GFX1250: v_tanh_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x3c,0x0a,0x7e] + +0x6a,0x3c,0x0a,0x7e +# GFX1250: v_tanh_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x3c,0x0a,0x7e] + 0xff,0x94,0xfe,0x7e,0x00,0x80,0x00,0x00 # GFX1250-REAL16: v_tanh_bf16_e32 v127.l, 0x8000 ; encoding: [0xff,0x94,0xfe,0x7e,0x00,0x80,0x00,0x00] # GFX1250-FAKE16: v_tanh_bf16_e32 v127, 0x8000 ; encoding: [0xff,0x94,0xfe,0x7e,0x00,0x80,0x00,0x00] @@ -470,6 +515,69 @@ 0x81,0xfd,0x0a,0x7f # GFX1250-REAL16: v_sin_bf16_e32 v5.h, v1.h ; encoding: [0x81,0xfd,0x0a,0x7f] +0xff,0xfe,0xfe,0x7e,0x00,0x80,0x00,0x00 +# GFX1250-REAL16: v_cos_bf16_e32 v127.l, 0x8000 ; encoding: [0xff,0xfe,0xfe,0x7e,0x00,0x80,0x00,0x00] +# GFX1250-FAKE16: v_cos_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xfe,0xfe,0x7e,0x00,0x80,0x00,0x00] + +0xc1,0xfe,0x0a,0x7e +# GFX1250-REAL16: v_cos_bf16_e32 v5.l, -1 ; encoding: [0xc1,0xfe,0x0a,0x7e] +# GFX1250-FAKE16: v_cos_bf16_e32 v5, -1 ; encoding: [0xc1,0xfe,0x0a,0x7e] + +0xf0,0xfe,0x0a,0x7e +# GFX1250-REAL16: v_cos_bf16_e32 v5.l, 0.5 ; encoding: [0xf0,0xfe,0x0a,0x7e] +# GFX1250-FAKE16: v_cos_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xfe,0x0a,0x7e] + +0x7f,0xfe,0x0a,0x7e +# GFX1250-REAL16: v_cos_bf16_e32 v5.l, exec_hi ; encoding: [0x7f,0xfe,0x0a,0x7e] +# GFX1250-FAKE16: v_cos_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xfe,0x0a,0x7e] + +0x7e,0xfe,0x0a,0x7e +# GFX1250-REAL16: v_cos_bf16_e32 v5.l, exec_lo ; encoding: [0x7e,0xfe,0x0a,0x7e] +# GFX1250-FAKE16: v_cos_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xfe,0x0a,0x7e] + +0x7d,0xfe,0x0a,0x7e +# GFX1250-REAL16: v_cos_bf16_e32 v5.l, m0 ; encoding: [0x7d,0xfe,0x0a,0x7e] +# GFX1250-FAKE16: v_cos_bf16_e32 v5, m0 ; encoding: [0x7d,0xfe,0x0a,0x7e] + +0x7c,0xfe,0x0a,0x7e +# GFX1250-REAL16: v_cos_bf16_e32 v5.l, null ; encoding: [0x7c,0xfe,0x0a,0x7e] +# GFX1250-FAKE16: v_cos_bf16_e32 v5, null ; encoding: [0x7c,0xfe,0x0a,0x7e] + +0x01,0xfe,0x0a,0x7e +# GFX1250-REAL16: v_cos_bf16_e32 v5.l, s1 ; encoding: [0x01,0xfe,0x0a,0x7e] +# GFX1250-FAKE16: v_cos_bf16_e32 v5, s1 ; encoding: [0x01,0xfe,0x0a,0x7e] + +0x69,0xfe,0x0a,0x7e +# GFX1250-REAL16: v_cos_bf16_e32 v5.l, s105 ; encoding: [0x69,0xfe,0x0a,0x7e] +# GFX1250-FAKE16: v_cos_bf16_e32 v5, s105 ; encoding: [0x69,0xfe,0x0a,0x7e] + +0xfd,0xfe,0x0a,0x7e +# GFX1250-REAL16: v_cos_bf16_e32 v5.l, src_scc ; encoding: [0xfd,0xfe,0x0a,0x7e] +# GFX1250-FAKE16: v_cos_bf16_e32 v5, src_scc ; encoding: [0xfd,0xfe,0x0a,0x7e] + +0x7b,0xfe,0x0a,0x7e +# GFX1250-REAL16: v_cos_bf16_e32 v5.l, ttmp15 ; encoding: [0x7b,0xfe,0x0a,0x7e] +# GFX1250-FAKE16: v_cos_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xfe,0x0a,0x7e] + +0x01,0xff,0x0a,0x7e +# GFX1250-REAL16: v_cos_bf16_e32 v5.l, v1.l ; encoding: [0x01,0xff,0x0a,0x7e] +# GFX1250-FAKE16: v_cos_bf16_e32 v5, v1 ; encoding: [0x01,0xff,0x0a,0x7e] + +0x7f,0xff,0x0a,0x7e +# GFX1250-REAL16: v_cos_bf16_e32 v5.l, v127.l ; encoding: [0x7f,0xff,0x0a,0x7e] +# GFX1250-FAKE16: v_cos_bf16_e32 v5, v127 ; encoding: [0x7f,0xff,0x0a,0x7e] + +0x6b,0xfe,0x0a,0x7e +# GFX1250-REAL16: v_cos_bf16_e32 v5.l, vcc_hi ; encoding: [0x6b,0xfe,0x0a,0x7e] +# GFX1250-FAKE16: v_cos_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xfe,0x0a,0x7e] + +0x6a,0xfe,0x0a,0x7e +# GFX1250-REAL16: v_cos_bf16_e32 v5.l, vcc_lo ; encoding: [0x6a,0xfe,0x0a,0x7e] +# GFX1250-FAKE16: v_cos_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xfe,0x0a,0x7e] + +0x81,0xff,0x0a,0x7f +# GFX1250-REAL16: v_cos_bf16_e32 v5.h, v1.h ; encoding: [0x81,0xff,0x0a,0x7f] + 0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00 # GFX1250: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt index dc8c6b1..f099ffc 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt @@ -2,6 +2,48 @@ # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-REAL16 %s # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s +0xfa,0x3c,0xfe,0x7f,0xff,0x6f,0x35,0x30 +# GFX1250: v_tanh_f32_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x3c,0xfe,0x7f,0xff,0x6f,0x35,0x30] + +0xfa,0x3c,0x0a,0x7e,0x01,0xe4,0x00,0xff +# GFX1250: v_tanh_f32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +0xfa,0x3c,0x0a,0x7e,0x01,0x1b,0x00,0xff +# GFX1250: v_tanh_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +0xfa,0x3c,0x0a,0x7e,0x01,0x41,0x01,0xff +# GFX1250: v_tanh_f32_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x41,0x01,0xff] + +0xfa,0x3c,0x0a,0x7e,0x01,0x40,0x01,0xff +# GFX1250: v_tanh_f32_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x40,0x01,0xff] + +0xfa,0x3c,0x0a,0x7e,0x01,0x21,0x01,0xff +# GFX1250: v_tanh_f32_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x21,0x01,0xff] + +0xfa,0x3c,0x0a,0x7e,0x01,0x2f,0x01,0xff +# GFX1250: v_tanh_f32_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +0xfa,0x3c,0x0a,0x7e,0x01,0x50,0x01,0xff +# GFX1250: v_tanh_f32_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x50,0x01,0xff] + +0xfa,0x3c,0x0a,0x7e,0x01,0x5f,0x01,0x01 +# GFX1250: v_tanh_f32_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +0xfa,0x3c,0x0a,0x7e,0x01,0x01,0x01,0xff +# GFX1250: v_tanh_f32_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x01,0x01,0xff] + +0xfa,0x3c,0x0a,0x7e,0x01,0x0f,0x01,0xff +# GFX1250: v_tanh_f32_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +0xfa,0x3c,0x0a,0x7e,0x01,0x11,0x01,0xff +# GFX1250: v_tanh_f32_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x11,0x01,0xff] + +0xfa,0x3c,0x0a,0x7e,0x01,0x1f,0x01,0xff +# GFX1250: v_tanh_f32_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +0xfa,0x3c,0x0a,0x7e,0x01,0x60,0x09,0x13 +# GFX1250: v_tanh_f32_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x60,0x09,0x13] + 0xfa,0x94,0xfe,0x7e,0x7f,0x6f,0x35,0x30 # GFX1250-REAL16: v_tanh_bf16_dpp v127.l, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x94,0xfe,0x7e,0x7f,0x6f,0x35,0x30] # GFX1250-FAKE16: v_tanh_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x94,0xfe,0x7e,0x7f,0x6f,0x35,0x30] @@ -415,6 +457,65 @@ 0xfa,0xfc,0x0a,0x7f,0x81,0x1b,0x00,0xff # GFX1250-REAL16: v_sin_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7f,0x81,0x1b,0x00,0xff] +0xfa,0xfe,0xfe,0x7e,0x7f,0x6f,0x35,0x30 +# GFX1250-REAL16: v_cos_bf16_dpp v127.l, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x7e,0x7f,0x6f,0x35,0x30] +# GFX1250-FAKE16: v_cos_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x7e,0x7f,0x6f,0x35,0x30] + +0xfa,0xfe,0x0a,0x7e,0x01,0xe4,0x00,0xff +# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0xe4,0x00,0xff] +# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +0xfa,0xfe,0x0a,0x7e,0x01,0x1b,0x00,0xff +# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x1b,0x00,0xff] +# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +0xfa,0xfe,0x0a,0x7e,0x01,0x41,0x01,0xff +# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x41,0x01,0xff] +# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x41,0x01,0xff] + +0xfa,0xfe,0x0a,0x7e,0x01,0x40,0x01,0xff +# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x40,0x01,0xff] +# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x40,0x01,0xff] + +0xfa,0xfe,0x0a,0x7e,0x01,0x21,0x01,0xff +# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x21,0x01,0xff] +# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x21,0x01,0xff] + +0xfa,0xfe,0x0a,0x7e,0x01,0x2f,0x01,0xff +# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x2f,0x01,0xff] +# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +0xfa,0xfe,0x0a,0x7e,0x01,0x50,0x01,0xff +# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x50,0x01,0xff] +# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x50,0x01,0xff] + +0xfa,0xfe,0x0a,0x7e,0x01,0x5f,0x01,0x01 +# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x5f,0x01,0x01] +# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +0xfa,0xfe,0x0a,0x7e,0x01,0x01,0x01,0xff +# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x01,0x01,0xff] +# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x01,0x01,0xff] + +0xfa,0xfe,0x0a,0x7e,0x01,0x0f,0x01,0xff +# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x0f,0x01,0xff] +# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +0xfa,0xfe,0x0a,0x7e,0x01,0x11,0x01,0xff +# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x11,0x01,0xff] +# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x11,0x01,0xff] + +0xfa,0xfe,0x0a,0x7e,0x01,0x1f,0x01,0xff +# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x1f,0x01,0xff] +# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +0xfa,0xfe,0x0a,0x7e,0x01,0x60,0x09,0x13 +# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x60,0x09,0x13] +# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x60,0x09,0x13] + +0xfa,0xfe,0x0a,0x7f,0x81,0x1b,0x00,0xff +# GFX1250-REAL16: v_cos_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7f,0x81,0x1b,0x00,0xff] + 0xfa,0xe4,0xfe,0x7e,0x7f,0x6f,0x35,0x30 # GFX1250: v_cvt_f32_bf16_dpp v127, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xe4,0xfe,0x7e,0x7f,0x6f,0x35,0x30] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt index 741bf3f..d86d463 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt @@ -2,6 +2,15 @@ # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-REAL16 %s # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s +0xe9,0x3c,0xfe,0x7f,0xff,0x00,0x00,0x00 +# GFX1250: v_tanh_f32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x3c,0xfe,0x7f,0xff,0x00,0x00,0x00] + +0xe9,0x3c,0x0a,0x7e,0x01,0x77,0x39,0x05 +# GFX1250: v_tanh_f32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x3c,0x0a,0x7e,0x01,0x77,0x39,0x05] + +0xea,0x3c,0x0a,0x7e,0x01,0x77,0x39,0x05 +# GFX1250: v_tanh_f32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x3c,0x0a,0x7e,0x01,0x77,0x39,0x05] + 0xe9,0x94,0xfe,0x7e,0x7f,0x00,0x00,0x00 # GFX1250-REAL16: v_tanh_bf16_dpp v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x94,0xfe,0x7e,0x7f,0x00,0x00,0x00] # GFX1250-FAKE16: v_tanh_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x94,0xfe,0x7e,0x7f,0x00,0x00,0x00] @@ -110,6 +119,21 @@ 0xe9,0xfc,0x0a,0x7f,0x81,0x77,0x39,0x05 # GFX1250-REAL16: v_sin_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfc,0x0a,0x7f,0x81,0x77,0x39,0x05] +0xe9,0xfe,0xfe,0x7e,0x7f,0x00,0x00,0x00 +# GFX1250-REAL16: v_cos_bf16_dpp v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfe,0x7e,0x7f,0x00,0x00,0x00] +# GFX1250-FAKE16: v_cos_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfe,0x7e,0x7f,0x00,0x00,0x00] + +0xe9,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05 +# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05] + +0xea,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05 +# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05] + +0xe9,0xfe,0x0a,0x7f,0x81,0x77,0x39,0x05 +# GFX1250-REAL16: v_cos_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0a,0x7f,0x81,0x77,0x39,0x05] + 0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00 # GFX1250: v_cvt_f32_bf16_dpp v127, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt index cd9b712..4dc7ed4 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt @@ -2,6 +2,51 @@ # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-REAL16 %s # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s +0xff,0x81,0x9e,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf +# GFX1250: v_tanh_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0x9e,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +0x05,0x00,0x9e,0xd5,0xc1,0x00,0x00,0x00 +# GFX1250: v_tanh_f32_e64 v5, -1 ; encoding: [0x05,0x00,0x9e,0xd5,0xc1,0x00,0x00,0x00] + +0x05,0x00,0x9e,0xd5,0xf0,0x00,0x00,0x08 +# GFX1250: v_tanh_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x9e,0xd5,0xf0,0x00,0x00,0x08] + +0x05,0x00,0x9e,0xd5,0x7f,0x00,0x00,0x00 +# GFX1250: v_tanh_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x9e,0xd5,0x7f,0x00,0x00,0x00] + +0x05,0x00,0x9e,0xd5,0x7e,0x00,0x00,0x00 +# GFX1250: v_tanh_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x9e,0xd5,0x7e,0x00,0x00,0x00] + +0x05,0x00,0x9e,0xd5,0x7d,0x00,0x00,0x00 +# GFX1250: v_tanh_f32_e64 v5, m0 ; encoding: [0x05,0x00,0x9e,0xd5,0x7d,0x00,0x00,0x00] + +0x05,0x00,0x9e,0xd5,0x7c,0x00,0x00,0x00 +# GFX1250: v_tanh_f32_e64 v5, null ; encoding: [0x05,0x00,0x9e,0xd5,0x7c,0x00,0x00,0x00] + +0x05,0x00,0x9e,0xd5,0x01,0x00,0x00,0x00 +# GFX1250: v_tanh_f32_e64 v5, s1 ; encoding: [0x05,0x00,0x9e,0xd5,0x01,0x00,0x00,0x00] + +0x05,0x00,0x9e,0xd5,0x69,0x00,0x00,0x00 +# GFX1250: v_tanh_f32_e64 v5, s105 ; encoding: [0x05,0x00,0x9e,0xd5,0x69,0x00,0x00,0x00] + +0x05,0x00,0x9e,0xd5,0xfd,0x00,0x00,0x10 +# GFX1250: v_tanh_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x9e,0xd5,0xfd,0x00,0x00,0x10] + +0x05,0x00,0x9e,0xd5,0x7b,0x00,0x00,0x00 +# GFX1250: v_tanh_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x9e,0xd5,0x7b,0x00,0x00,0x00] + +0x05,0x00,0x9e,0xd5,0x01,0x01,0x00,0x00 +# GFX1250: v_tanh_f32_e64 v5, v1 ; encoding: [0x05,0x00,0x9e,0xd5,0x01,0x01,0x00,0x00] + +0x05,0x00,0x9e,0xd5,0xff,0x01,0x00,0x00 +# GFX1250: v_tanh_f32_e64 v5, v255 ; encoding: [0x05,0x00,0x9e,0xd5,0xff,0x01,0x00,0x00] + +0x05,0x00,0x9e,0xd5,0x6b,0x00,0x00,0x00 +# GFX1250: v_tanh_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x9e,0xd5,0x6b,0x00,0x00,0x00] + +0x05,0x00,0x9e,0xd5,0x6a,0x00,0x00,0x00 +# GFX1250: v_tanh_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x9e,0xd5,0x6a,0x00,0x00,0x00] + 0xff,0x81,0xca,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00 # GFX1250-REAL16: v_tanh_bf16_e64 v255.l, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xca,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00] # GFX1250-FAKE16: v_tanh_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xca,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00] @@ -450,6 +495,70 @@ # GFX1250-REAL16: v_sin_bf16_e64 v5.h, v128.h op_sel:[1,1] ; encoding: [0x05,0x48,0xfe,0xd5,0x80,0x01,0x00,0x00] # GFX1250-FAKE16: v_sin_bf16_e64 v5, v128 ; encoding: [0x05,0x00,0xfe,0xd5,0x80,0x01,0x00,0x00] +0xff,0x81,0xff,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00 +# GFX1250-REAL16: v_cos_bf16_e64 v255.l, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xff,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00] +# GFX1250-FAKE16: v_cos_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xff,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00] + +0x05,0x00,0xff,0xd5,0xc1,0x00,0x00,0x00 +# GFX1250-REAL16: v_cos_bf16_e64 v5.l, -1 ; encoding: [0x05,0x00,0xff,0xd5,0xc1,0x00,0x00,0x00] +# GFX1250-FAKE16: v_cos_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xff,0xd5,0xc1,0x00,0x00,0x00] + +0x05,0x00,0xff,0xd5,0xf0,0x00,0x00,0x08 +# GFX1250-REAL16: v_cos_bf16_e64 v5.l, 0.5 mul:2 ; encoding: [0x05,0x00,0xff,0xd5,0xf0,0x00,0x00,0x08] +# GFX1250-FAKE16: v_cos_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xff,0xd5,0xf0,0x00,0x00,0x08] + +0x05,0x00,0xff,0xd5,0x7f,0x00,0x00,0x00 +# GFX1250-REAL16: v_cos_bf16_e64 v5.l, exec_hi ; encoding: [0x05,0x00,0xff,0xd5,0x7f,0x00,0x00,0x00] +# GFX1250-FAKE16: v_cos_bf16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xff,0xd5,0x7f,0x00,0x00,0x00] + +0x05,0x00,0xff,0xd5,0x7e,0x00,0x00,0x00 +# GFX1250-REAL16: v_cos_bf16_e64 v5.l, exec_lo ; encoding: [0x05,0x00,0xff,0xd5,0x7e,0x00,0x00,0x00] +# GFX1250-FAKE16: v_cos_bf16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xff,0xd5,0x7e,0x00,0x00,0x00] + +0x05,0x00,0xff,0xd5,0x7d,0x00,0x00,0x00 +# GFX1250-REAL16: v_cos_bf16_e64 v5.l, m0 ; encoding: [0x05,0x00,0xff,0xd5,0x7d,0x00,0x00,0x00] +# GFX1250-FAKE16: v_cos_bf16_e64 v5, m0 ; encoding: [0x05,0x00,0xff,0xd5,0x7d,0x00,0x00,0x00] + +0x05,0x00,0xff,0xd5,0x7c,0x00,0x00,0x00 +# GFX1250-REAL16: v_cos_bf16_e64 v5.l, null ; encoding: [0x05,0x00,0xff,0xd5,0x7c,0x00,0x00,0x00] +# GFX1250-FAKE16: v_cos_bf16_e64 v5, null ; encoding: [0x05,0x00,0xff,0xd5,0x7c,0x00,0x00,0x00] + +0x05,0x00,0xff,0xd5,0x01,0x00,0x00,0x00 +# GFX1250-REAL16: v_cos_bf16_e64 v5.l, s1 ; encoding: [0x05,0x00,0xff,0xd5,0x01,0x00,0x00,0x00] +# GFX1250-FAKE16: v_cos_bf16_e64 v5, s1 ; encoding: [0x05,0x00,0xff,0xd5,0x01,0x00,0x00,0x00] + +0x05,0x00,0xff,0xd5,0x69,0x00,0x00,0x00 +# GFX1250-REAL16: v_cos_bf16_e64 v5.l, s105 ; encoding: [0x05,0x00,0xff,0xd5,0x69,0x00,0x00,0x00] +# GFX1250-FAKE16: v_cos_bf16_e64 v5, s105 ; encoding: [0x05,0x00,0xff,0xd5,0x69,0x00,0x00,0x00] + +0x05,0x00,0xff,0xd5,0xfd,0x00,0x00,0x10 +# GFX1250-REAL16: v_cos_bf16_e64 v5.l, src_scc mul:4 ; encoding: [0x05,0x00,0xff,0xd5,0xfd,0x00,0x00,0x10] +# GFX1250-FAKE16: v_cos_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xff,0xd5,0xfd,0x00,0x00,0x10] + +0x05,0x00,0xff,0xd5,0x7b,0x00,0x00,0x00 +# GFX1250-REAL16: v_cos_bf16_e64 v5.l, ttmp15 ; encoding: [0x05,0x00,0xff,0xd5,0x7b,0x00,0x00,0x00] +# GFX1250-FAKE16: v_cos_bf16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xff,0xd5,0x7b,0x00,0x00,0x00] + +0x05,0x00,0xff,0xd5,0x01,0x01,0x00,0x00 +# GFX1250-REAL16: v_cos_bf16_e64 v5.l, v1.l ; encoding: [0x05,0x00,0xff,0xd5,0x01,0x01,0x00,0x00] +# GFX1250-FAKE16: v_cos_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xff,0xd5,0x01,0x01,0x00,0x00] + +0x05,0x00,0xff,0xd5,0xff,0x01,0x00,0x00 +# GFX1250-REAL16: v_cos_bf16_e64 v5.l, v255.l ; encoding: [0x05,0x00,0xff,0xd5,0xff,0x01,0x00,0x00] +# GFX1250-FAKE16: v_cos_bf16_e64 v5, v255 ; encoding: [0x05,0x00,0xff,0xd5,0xff,0x01,0x00,0x00] + +0x05,0x00,0xff,0xd5,0x6b,0x00,0x00,0x00 +# GFX1250-REAL16: v_cos_bf16_e64 v5.l, vcc_hi ; encoding: [0x05,0x00,0xff,0xd5,0x6b,0x00,0x00,0x00] +# GFX1250-FAKE16: v_cos_bf16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xff,0xd5,0x6b,0x00,0x00,0x00] + +0x05,0x00,0xff,0xd5,0x6a,0x00,0x00,0x00 +# GFX1250-REAL16: v_cos_bf16_e64 v5.l, vcc_lo ; encoding: [0x05,0x00,0xff,0xd5,0x6a,0x00,0x00,0x00] +# GFX1250-FAKE16: v_cos_bf16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xff,0xd5,0x6a,0x00,0x00,0x00] + +0x05,0x48,0xff,0xd5,0x80,0x01,0x00,0x00 +# GFX1250-REAL16: v_cos_bf16_e64 v5.h, v128.h op_sel:[1,1] ; encoding: [0x05,0x48,0xff,0xd5,0x80,0x01,0x00,0x00] +# GFX1250-FAKE16: v_cos_bf16_e64 v5, v128 ; encoding: [0x05,0x00,0xff,0xd5,0x80,0x01,0x00,0x00] + 0x01,0x00,0xed,0xd5,0x83,0x00,0x00,0x00 # GFX1250: v_cvt_f32_bf8_e64 v1, 3 ; encoding: [0x01,0x00,0xed,0xd5,0x83,0x00,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt index ed07393..1f03a43 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt @@ -2,6 +2,48 @@ # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250-REAL16 %s # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250-FAKE16 %s +0xff,0x81,0x9e,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30 +# GFX1250: v_tanh_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0x9e,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] + +0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 +# GFX1250: v_tanh_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13 +# GFX1250: v_tanh_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX1250: v_tanh_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX1250: v_tanh_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX1250: v_tanh_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX1250: v_tanh_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX1250: v_tanh_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX1250: v_tanh_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX1250: v_tanh_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX1250: v_tanh_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX1250: v_tanh_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX1250: v_tanh_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX1250: v_tanh_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + 0xff,0x81,0xfb,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30 # GFX1250-REAL16: v_rsq_bf16_e64_dpp v255.l, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xfb,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] # GFX1250-FAKE16: v_rsq_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xfb,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] @@ -242,6 +284,66 @@ # GFX1250-REAL16: v_sin_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0xfe,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff] # GFX1250-FAKE16: v_sin_bf16_e64_dpp v5, v128 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfe,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff] +0xff,0x81,0xff,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30 +# GFX1250-REAL16: v_cos_bf16_e64_dpp v255.l, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xff,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] +# GFX1250-FAKE16: v_cos_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xff,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] + +0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 +# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13 +# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] +# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x48,0xff,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff +# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0xff,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff] +# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v128 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff] + 0xff,0x81,0xf9,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30 # GFX1250-REAL16: v_rcp_bf16_e64_dpp v255.l, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xf9,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] # GFX1250-FAKE16: v_rcp_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xf9,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt index a6d6713..e673f9f 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt @@ -2,6 +2,18 @@ # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250-REAL16 %s # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250-FAKE16 %s +0xff,0x81,0x9e,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00 +# GFX1250: v_tanh_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0x9e,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +0x05,0x00,0x9e,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 +# GFX1250: v_tanh_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x9e,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +0x05,0x00,0x9e,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 +# GFX1250: v_tanh_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x9e,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +0x05,0x00,0x9e,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05 +# GFX1250: v_tanh_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x9e,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + 0xff,0x81,0xfb,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00 # GFX1250-REAL16: v_rsq_bf16_e64_dpp v255.l, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xfb,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] # GFX1250-FAKE16: v_rsq_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xfb,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] @@ -82,6 +94,26 @@ # GFX1250-REAL16: v_sin_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x48,0xfe,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] # GFX1250-FAKE16: v_sin_bf16_e64_dpp v5, v128 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xfe,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] +0xff,0x81,0xff,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00 +# GFX1250-REAL16: v_cos_bf16_e64_dpp v255.l, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xff,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +# GFX1250-FAKE16: v_cos_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xff,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 +# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 +# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +0x05,0x00,0xff,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05 +# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0xff,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0xff,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +0x05,0x48,0xff,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05 +# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x48,0xff,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v128 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] + 0xff,0x81,0xf9,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00 # GFX1250-REAL16: v_rcp_bf16_e64_dpp v255.l, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xf9,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] # GFX1250-FAKE16: v_rcp_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xf9,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py index 1076456..143cc38 100644 --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -93,6 +93,13 @@ config.substitutions.append(("%pluginext", config.llvm_plugin_ext)) config.substitutions.append(("%exeext", config.llvm_exe_ext)) config.substitutions.append(("%llvm_src_root", config.llvm_src_root)) +# Add IR2Vec test vocabulary path substitution +config.substitutions.append( + ( + "%ir2vec_test_vocab_dir", + os.path.join(config.test_source_root, "Analysis", "IR2Vec", "Inputs"), + ) +) lli_args = [] # The target triple used by default by lli is the process target triple (some @@ -197,6 +204,7 @@ tools.extend( "llvm-dlltool", "llvm-exegesis", "llvm-extract", + "llvm-ir2vec", "llvm-isel-fuzzer", "llvm-ifs", "llvm-install-name-tool", diff --git a/llvm/test/tools/llvm-ir2vec/embeddings.ll b/llvm/test/tools/llvm-ir2vec/embeddings.ll new file mode 100644 index 0000000..993ea86 --- /dev/null +++ b/llvm/test/tools/llvm-ir2vec/embeddings.ll @@ -0,0 +1,73 @@ +; RUN: llvm-ir2vec --mode=embeddings --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-DEFAULT +; RUN: llvm-ir2vec --mode=embeddings --level=func --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-FUNC-LEVEL +; RUN: llvm-ir2vec --mode=embeddings --level=func --function=abc --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-FUNC-LEVEL-ABC +; RUN: not llvm-ir2vec --mode=embeddings --level=func --function=def --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s 2>&1 | FileCheck %s -check-prefix=CHECK-FUNC-DEF +; RUN: llvm-ir2vec --mode=embeddings --level=bb --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-BB-LEVEL +; RUN: llvm-ir2vec --mode=embeddings --level=bb --function=abc_repeat --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-BB-LEVEL-ABC-REPEAT +; RUN: llvm-ir2vec --mode=embeddings --level=inst --function=abc_repeat --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-INST-LEVEL-ABC-REPEAT + +define dso_local noundef float @abc(i32 noundef %a, float noundef %b) #0 { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca float, align 4 + store i32 %a, ptr %a.addr, align 4 + store float %b, ptr %b.addr, align 4 + %0 = load i32, ptr %a.addr, align 4 + %1 = load i32, ptr %a.addr, align 4 + %mul = mul nsw i32 %0, %1 + %conv = sitofp i32 %mul to float + %2 = load float, ptr %b.addr, align 4 + %add = fadd float %conv, %2 + ret float %add +} + +define dso_local noundef float @abc_repeat(i32 noundef %a, float noundef %b) #0 { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca float, align 4 + store i32 %a, ptr %a.addr, align 4 + store float %b, ptr %b.addr, align 4 + %0 = load i32, ptr %a.addr, align 4 + %1 = load i32, ptr %a.addr, align 4 + %mul = mul nsw i32 %0, %1 + %conv = sitofp i32 %mul to float + %2 = load float, ptr %b.addr, align 4 + %add = fadd float %conv, %2 + ret float %add +} + +; CHECK-DEFAULT: Function: abc +; CHECK-DEFAULT-NEXT: [ 878.00 889.00 900.00 ] +; CHECK-DEFAULT-NEXT: Function: abc_repeat +; CHECK-DEFAULT-NEXT: [ 878.00 889.00 900.00 ] + +; CHECK-FUNC-LEVEL: Function: abc +; CHECK-FUNC-LEVEL-NEXT: [ 878.00 889.00 900.00 ] +; CHECK-FUNC-LEVEL-NEXT: Function: abc_repeat +; CHECK-FUNC-LEVEL-NEXT: [ 878.00 889.00 900.00 ] + +; CHECK-FUNC-LEVEL-ABC: Function: abc +; CHECK-FUNC-LEVEL-NEXT-ABC: [ 878.00 889.00 900.00 ] + +; CHECK-FUNC-DEF: Error: Function 'def' not found + +; CHECK-BB-LEVEL: Function: abc +; CHECK-BB-LEVEL-NEXT: entry: [ 878.00 889.00 900.00 ] +; CHECK-BB-LEVEL-NEXT: Function: abc_repeat +; CHECK-BB-LEVEL-NEXT: entry: [ 878.00 889.00 900.00 ] + +; CHECK-BB-LEVEL-ABC-REPEAT: Function: abc_repeat +; CHECK-BB-LEVEL-ABC-REPEAT-NEXT: entry: [ 878.00 889.00 900.00 ] + +; CHECK-INST-LEVEL-ABC-REPEAT: Function: abc_repeat +; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: %a.addr = alloca i32, align 4 [ 91.00 92.00 93.00 ] +; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: %b.addr = alloca float, align 4 [ 91.00 92.00 93.00 ] +; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: store i32 %a, ptr %a.addr, align 4 [ 97.00 98.00 99.00 ] +; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: store float %b, ptr %b.addr, align 4 [ 97.00 98.00 99.00 ] +; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: %0 = load i32, ptr %a.addr, align 4 [ 94.00 95.00 96.00 ] +; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: %1 = load i32, ptr %a.addr, align 4 [ 94.00 95.00 96.00 ] +; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: %mul = mul nsw i32 %0, %1 [ 49.00 50.00 51.00 ] +; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: %conv = sitofp i32 %mul to float [ 130.00 131.00 132.00 ] +; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: %2 = load float, ptr %b.addr, align 4 [ 94.00 95.00 96.00 ] +; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: %add = fadd float %conv, %2 [ 40.00 41.00 42.00 ] +; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: ret float %add [ 1.00 2.00 3.00 ] diff --git a/llvm/test/tools/llvm-ir2vec/triplets.ll b/llvm/test/tools/llvm-ir2vec/triplets.ll new file mode 100644 index 0000000..d1ef5b3 --- /dev/null +++ b/llvm/test/tools/llvm-ir2vec/triplets.ll @@ -0,0 +1,38 @@ +; RUN: llvm-ir2vec --mode=triplets %s | FileCheck %s -check-prefix=TRIPLETS + +define i32 @simple_add(i32 %a, i32 %b) { +entry: + %add = add i32 %a, %b + ret i32 %add +} + +define i32 @simple_mul(i32 %x, i32 %y) { +entry: + %mul = mul i32 %x, %y + ret i32 %mul +} + +define i32 @test_function(i32 %arg1, i32 %arg2) { +entry: + %local1 = alloca i32, align 4 + %local2 = alloca i32, align 4 + store i32 %arg1, ptr %local1, align 4 + store i32 %arg2, ptr %local2, align 4 + %load1 = load i32, ptr %local1, align 4 + %load2 = load i32, ptr %local2, align 4 + %result = add i32 %load1, %load2 + ret i32 %result +} + +; TRIPLETS: Add IntegerTy Variable Variable +; TRIPLETS-NEXT: Ret VoidTy Variable +; TRIPLETS-NEXT: Mul IntegerTy Variable Variable +; TRIPLETS-NEXT: Ret VoidTy Variable +; TRIPLETS-NEXT: Alloca PointerTy Constant +; TRIPLETS-NEXT: Alloca PointerTy Constant +; TRIPLETS-NEXT: Store VoidTy Variable Pointer +; TRIPLETS-NEXT: Store VoidTy Variable Pointer +; TRIPLETS-NEXT: Load IntegerTy Pointer +; TRIPLETS-NEXT: Load IntegerTy Pointer +; TRIPLETS-NEXT: Add IntegerTy Variable Variable +; TRIPLETS-NEXT: Ret VoidTy Variable |