aboutsummaryrefslogtreecommitdiff
path: root/llvm/test
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test')
-rw-r--r--llvm/test/CMakeLists.txt1
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir30
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir24
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir22
-rw-r--r--llvm/test/CodeGen/AMDGPU/flat-saddr-load.ll322
-rw-r--r--llvm/test/CodeGen/AMDGPU/insert-delay-alu-wmma-xdl.mir17
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.bf16.ll33
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll87
-rw-r--r--llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu-flat.mir357
-rw-r--r--llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir56
-rw-r--r--llvm/test/CodeGen/DirectX/flatten-array.ll22
-rw-r--r--llvm/test/CodeGen/DirectX/legalize-load-store-array-alloca.ll18
-rw-r--r--llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll9
-rw-r--r--llvm/test/CodeGen/DirectX/scalar-store.ll6
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll6
-rw-r--r--llvm/test/CodeGen/SPIRV/transcoding/builtin_calls.ll100
-rw-r--r--llvm/test/CodeGen/X86/invalid-operand-bundle-call.ll4
-rw-r--r--llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s90
-rw-r--r--llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s93
-rw-r--r--llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s112
-rw-r--r--llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s116
-rw-r--r--llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s24
-rw-r--r--llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s28
-rw-r--r--llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s90
-rw-r--r--llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s93
-rw-r--r--llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s112
-rw-r--r--llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s116
-rw-r--r--llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s32
-rw-r--r--llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s36
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt108
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt101
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt24
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt109
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt102
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt32
-rw-r--r--llvm/test/lit.cfg.py8
-rw-r--r--llvm/test/tools/llvm-ir2vec/embeddings.ll73
-rw-r--r--llvm/test/tools/llvm-ir2vec/triplets.ll38
38 files changed, 2334 insertions, 317 deletions
diff --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt
index 2a6135d..3426b6f 100644
--- a/llvm/test/CMakeLists.txt
+++ b/llvm/test/CMakeLists.txt
@@ -97,6 +97,7 @@ set(LLVM_TEST_DEPENDS
llvm-exegesis
llvm-extract
llvm-gsymutil
+ llvm-ir2vec
llvm-isel-fuzzer
llvm-ifs
llvm-install-name-tool
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir
index cebdffc..eba64b8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir
@@ -223,37 +223,37 @@ body: |
; GFX7-LABEL: name: load_atomic_flat_v2s32_seq_cst
; GFX7: liveins: $vgpr0_vgpr1
; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>))
- ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (<2 x s32>))
+ ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
;
; GFX9-LABEL: name: load_atomic_flat_v2s32_seq_cst
; GFX9: liveins: $vgpr0_vgpr1
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>))
- ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (<2 x s32>))
+ ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
;
; GFX10-LABEL: name: load_atomic_flat_v2s32_seq_cst
; GFX10: liveins: $vgpr0_vgpr1
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>))
- ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (<2 x s32>))
+ ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
;
; GFX11-LABEL: name: load_atomic_flat_v2s32_seq_cst
; GFX11: liveins: $vgpr0_vgpr1
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>))
- ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (<2 x s32>))
+ ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
;
; GFX12-LABEL: name: load_atomic_flat_v2s32_seq_cst
; GFX12: liveins: $vgpr0_vgpr1
; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>))
- ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX12-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (<2 x s32>))
+ ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst (<2 x s32>), align 8, addrspace 0)
$vgpr0_vgpr1 = COPY %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir
index eafc96d..474f130 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir
@@ -252,30 +252,30 @@ body: |
; GFX7-LABEL: name: load_atomic_global_v2s32_seq_cst
; GFX7: liveins: $vgpr0_vgpr1
; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1)
- ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (<2 x s32>), addrspace 1)
+ ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
;
; GFX7-FLAT-LABEL: name: load_atomic_global_v2s32_seq_cst
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (<2 x s32>), addrspace 1)
+ ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
;
; GFX9-LABEL: name: load_atomic_global_v2s32_seq_cst
; GFX9: liveins: $vgpr0_vgpr1
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1)
- ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (<2 x s32>), addrspace 1)
+ ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
;
; GFX10-LABEL: name: load_atomic_global_v2s32_seq_cst
; GFX10: liveins: $vgpr0_vgpr1
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1)
- ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (<2 x s32>), addrspace 1)
+ ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst (<2 x s32>), align 8, addrspace 1)
$vgpr0_vgpr1 = COPY %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir
index 2675295..ae010a8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir
@@ -22,6 +22,7 @@ body: |
; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2
; GFX7-NEXT: FLAT_STORE_DWORD [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (s32))
+ ;
; GFX9-LABEL: name: atomic_store_flat_s32_seq_cst
; GFX9: liveins: $vgpr0, $vgpr1_vgpr2
; GFX9-NEXT: {{ $}}
@@ -51,6 +52,7 @@ body: |
; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2
; GFX7-NEXT: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p0) :: (store seq_cst (<2 x s16>))
+ ;
; GFX9-LABEL: name: atomic_store_flat_v2s16_seq_cst
; GFX9: liveins: $vgpr0, $vgpr1_vgpr2
; GFX9-NEXT: {{ $}}
@@ -80,6 +82,7 @@ body: |
; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2
; GFX7-NEXT: G_STORE [[COPY]](p3), [[COPY1]](p0) :: (store seq_cst (p3))
+ ;
; GFX9-LABEL: name: atomic_store_flat_p3_seq_cst
; GFX9: liveins: $vgpr0, $vgpr1_vgpr2
; GFX9-NEXT: {{ $}}
@@ -109,6 +112,7 @@ body: |
; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2
; GFX7-NEXT: G_STORE [[COPY]](p5), [[COPY1]](p0) :: (store seq_cst (p5))
+ ;
; GFX9-LABEL: name: atomic_store_flat_p5_seq_cst
; GFX9: liveins: $vgpr0, $vgpr1_vgpr2
; GFX9-NEXT: {{ $}}
@@ -138,6 +142,7 @@ body: |
; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p6) = COPY $vgpr0
; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2
; GFX7-NEXT: G_STORE [[COPY]](p6), [[COPY1]](p0) :: (store seq_cst (p6))
+ ;
; GFX9-LABEL: name: atomic_store_flat_p6_seq_cst
; GFX9: liveins: $vgpr0, $vgpr1_vgpr2
; GFX9-NEXT: {{ $}}
@@ -167,6 +172,7 @@ body: |
; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (s64))
+ ;
; GFX9-LABEL: name: atomic_store_flat_s64_seq_cst
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -193,15 +199,16 @@ body: |
; GFX7-LABEL: name: atomic_store_flat_v2s32_seq_cst
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3
- ; GFX7-NEXT: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p0) :: (store seq_cst (<2 x s32>))
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+ ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (<2 x s32>))
+ ;
; GFX9-LABEL: name: atomic_store_flat_v2s32_seq_cst
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3
- ; GFX9-NEXT: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p0) :: (store seq_cst (<2 x s32>))
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+ ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (<2 x s32>))
%0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
%1:vgpr(p0) = COPY $vgpr2_vgpr3
G_STORE %0, %1 :: (store seq_cst (<2 x s32>), align 8, addrspace 0)
@@ -225,6 +232,7 @@ body: |
; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3
; GFX7-NEXT: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p0) :: (store seq_cst (<4 x s16>))
+ ;
; GFX9-LABEL: name: atomic_store_flat_v4s16_seq_cst
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -254,6 +262,7 @@ body: |
; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3
; GFX7-NEXT: G_STORE [[COPY]](p0), [[COPY1]](p0) :: (store seq_cst (p0))
+ ;
; GFX9-LABEL: name: atomic_store_flat_p0_seq_cst
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -282,6 +291,7 @@ body: |
; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3
; GFX7-NEXT: G_STORE [[COPY]](p1), [[COPY1]](p0) :: (store seq_cst (p1))
+ ;
; GFX9-LABEL: name: atomic_store_flat_p1_seq_cst
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/flat-saddr-load.ll b/llvm/test/CodeGen/AMDGPU/flat-saddr-load.ll
index f0988a1..f54fbba 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-saddr-load.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-saddr-load.ll
@@ -998,24 +998,11 @@ define amdgpu_ps <2 x half> @flat_load_saddr_p3_immneg128(ptr inreg %sbase, i32
}
define amdgpu_ps <2 x float> @flat_load_saddr_f64(ptr inreg %sbase, i32 %voffset) {
-; GFX1250-SDAG-LABEL: flat_load_saddr_f64:
-; GFX1250-SDAG: ; %bb.0:
-; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0
-; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
-; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1]
-; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-SDAG-NEXT: ; return to shader part epilog
-;
-; GFX1250-GISEL-LABEL: flat_load_saddr_f64:
-; GFX1250-GISEL: ; %bb.0:
-; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
-; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
-; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo
-; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1]
-; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-GISEL-NEXT: ; return to shader part epilog
+; GFX1250-LABEL: flat_load_saddr_f64:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
%load = load double, ptr %gep0
@@ -1024,24 +1011,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_f64(ptr inreg %sbase, i32 %voffset
}
define amdgpu_ps <2 x float> @flat_load_saddr_f64_immneg128(ptr inreg %sbase, i32 %voffset) {
-; GFX1250-SDAG-LABEL: flat_load_saddr_f64_immneg128:
-; GFX1250-SDAG: ; %bb.0:
-; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0
-; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
-; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128
-; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-SDAG-NEXT: ; return to shader part epilog
-;
-; GFX1250-GISEL-LABEL: flat_load_saddr_f64_immneg128:
-; GFX1250-GISEL: ; %bb.0:
-; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
-; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
-; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo
-; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128
-; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-GISEL-NEXT: ; return to shader part epilog
+; GFX1250-LABEL: flat_load_saddr_f64_immneg128:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] offset:-128
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
@@ -1051,24 +1025,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_f64_immneg128(ptr inreg %sbase, i3
}
define amdgpu_ps <2 x float> @flat_load_saddr_i64(ptr inreg %sbase, i32 %voffset) {
-; GFX1250-SDAG-LABEL: flat_load_saddr_i64:
-; GFX1250-SDAG: ; %bb.0:
-; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0
-; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
-; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1]
-; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-SDAG-NEXT: ; return to shader part epilog
-;
-; GFX1250-GISEL-LABEL: flat_load_saddr_i64:
-; GFX1250-GISEL: ; %bb.0:
-; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
-; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
-; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo
-; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1]
-; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-GISEL-NEXT: ; return to shader part epilog
+; GFX1250-LABEL: flat_load_saddr_i64:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
%load = load i64, ptr %gep0
@@ -1077,24 +1038,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_i64(ptr inreg %sbase, i32 %voffset
}
define amdgpu_ps <2 x float> @flat_load_saddr_i64_immneg128(ptr inreg %sbase, i32 %voffset) {
-; GFX1250-SDAG-LABEL: flat_load_saddr_i64_immneg128:
-; GFX1250-SDAG: ; %bb.0:
-; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0
-; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
-; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128
-; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-SDAG-NEXT: ; return to shader part epilog
-;
-; GFX1250-GISEL-LABEL: flat_load_saddr_i64_immneg128:
-; GFX1250-GISEL: ; %bb.0:
-; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
-; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
-; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo
-; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128
-; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-GISEL-NEXT: ; return to shader part epilog
+; GFX1250-LABEL: flat_load_saddr_i64_immneg128:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] offset:-128
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
@@ -1104,24 +1052,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_i64_immneg128(ptr inreg %sbase, i3
}
define amdgpu_ps <2 x float> @flat_load_saddr_v2f32(ptr inreg %sbase, i32 %voffset) {
-; GFX1250-SDAG-LABEL: flat_load_saddr_v2f32:
-; GFX1250-SDAG: ; %bb.0:
-; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0
-; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
-; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1]
-; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-SDAG-NEXT: ; return to shader part epilog
-;
-; GFX1250-GISEL-LABEL: flat_load_saddr_v2f32:
-; GFX1250-GISEL: ; %bb.0:
-; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
-; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
-; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo
-; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1]
-; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-GISEL-NEXT: ; return to shader part epilog
+; GFX1250-LABEL: flat_load_saddr_v2f32:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
%load = load <2 x float>, ptr %gep0
@@ -1129,24 +1064,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_v2f32(ptr inreg %sbase, i32 %voffs
}
define amdgpu_ps <2 x float> @flat_load_saddr_v2f32_immneg128(ptr inreg %sbase, i32 %voffset) {
-; GFX1250-SDAG-LABEL: flat_load_saddr_v2f32_immneg128:
-; GFX1250-SDAG: ; %bb.0:
-; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0
-; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
-; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128
-; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-SDAG-NEXT: ; return to shader part epilog
-;
-; GFX1250-GISEL-LABEL: flat_load_saddr_v2f32_immneg128:
-; GFX1250-GISEL: ; %bb.0:
-; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
-; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
-; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo
-; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128
-; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-GISEL-NEXT: ; return to shader part epilog
+; GFX1250-LABEL: flat_load_saddr_v2f32_immneg128:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] offset:-128
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
@@ -1155,24 +1077,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_v2f32_immneg128(ptr inreg %sbase,
}
define amdgpu_ps <2 x float> @flat_load_saddr_v2i32(ptr inreg %sbase, i32 %voffset) {
-; GFX1250-SDAG-LABEL: flat_load_saddr_v2i32:
-; GFX1250-SDAG: ; %bb.0:
-; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0
-; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
-; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1]
-; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-SDAG-NEXT: ; return to shader part epilog
-;
-; GFX1250-GISEL-LABEL: flat_load_saddr_v2i32:
-; GFX1250-GISEL: ; %bb.0:
-; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
-; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
-; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo
-; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1]
-; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-GISEL-NEXT: ; return to shader part epilog
+; GFX1250-LABEL: flat_load_saddr_v2i32:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
%load = load <2 x i32>, ptr %gep0
@@ -1181,24 +1090,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_v2i32(ptr inreg %sbase, i32 %voffs
}
define amdgpu_ps <2 x float> @flat_load_saddr_v2i32_immneg128(ptr inreg %sbase, i32 %voffset) {
-; GFX1250-SDAG-LABEL: flat_load_saddr_v2i32_immneg128:
-; GFX1250-SDAG: ; %bb.0:
-; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0
-; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
-; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128
-; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-SDAG-NEXT: ; return to shader part epilog
-;
-; GFX1250-GISEL-LABEL: flat_load_saddr_v2i32_immneg128:
-; GFX1250-GISEL: ; %bb.0:
-; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
-; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
-; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo
-; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128
-; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-GISEL-NEXT: ; return to shader part epilog
+; GFX1250-LABEL: flat_load_saddr_v2i32_immneg128:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] offset:-128
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
@@ -1208,24 +1104,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_v2i32_immneg128(ptr inreg %sbase,
}
define amdgpu_ps <2 x float> @flat_load_saddr_v4i16(ptr inreg %sbase, i32 %voffset) {
-; GFX1250-SDAG-LABEL: flat_load_saddr_v4i16:
-; GFX1250-SDAG: ; %bb.0:
-; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0
-; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
-; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1]
-; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-SDAG-NEXT: ; return to shader part epilog
-;
-; GFX1250-GISEL-LABEL: flat_load_saddr_v4i16:
-; GFX1250-GISEL: ; %bb.0:
-; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
-; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
-; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo
-; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1]
-; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-GISEL-NEXT: ; return to shader part epilog
+; GFX1250-LABEL: flat_load_saddr_v4i16:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
%load = load <4 x i16>, ptr %gep0
@@ -1234,24 +1117,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_v4i16(ptr inreg %sbase, i32 %voffs
}
define amdgpu_ps <2 x float> @flat_load_saddr_v4i16_immneg128(ptr inreg %sbase, i32 %voffset) {
-; GFX1250-SDAG-LABEL: flat_load_saddr_v4i16_immneg128:
-; GFX1250-SDAG: ; %bb.0:
-; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0
-; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
-; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128
-; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-SDAG-NEXT: ; return to shader part epilog
-;
-; GFX1250-GISEL-LABEL: flat_load_saddr_v4i16_immneg128:
-; GFX1250-GISEL: ; %bb.0:
-; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
-; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
-; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo
-; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128
-; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-GISEL-NEXT: ; return to shader part epilog
+; GFX1250-LABEL: flat_load_saddr_v4i16_immneg128:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] offset:-128
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
@@ -1261,24 +1131,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_v4i16_immneg128(ptr inreg %sbase,
}
define amdgpu_ps <2 x float> @flat_load_saddr_v4f16(ptr inreg %sbase, i32 %voffset) {
-; GFX1250-SDAG-LABEL: flat_load_saddr_v4f16:
-; GFX1250-SDAG: ; %bb.0:
-; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0
-; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
-; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1]
-; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-SDAG-NEXT: ; return to shader part epilog
-;
-; GFX1250-GISEL-LABEL: flat_load_saddr_v4f16:
-; GFX1250-GISEL: ; %bb.0:
-; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
-; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
-; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo
-; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1]
-; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-GISEL-NEXT: ; return to shader part epilog
+; GFX1250-LABEL: flat_load_saddr_v4f16:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
%load = load <4 x half>, ptr %gep0
@@ -1287,24 +1144,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_v4f16(ptr inreg %sbase, i32 %voffs
}
define amdgpu_ps <2 x float> @flat_load_saddr_v4f16_immneg128(ptr inreg %sbase, i32 %voffset) {
-; GFX1250-SDAG-LABEL: flat_load_saddr_v4f16_immneg128:
-; GFX1250-SDAG: ; %bb.0:
-; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0
-; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
-; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128
-; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-SDAG-NEXT: ; return to shader part epilog
-;
-; GFX1250-GISEL-LABEL: flat_load_saddr_v4f16_immneg128:
-; GFX1250-GISEL: ; %bb.0:
-; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
-; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
-; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo
-; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128
-; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-GISEL-NEXT: ; return to shader part epilog
+; GFX1250-LABEL: flat_load_saddr_v4f16_immneg128:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] offset:-128
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
@@ -1314,24 +1158,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_v4f16_immneg128(ptr inreg %sbase,
}
define amdgpu_ps <2 x float> @flat_load_saddr_p1(ptr inreg %sbase, i32 %voffset) {
-; GFX1250-SDAG-LABEL: flat_load_saddr_p1:
-; GFX1250-SDAG: ; %bb.0:
-; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0
-; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
-; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1]
-; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-SDAG-NEXT: ; return to shader part epilog
-;
-; GFX1250-GISEL-LABEL: flat_load_saddr_p1:
-; GFX1250-GISEL: ; %bb.0:
-; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
-; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
-; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo
-; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1]
-; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-GISEL-NEXT: ; return to shader part epilog
+; GFX1250-LABEL: flat_load_saddr_p1:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
%load = load ptr, ptr %gep0
@@ -1341,24 +1172,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_p1(ptr inreg %sbase, i32 %voffset)
}
define amdgpu_ps <2 x float> @flat_load_saddr_p1_immneg128(ptr inreg %sbase, i32 %voffset) {
-; GFX1250-SDAG-LABEL: flat_load_saddr_p1_immneg128:
-; GFX1250-SDAG: ; %bb.0:
-; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0
-; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
-; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128
-; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-SDAG-NEXT: ; return to shader part epilog
-;
-; GFX1250-GISEL-LABEL: flat_load_saddr_p1_immneg128:
-; GFX1250-GISEL: ; %bb.0:
-; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
-; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
-; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo
-; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128
-; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-GISEL-NEXT: ; return to shader part epilog
+; GFX1250-LABEL: flat_load_saddr_p1_immneg128:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] offset:-128
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
diff --git a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-wmma-xdl.mir b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-wmma-xdl.mir
index 7c3170d..0abf347 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-wmma-xdl.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-wmma-xdl.mir
@@ -65,20 +65,3 @@ body: |
$vgpr12 = V_EXP_F32_e32 $vgpr12, implicit $exec, implicit $mode
$vgpr13 = V_ADD_U32_e32 $vgpr13, $vgpr8, implicit $exec
...
-
----
-name: dot_xdl_dep_2
-tracksRegLiveness: true
-body: |
- bb.0:
- ; CHECK-LABEL: {{^}}dot_xdl_dep_2:
- ; CHECK: %bb.0:
- ; CHECK-NEXT: v_dot4_i32_iu8 v0, s2, s3, v0 neg_lo:[1,1,0]
- ; CHECK-NEXT: v_dot4_i32_iu8 v1, s2, s3, v2 neg_lo:[1,1,0]
- ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2)
- ; CHECK-NEXT: v_add_nc_u32_e32 v2, v0, v0
- liveins: $vgpr0, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2
- $vgpr0 = V_DOT4_I32_IU8 9, $sgpr2, 9, $sgpr3, 8, $vgpr0, 0, 0, 0, implicit $exec
- $vgpr1 = V_DOT4_I32_IU8 9, $sgpr2, 9, $sgpr3, 8, $vgpr2, 0, 0, 0, implicit $exec
- $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-...
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.bf16.ll
new file mode 100644
index 0000000..091859f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.bf16.ll
@@ -0,0 +1,33 @@
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN %s
+; xUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GCN %s
+
+; FIXME: GlobalISel does not work with bf16
+
+declare bfloat @llvm.amdgcn.cos.bf16(bfloat) #0
+
+; GCN-LABEL: {{^}}cos_bf16:
+; GCN: v_cos_bf16_e32 {{v[0-9]+}}, {{s[0-9]+}}
+define amdgpu_kernel void @cos_bf16(ptr addrspace(1) %out, bfloat %src) #1 {
+ %cos = call bfloat @llvm.amdgcn.cos.bf16(bfloat %src) #0
+ store bfloat %cos, ptr addrspace(1) %out, align 2
+ ret void
+}
+
+; GCN-LABEL: {{^}}cos_bf16_constant_4
+; GCN: v_cos_bf16_e32 v0, 4.0
+define amdgpu_kernel void @cos_bf16_constant_4(ptr addrspace(1) %out) #1 {
+ %cos = call bfloat @llvm.amdgcn.cos.bf16(bfloat 4.0) #0
+ store bfloat %cos, ptr addrspace(1) %out, align 2
+ ret void
+}
+
+; GCN-LABEL: {{^}}cos_bf16_constant_100
+; GCN: v_cos_bf16_e32 {{v[0-9]+}}, 0x42c8
+define amdgpu_kernel void @cos_bf16_constant_100(ptr addrspace(1) %out) #1 {
+ %cos = call bfloat @llvm.amdgcn.cos.bf16(bfloat 100.0) #0
+ store bfloat %cos, ptr addrspace(1) %out, align 2
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll
index 344c011..81db735 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll
@@ -1,14 +1,97 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; xUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 %s -o - | FileCheck -check-prefix=SDAG-REAL16 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 %s -o - | FileCheck -check-prefix=SDAG-REAL16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 %s -o - | FileCheck -check-prefix=SDAG-FAKE16 %s
; xUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 %s -o - | FileCheck -check-prefix=GISEL-REAL16 %s
; xUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 %s -o - | FileCheck -check-prefix=GISEL-FAKE16 %s
-; FIXME: t16 doesn't work at the moment because the store of s16 under t16 mode fails to select.
; FIXME: GlobalISel does not work with bf16
+declare float @llvm.amdgcn.tanh.f32(float) #0
declare bfloat @llvm.amdgcn.tanh.bf16(bfloat) #0
+define amdgpu_kernel void @tanh_f32(ptr addrspace(1) %out, float %src) #1 {
+; SDAG-REAL16-LABEL: tanh_f32:
+; SDAG-REAL16: ; %bb.0:
+; SDAG-REAL16-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
+; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
+; SDAG-REAL16-NEXT: v_tanh_f32_e32 v0, s2
+; SDAG-REAL16-NEXT: global_store_b32 v1, v0, s[0:1]
+; SDAG-REAL16-NEXT: s_endpgm
+;
+; SDAG-FAKE16-LABEL: tanh_f32:
+; SDAG-FAKE16: ; %bb.0:
+; SDAG-FAKE16-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
+; SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
+; SDAG-FAKE16-NEXT: v_tanh_f32_e32 v0, s2
+; SDAG-FAKE16-NEXT: global_store_b32 v1, v0, s[0:1]
+; SDAG-FAKE16-NEXT: s_endpgm
+ %tanh = call float @llvm.amdgcn.tanh.f32(float %src) #0
+ store float %tanh, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+; TODO: Really these should be constant folded
+define amdgpu_kernel void @tanh_f32_constant_4.0(ptr addrspace(1) %out) #1 {
+; SDAG-REAL16-LABEL: tanh_f32_constant_4.0:
+; SDAG-REAL16: ; %bb.0:
+; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; SDAG-REAL16-NEXT: v_tanh_f32_e32 v0, 4.0
+; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
+; SDAG-REAL16-NEXT: global_store_b32 v1, v0, s[0:1]
+; SDAG-REAL16-NEXT: s_endpgm
+;
+; SDAG-FAKE16-LABEL: tanh_f32_constant_4.0:
+; SDAG-FAKE16: ; %bb.0:
+; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; SDAG-FAKE16-NEXT: v_tanh_f32_e32 v0, 4.0
+; SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
+; SDAG-FAKE16-NEXT: global_store_b32 v1, v0, s[0:1]
+; SDAG-FAKE16-NEXT: s_endpgm
+ %tanh = call float @llvm.amdgcn.tanh.f32(float 4.0) #0
+ store float %tanh, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+define amdgpu_kernel void @tanh_f32_constant_100.0(ptr addrspace(1) %out) #1 {
+; SDAG-REAL16-LABEL: tanh_f32_constant_100.0:
+; SDAG-REAL16: ; %bb.0:
+; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; SDAG-REAL16-NEXT: v_tanh_f32_e32 v0, 0x42c80000
+; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
+; SDAG-REAL16-NEXT: global_store_b32 v1, v0, s[0:1]
+; SDAG-REAL16-NEXT: s_endpgm
+;
+; SDAG-FAKE16-LABEL: tanh_f32_constant_100.0:
+; SDAG-FAKE16: ; %bb.0:
+; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; SDAG-FAKE16-NEXT: v_tanh_f32_e32 v0, 0x42c80000
+; SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
+; SDAG-FAKE16-NEXT: global_store_b32 v1, v0, s[0:1]
+; SDAG-FAKE16-NEXT: s_endpgm
+ %tanh = call float @llvm.amdgcn.tanh.f32(float 100.0) #0
+ store float %tanh, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+define amdgpu_kernel void @tanh_undef_f32(ptr addrspace(1) %out) #1 {
+; SDAG-REAL16-LABEL: tanh_undef_f32:
+; SDAG-REAL16: ; %bb.0:
+; SDAG-REAL16-NEXT: s_endpgm
+;
+; SDAG-FAKE16-LABEL: tanh_undef_f32:
+; SDAG-FAKE16: ; %bb.0:
+; SDAG-FAKE16-NEXT: s_endpgm
+ %tanh = call float @llvm.amdgcn.tanh.f32(float undef)
+ store float %tanh, ptr addrspace(1) %out, align 4
+ ret void
+}
+
define amdgpu_kernel void @tanh_bf16(ptr addrspace(1) %out, bfloat %src) #1 {
; SDAG-REAL16-LABEL: tanh_bf16:
; SDAG-REAL16: ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu-flat.mir b/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu-flat.mir
new file mode 100644
index 0000000..95ccf6c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu-flat.mir
@@ -0,0 +1,357 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=si-fix-sgpr-copies -o - %s | FileCheck --check-prefix=GCN %s
+
+---
+name: flat_load_saddr_to_valu
+tracksRegLiveness: true
+body: |
+ ; GCN-LABEL: name: flat_load_saddr_to_valu
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x80000000)
+ ; GCN-NEXT: liveins: $vgpr0_vgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[PHI:%[0-9]+]]:vreg_64_align2 = PHI [[COPY]], %bb.0, %7, %bb.1
+ ; GCN-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PHI]], 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
+ ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec
+ ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec
+ ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
+ ; GCN-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U64_e64 [[REG_SEQUENCE]], 0, implicit $exec
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]], implicit $exec
+ ; GCN-NEXT: $vcc_lo = S_AND_B32 $exec_lo, [[V_CMP_NE_U64_e64_]], implicit-def $scc
+ ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc_lo
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ %0:sreg_64 = COPY $vgpr0_vgpr1
+
+ bb.1:
+ %1:sreg_64_xexec_xnull = PHI %0, %bb.0, %2, %bb.1
+ %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %4:vgpr_32 = FLAT_LOAD_DWORD_SADDR %1, %3, 0, 0, implicit $exec, implicit $flat_scr
+ %2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc
+ S_CMP_LG_U64 %2, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.1, implicit $scc
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: flat_load_saddr_to_valu_non_zero_vaddr
+tracksRegLiveness: true
+body: |
+ ; GCN-LABEL: name: flat_load_saddr_to_valu_non_zero_vaddr
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x80000000)
+ ; GCN-NEXT: liveins: $vgpr0_vgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[PHI:%[0-9]+]]:vreg_64_align2 = PHI [[COPY]], %bb.0, %7, %bb.1
+ ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+ ; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub0, implicit $exec
+ ; GCN-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec
+ ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GCN-NEXT: [[FLAT_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD_SADDR [[REG_SEQUENCE]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
+ ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec
+ ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec
+ ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
+ ; GCN-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U64_e64 [[REG_SEQUENCE1]], 0, implicit $exec
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]], implicit $exec
+ ; GCN-NEXT: $vcc_lo = S_AND_B32 $exec_lo, [[V_CMP_NE_U64_e64_]], implicit-def $scc
+ ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc_lo
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ %0:sreg_64 = COPY $vgpr0_vgpr1
+
+ bb.1:
+ %1:sreg_64_xexec_xnull = PHI %0, %bb.0, %2, %bb.1
+ %3:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+ %4:vgpr_32 = FLAT_LOAD_DWORD_SADDR %1, %3, 0, 0, implicit $exec, implicit $flat_scr
+ %2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc
+ S_CMP_LG_U64 %2, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.1, implicit $scc
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+
+---
+name: flat_load_saddr_to_valu_undef_vaddr
+tracksRegLiveness: true
+body: |
+ ; GCN-LABEL: name: flat_load_saddr_to_valu_undef_vaddr
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x80000000)
+ ; GCN-NEXT: liveins: $vgpr0_vgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[PHI:%[0-9]+]]:vreg_64_align2 = PHI [[COPY]], %bb.0, %7, %bb.1
+ ; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub0, implicit $exec
+ ; GCN-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec
+ ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GCN-NEXT: [[FLAT_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD_SADDR [[REG_SEQUENCE]], undef %4:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
+ ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec
+ ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec
+ ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
+ ; GCN-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U64_e64 [[REG_SEQUENCE1]], 0, implicit $exec
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]], implicit $exec
+ ; GCN-NEXT: $vcc_lo = S_AND_B32 $exec_lo, [[V_CMP_NE_U64_e64_]], implicit-def $scc
+ ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc_lo
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ %0:sreg_64 = COPY $vgpr0_vgpr1
+
+ bb.1:
+ %1:sreg_64_xexec_xnull = PHI %0, %bb.0, %2, %bb.1
+ %4:vgpr_32 = FLAT_LOAD_DWORD_SADDR %1, undef %3:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr
+ %2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc
+ S_CMP_LG_U64 %2, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.1, implicit $scc
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: flat_store_saddr_to_valu
+tracksRegLiveness: true
+body: |
+ ; GCN-LABEL: name: flat_store_saddr_to_valu
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x80000000)
+ ; GCN-NEXT: liveins: $vgpr0_vgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[PHI:%[0-9]+]]:vreg_64_align2 = PHI [[COPY]], %bb.0, %7, %bb.1
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: FLAT_STORE_DWORD [[PHI]], [[DEF]], 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
+ ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec
+ ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec
+ ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
+ ; GCN-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U64_e64 [[REG_SEQUENCE]], 0, implicit $exec
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]], implicit $exec
+ ; GCN-NEXT: $vcc_lo = S_AND_B32 $exec_lo, [[V_CMP_NE_U64_e64_]], implicit-def $scc
+ ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc_lo
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ %0:sreg_64 = COPY $vgpr0_vgpr1
+
+ bb.1:
+ %1:sreg_64_xexec_xnull = PHI %0, %bb.0, %2, %bb.1
+ %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %4:vgpr_32 = IMPLICIT_DEF
+ FLAT_STORE_DWORD_SADDR %3, %4, %1, 0, 0, implicit $exec, implicit $flat_scr
+ %2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc
+ S_CMP_LG_U64 %2, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.1, implicit $scc
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: flat_atomic_noret_saddr_to_valu
+tracksRegLiveness: true
+body: |
+ ; GCN-LABEL: name: flat_atomic_noret_saddr_to_valu
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x80000000)
+ ; GCN-NEXT: liveins: $vgpr0_vgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[PHI:%[0-9]+]]:vreg_64_align2 = PHI [[COPY]], %bb.0, %6, %bb.1
+ ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: FLAT_ATOMIC_ADD [[PHI]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
+ ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec
+ ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec
+ ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
+ ; GCN-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U64_e64 [[REG_SEQUENCE]], 0, implicit $exec
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]], implicit $exec
+ ; GCN-NEXT: $vcc_lo = S_AND_B32 $exec_lo, [[V_CMP_NE_U64_e64_]], implicit-def $scc
+ ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc_lo
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ %0:sreg_64 = COPY $vgpr0_vgpr1
+
+ bb.1:
+ %1:sreg_64_xexec_xnull = PHI %0, %bb.0, %2, %bb.1
+ %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ FLAT_ATOMIC_ADD_SADDR %3, %3, %1, 0, 0, implicit $exec, implicit $flat_scr
+ %2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc
+ S_CMP_LG_U64 %2, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.1, implicit $scc
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: flat_atomic_rtn_saddr_to_valu
+tracksRegLiveness: true
+body: |
+ ; GCN-LABEL: name: flat_atomic_rtn_saddr_to_valu
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x80000000)
+ ; GCN-NEXT: liveins: $vgpr0_vgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[PHI:%[0-9]+]]:vreg_64_align2 = PHI [[COPY]], %bb.0, %7, %bb.1
+ ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[PHI]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
+ ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec
+ ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec
+ ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
+ ; GCN-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U64_e64 [[REG_SEQUENCE]], 0, implicit $exec
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]], implicit $exec
+ ; GCN-NEXT: $vcc_lo = S_AND_B32 $exec_lo, [[V_CMP_NE_U64_e64_]], implicit-def $scc
+ ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc_lo
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ %0:sreg_64 = COPY $vgpr0_vgpr1
+
+ bb.1:
+ %1:sreg_64_xexec_xnull = PHI %0, %bb.0, %2, %bb.1
+ %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %4:vgpr_32 = FLAT_ATOMIC_ADD_SADDR_RTN %3, %3, %1, 0, 0, implicit $exec, implicit $flat_scr
+ %2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc
+ S_CMP_LG_U64 %2, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.1, implicit $scc
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: scratch_load_saddr_to_valu
+tracksRegLiveness: true
+body: |
+ ; GCN-LABEL: name: scratch_load_saddr_to_valu
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x80000000)
+ ; GCN-NEXT: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, %6, %bb.1
+ ; GCN-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[PHI]], 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PHI]], 1, implicit $exec
+ ; GCN-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 [[V_AND_B32_e64_]], 0, implicit $exec
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_AND_B32_e64_]], implicit $exec
+ ; GCN-NEXT: $vcc_lo = S_AND_B32 $exec_lo, [[V_CMP_NE_U32_e64_]], implicit-def $scc
+ ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc_lo
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0
+ %0:sgpr_32 = COPY $vgpr0
+
+ bb.1:
+ %1:sgpr_32 = PHI %0, %bb.0, %2, %bb.1
+ %4:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %1, 0, 0, implicit $exec, implicit $flat_scr
+ %2:sgpr_32 = S_AND_B32 %1, 1, implicit-def $scc
+ S_CMP_LG_U32 %2, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.1, implicit $scc
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: scratch_store_saddr_to_valu
+tracksRegLiveness: true
+body: |
+ ; GCN-LABEL: name: scratch_store_saddr_to_valu
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x80000000)
+ ; GCN-NEXT: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, %6, %bb.1
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: SCRATCH_STORE_DWORD [[DEF]], [[PHI]], 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PHI]], 1, implicit $exec
+ ; GCN-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 [[V_AND_B32_e64_]], 0, implicit $exec
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_AND_B32_e64_]], implicit $exec
+ ; GCN-NEXT: $vcc_lo = S_AND_B32 $exec_lo, [[V_CMP_NE_U32_e64_]], implicit-def $scc
+ ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc_lo
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0
+ %0:sgpr_32 = COPY $vgpr0
+
+ bb.1:
+ %1:sgpr_32 = PHI %0, %bb.0, %2, %bb.1
+ %4:vgpr_32 = IMPLICIT_DEF
+ SCRATCH_STORE_DWORD_SADDR %4, %1, 0, 0, implicit $exec, implicit $flat_scr
+ %2:sgpr_32 = S_AND_B32 %1, 1, implicit-def $scc
+ S_CMP_LG_U32 %2, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.1, implicit $scc
+
+ bb.2:
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir b/llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir
index c2c5340..8145a1d7 100644
--- a/llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir
+++ b/llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir
@@ -167,3 +167,59 @@ body: |
%1:sreg_32 = COPY %0
S_BRANCH %bb.2
...
+
+---
+
+name: phi_moveimm_av_pseudo_input
+tracksRegLiveness: true
+body: |
+ ; GCN-LABEL: name: phi_moveimm_av_pseudo_input
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x80000000)
+ ; GCN-NEXT: liveins: $sgpr0, $sgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[AV_MOV_:%[0-9]+]]:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc
+ ; GCN-NEXT: S_BRANCH %bb.2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: successors: %bb.3(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI %5, %bb.3, [[S_ADD_U32_]], %bb.1
+ ; GCN-NEXT: S_BRANCH %bb.3
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.3:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GCN-NEXT: S_BRANCH %bb.2
+ bb.0:
+ successors: %bb.1
+ liveins: $sgpr0, $sgpr1
+
+ %0:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
+
+ %4:sreg_32 = COPY $sgpr0
+ %5:sreg_32 = COPY $sgpr1
+
+ bb.1:
+ successors: %bb.2
+ %2:sreg_32 = S_ADD_U32 %4, %5, implicit-def $scc
+ S_BRANCH %bb.2
+
+ bb.2:
+ successors: %bb.3
+ %3:sreg_32 = PHI %1, %bb.3, %2, %bb.1
+ S_BRANCH %bb.3
+
+ bb.3:
+ successors: %bb.2
+ %1:sreg_32 = COPY %0
+ S_BRANCH %bb.2
+...
diff --git a/llvm/test/CodeGen/DirectX/flatten-array.ll b/llvm/test/CodeGen/DirectX/flatten-array.ll
index 1376a1d..a2e1055 100644
--- a/llvm/test/CodeGen/DirectX/flatten-array.ll
+++ b/llvm/test/CodeGen/DirectX/flatten-array.ll
@@ -218,6 +218,28 @@ define void @two_index_gep_const() {
ret void
}
+define void @zero_index_global() {
+ ; CHECK-LABEL: define void @zero_index_global(
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw [4 x float], ptr addrspace(3) @g.1dim, i32 0, i32 0
+ ; CHECK-NEXT: load float, ptr addrspace(3) [[GEP]], align 4
+ ; CHECK-NEXT: ret void
+ %1 = getelementptr inbounds nuw [2 x [2 x float]], ptr addrspace(3) @g, i32 0, i32 0, i32 0
+ %2 = load float, ptr addrspace(3) %1, align 4
+ ret void
+}
+
+; Note: A ConstantExpr GEP with all 0 indices is equivalent to the pointer
+; operand of the GEP. Therefore the visitLoadInst will not see the pointer operand
+; as a ConstantExpr GEP and will not create a GEP instruction to be visited.
+; The later dxil-legalize pass will insert a GEP in this instance.
+define void @zero_index_global_const() {
+ ; CHECK-LABEL: define void @zero_index_global_const(
+ ; CHECK-NEXT: load float, ptr addrspace(3) @g.1dim, align 4
+ ; CHECK-NEXT: ret void
+ %1 = load float, ptr addrspace(3) getelementptr inbounds nuw ([2 x [2 x float]], ptr addrspace(3) @g, i32 0, i32 0, i32 0), align 4
+ ret void
+}
+
define void @gep_4d_index_test() {
; CHECK-LABEL: gep_4d_index_test
; CHECK: [[a:%.*]] = alloca [16 x i32], align 4
diff --git a/llvm/test/CodeGen/DirectX/legalize-load-store-array-alloca.ll b/llvm/test/CodeGen/DirectX/legalize-load-store-array-alloca.ll
index b25b3de..c6789ac 100644
--- a/llvm/test/CodeGen/DirectX/legalize-load-store-array-alloca.ll
+++ b/llvm/test/CodeGen/DirectX/legalize-load-store-array-alloca.ll
@@ -21,3 +21,21 @@ define void @store() {
store i32 0, ptr %a, align 4
ret void
}
+
+@g = local_unnamed_addr addrspace(3) global [4 x i32] zeroinitializer, align 4
+define void @load_whole_global () {
+; CHECK-LABEL: define void @load_whole_global
+; CHECK-NEXT: load [4 x i32], ptr addrspace(3) @g, align 4
+; CHECK-NEXT: ret void
+ %l = load [4 x i32], ptr addrspace(3) @g, align 4
+ ret void
+}
+
+define void @load_global_index0 () {
+; CHECK-LABEL: define void @load_global_index0
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw [4 x i32], ptr addrspace(3) @g, i32 0, i32 0
+; CHECK-NEXT: load i32, ptr addrspace(3) [[GEP]], align 4
+; CHECK-NEXT: ret void
+ %l = load i32, ptr addrspace(3) @g, align 4
+ ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll b/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
index 27a8925..0c91c53 100644
--- a/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
+++ b/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
@@ -24,7 +24,8 @@
define <4 x i32> @load_array_vec_test() #0 {
; CHECK-LABEL: define <4 x i32> @load_array_vec_test(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) @arrayofVecData.scalarized.1dim, align 4
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw [8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) [[GEP]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 1), align 4
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 2), align 4
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 3), align 4
@@ -52,7 +53,8 @@ define <4 x i32> @load_array_vec_test() #0 {
define <4 x i32> @load_vec_test() #0 {
; CHECK-LABEL: define <4 x i32> @load_vec_test(
; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) @vecData.scalarized, align 4
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw [4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) [[GEP]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) getelementptr ([4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 1), align 4
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) getelementptr ([4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 2), align 4
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) getelementptr ([4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 3), align 4
@@ -203,7 +205,8 @@ define <4 x i32> @load_static_array_of_vec_from_i8_gep_test(i32 %index) #0 {
define <4 x i32> @multid_load_test() #0 {
; CHECK-LABEL: define <4 x i32> @multid_load_test(
; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, align 4
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw [36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) [[GEP]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) getelementptr ([36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 1), align 4
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) getelementptr ([36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 2), align 4
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) getelementptr ([36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 3), align 4
diff --git a/llvm/test/CodeGen/DirectX/scalar-store.ll b/llvm/test/CodeGen/DirectX/scalar-store.ll
index a124c66..4394235 100644
--- a/llvm/test/CodeGen/DirectX/scalar-store.ll
+++ b/llvm/test/CodeGen/DirectX/scalar-store.ll
@@ -14,7 +14,8 @@
; CHECK-LABEL: store_array_vec_test
define void @store_array_vec_test () local_unnamed_addr #0 {
-; CHECK-NEXT: store float 1.000000e+00, ptr addrspace(3) @arrayofVecData.scalarized.1dim, align 16
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw [6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 0
+; CHECK-NEXT: store float 1.000000e+00, ptr addrspace(3) [[GEP]], align 16
; CHECK-NEXT: store float 2.000000e+00, ptr addrspace(3) getelementptr ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 1), align 4
; CHECK-NEXT: store float 3.000000e+00, ptr addrspace(3) getelementptr ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 2), align 8
; CHECK-NEXT: store float 2.000000e+00, ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 3), align 16
@@ -30,7 +31,8 @@ define void @store_array_vec_test () local_unnamed_addr #0 {
; CHECK-LABEL: store_vec_test
define void @store_vec_test(<4 x i32> %inputVec) #0 {
; CHECK-NEXT: [[INPUTVEC_I01:%.*]] = extractelement <4 x i32> %inputVec, i32 0
-; CHECK-NEXT: store i32 [[INPUTVEC_I01]], ptr addrspace(3) @vecData.scalarized, align 4
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw [4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 0
+; CHECK-NEXT: store i32 [[INPUTVEC_I01]], ptr addrspace(3) [[GEP]], align 4
; CHECK-NEXT: [[INPUTVEC_I12:%.*]] = extractelement <4 x i32> %inputVec, i32 1
; CHECK-NEXT: store i32 [[INPUTVEC_I12]], ptr addrspace(3) getelementptr ([4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 1), align 4
; CHECK-NEXT: [[INPUTVEC_I23:%.*]] = extractelement <4 x i32> %inputVec, i32 2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index 019bbe2..dbc8e89 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -1721,8 +1721,9 @@ define void @load_factor4_one_active_storeback_full(ptr %ptr) {
define <4 x i32> @vp_load_factor3_one_active(ptr %ptr) {
; CHECK-LABEL: vp_load_factor3_one_active:
; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 12
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vlseg3e32.v v8, (a0)
+; CHECK-NEXT: vlse32.v v8, (a0), a1
; CHECK-NEXT: ret
%interleaved.vec = tail call <12 x i32> @llvm.vp.load.v12i32.p0(ptr %ptr, <12 x i1> splat (i1 true), i32 12)
%v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
@@ -1732,8 +1733,9 @@ define <4 x i32> @vp_load_factor3_one_active(ptr %ptr) {
define <4 x i32> @vp_load_factor5_one_active(ptr %ptr) {
; CHECK-LABEL: vp_load_factor5_one_active:
; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 20
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vlseg5e32.v v8, (a0)
+; CHECK-NEXT: vlse32.v v8, (a0), a1
; CHECK-NEXT: ret
%interleaved.vec = tail call <20 x i32> @llvm.vp.load.v20i32.p0(ptr %ptr, <20 x i1> splat (i1 true), i32 20)
%v0 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 0, i32 5, i32 10, i32 15>
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/builtin_calls.ll b/llvm/test/CodeGen/SPIRV/transcoding/builtin_calls.ll
index 0a02a8b..b179732 100644
--- a/llvm/test/CodeGen/SPIRV/transcoding/builtin_calls.ll
+++ b/llvm/test/CodeGen/SPIRV/transcoding/builtin_calls.ll
@@ -1,17 +1,109 @@
; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %}
-; CHECK-SPIRV-DAG: OpDecorate %[[#Id:]] BuiltIn GlobalInvocationId
-; CHECK-SPIRV-DAG: OpDecorate %[[#Id:]] BuiltIn GlobalLinearId
-; CHECK-SPIRV: %[[#Id:]] = OpVariable %[[#]]
-; CHECK-SPIRV: %[[#Id:]] = OpVariable %[[#]]
+; CHECK-SPIRV-DAG: OpDecorate %[[#Id0:]] BuiltIn GlobalLinearId
+; CHECK-SPIRV-DAG: OpDecorate %[[#Id1:]] BuiltIn GlobalInvocationId
+; CHECK-SPIRV-DAG: OpDecorate %[[#Id2:]] BuiltIn LocalInvocationIndex
+; CHECK-SPIRV-DAG: OpDecorate %[[#Id3:]] BuiltIn WorkDim
+; CHECK-SPIRV-DAG: OpDecorate %[[#Id4:]] BuiltIn SubgroupSize
+; CHECK-SPIRV-DAG: OpDecorate %[[#Id5:]] BuiltIn SubgroupMaxSize
+; CHECK-SPIRV-DAG: OpDecorate %[[#Id6:]] BuiltIn NumSubgroups
+; CHECK-SPIRV-DAG: OpDecorate %[[#Id7:]] BuiltIn NumEnqueuedSubgroups
+; CHECK-SPIRV-DAG: OpDecorate %[[#Id8:]] BuiltIn SubgroupId
+; CHECK-SPIRV-DAG: OpDecorate %[[#Id9:]] BuiltIn SubgroupLocalInvocationId
+; CHECK-SPIRV-DAG: OpDecorate %[[#Id10:]] BuiltIn SubgroupEqMask
+; CHECK-SPIRV-DAG: OpDecorate %[[#Id11:]] BuiltIn SubgroupGeMask
+; CHECK-SPIRV-DAG: OpDecorate %[[#Id12:]] BuiltIn SubgroupGtMask
+; CHECK-SPIRV-DAG: OpDecorate %[[#Id13:]] BuiltIn SubgroupLeMask
+; CHECK-SPIRV-DAG: OpDecorate %[[#Id14:]] BuiltIn SubgroupLtMask
+; CHECK-SPIRV-DAG: OpDecorate %[[#Id15:]] BuiltIn LocalInvocationId
+; CHECK-SPIRV-DAG: OpDecorate %[[#Id16:]] BuiltIn WorkgroupSize
+; CHECK-SPIRV-DAG: OpDecorate %[[#Id17:]] BuiltIn GlobalSize
+; CHECK-SPIRV-DAG: OpDecorate %[[#Id18:]] BuiltIn WorkgroupId
+; CHECK-SPIRV-DAG: OpDecorate %[[#Id19:]] BuiltIn EnqueuedWorkgroupSize
+; CHECK-SPIRV-DAG: OpDecorate %[[#Id20:]] BuiltIn NumWorkgroups
+; CHECK-SPIRV-DAG: OpDecorate %[[#Id21:]] BuiltIn GlobalOffset
+
+; CHECK-SPIRV: %[[#Id0:]] = OpVariable %[[#]] Input
+; CHECK-SPIRV: %[[#Id1:]] = OpVariable %[[#]] Input
+; CHECK-SPIRV: %[[#Id2:]] = OpVariable %[[#]] Input
+; CHECK-SPIRV: %[[#Id3:]] = OpVariable %[[#]] Input
+; CHECK-SPIRV: %[[#Id4:]] = OpVariable %[[#]] Input
+; CHECK-SPIRV: %[[#Id5:]] = OpVariable %[[#]] Input
+; CHECK-SPIRV: %[[#Id6:]] = OpVariable %[[#]] Input
+; CHECK-SPIRV: %[[#Id7:]] = OpVariable %[[#]] Input
+; CHECK-SPIRV: %[[#Id8:]] = OpVariable %[[#]] Input
+; CHECK-SPIRV: %[[#Id9:]] = OpVariable %[[#]] Input
+; CHECK-SPIRV: %[[#Id10:]] = OpVariable %[[#]] Input
+; CHECK-SPIRV: %[[#Id11:]] = OpVariable %[[#]] Input
+; CHECK-SPIRV: %[[#Id12:]] = OpVariable %[[#]] Input
+; CHECK-SPIRV: %[[#Id13:]] = OpVariable %[[#]] Input
+; CHECK-SPIRV: %[[#Id14:]] = OpVariable %[[#]] Input
+; CHECK-SPIRV: %[[#Id15:]] = OpVariable %[[#]] Input
+; CHECK-SPIRV: %[[#Id16:]] = OpVariable %[[#]] Input
+; CHECK-SPIRV: %[[#Id17:]] = OpVariable %[[#]] Input
+; CHECK-SPIRV: %[[#Id18:]] = OpVariable %[[#]] Input
+; CHECK-SPIRV: %[[#Id19:]] = OpVariable %[[#]] Input
+; CHECK-SPIRV: %[[#Id20:]] = OpVariable %[[#]] Input
+; CHECK-SPIRV: %[[#Id21:]] = OpVariable %[[#]] Input
define spir_kernel void @f() {
entry:
%0 = call spir_func i32 @_Z29__spirv_BuiltInGlobalLinearIdv()
%1 = call spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 1)
+ %2 = call spir_func i64 @_Z35__spirv_BuiltInLocalInvocationIndexv()
+ %3 = call spir_func i32 @_Z22__spirv_BuiltInWorkDimv()
+ %4 = call spir_func i32 @_Z27__spirv_BuiltInSubgroupSizev()
+ %5 = call spir_func i32 @_Z30__spirv_BuiltInSubgroupMaxSizev()
+ %6 = call spir_func i32 @_Z27__spirv_BuiltInNumSubgroupsv()
+ %7 = call spir_func i32 @_Z35__spirv_BuiltInNumEnqueuedSubgroupsv()
+ %8 = call spir_func i32 @_Z25__spirv_BuiltInSubgroupIdv()
+ %9 = call spir_func i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv()
+ %10 = call spir_func <4 x i32> @_Z29__spirv_BuiltInSubgroupEqMaskv()
+ %11 = call spir_func <4 x i32> @_Z32__spirv_BuiltInSubgroupEqMaskKHRv()
+ %12 = call spir_func <4 x i32> @_Z29__spirv_BuiltInSubgroupGeMaskv()
+ %13 = call spir_func <4 x i32> @_Z32__spirv_BuiltInSubgroupGeMaskKHRv()
+ %14 = call spir_func <4 x i32> @_Z29__spirv_BuiltInSubgroupGtMaskv()
+ %15 = call spir_func <4 x i32> @_Z32__spirv_BuiltInSubgroupGtMaskKHRv()
+ %16 = call spir_func <4 x i32> @_Z29__spirv_BuiltInSubgroupLeMaskv()
+ %17 = call spir_func <4 x i32> @_Z32__spirv_BuiltInSubgroupLeMaskKHRv()
+ %18 = call spir_func <4 x i32> @_Z29__spirv_BuiltInSubgroupLtMaskv()
+ %19 = call spir_func <4 x i32> @_Z32__spirv_BuiltInSubgroupLtMaskKHRv()
+ %20 = call spir_func i64 @_Z32__spirv_BuiltInLocalInvocationIdi(i32 0)
+ %21 = call spir_func i64 @_Z28__spirv_BuiltInWorkgroupSizei(i32 0)
+ %22 = call spir_func i64 @_Z25__spirv_BuiltInGlobalSizei(i32 0)
+ %23 = call spir_func i64 @_Z26__spirv_BuiltInWorkgroupIdi(i32 0)
+ %24 = call spir_func i64 @_Z36__spirv_BuiltInEnqueuedWorkgroupSizei(i32 0)
+ %25 = call spir_func i64 @_Z28__spirv_BuiltInNumWorkgroupsi(i32 0)
+ %26 = call spir_func i64 @_Z27__spirv_BuiltInGlobalOffseti(i32 0)
+
ret void
}
declare spir_func i32 @_Z29__spirv_BuiltInGlobalLinearIdv()
declare spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32)
+declare spir_func i64 @_Z35__spirv_BuiltInLocalInvocationIndexv()
+declare spir_func i32 @_Z22__spirv_BuiltInWorkDimv()
+declare spir_func i32 @_Z27__spirv_BuiltInSubgroupSizev()
+declare spir_func i32 @_Z30__spirv_BuiltInSubgroupMaxSizev()
+declare spir_func i32 @_Z27__spirv_BuiltInNumSubgroupsv()
+declare spir_func i32 @_Z35__spirv_BuiltInNumEnqueuedSubgroupsv()
+declare spir_func i32 @_Z25__spirv_BuiltInSubgroupIdv()
+declare spir_func i32 @_Z40__spirv_BuiltInSubgroupLocalInvocationIdv()
+declare spir_func <4 x i32> @_Z29__spirv_BuiltInSubgroupEqMaskv()
+declare spir_func <4 x i32> @_Z32__spirv_BuiltInSubgroupEqMaskKHRv()
+declare spir_func <4 x i32> @_Z29__spirv_BuiltInSubgroupGeMaskv()
+declare spir_func <4 x i32> @_Z32__spirv_BuiltInSubgroupGeMaskKHRv()
+declare spir_func <4 x i32> @_Z29__spirv_BuiltInSubgroupGtMaskv()
+declare spir_func <4 x i32> @_Z32__spirv_BuiltInSubgroupGtMaskKHRv()
+declare spir_func <4 x i32> @_Z29__spirv_BuiltInSubgroupLeMaskv()
+declare spir_func <4 x i32> @_Z32__spirv_BuiltInSubgroupLeMaskKHRv()
+declare spir_func <4 x i32> @_Z29__spirv_BuiltInSubgroupLtMaskv()
+declare spir_func <4 x i32> @_Z32__spirv_BuiltInSubgroupLtMaskKHRv()
+declare spir_func i64 @_Z32__spirv_BuiltInLocalInvocationIdi(i32)
+declare spir_func i64 @_Z28__spirv_BuiltInWorkgroupSizei(i32)
+declare spir_func i64 @_Z25__spirv_BuiltInGlobalSizei(i32)
+declare spir_func i64 @_Z26__spirv_BuiltInWorkgroupIdi(i32)
+declare spir_func i64 @_Z36__spirv_BuiltInEnqueuedWorkgroupSizei(i32)
+declare spir_func i64 @_Z28__spirv_BuiltInNumWorkgroupsi(i32)
+declare spir_func i64 @_Z27__spirv_BuiltInGlobalOffseti(i32)
diff --git a/llvm/test/CodeGen/X86/invalid-operand-bundle-call.ll b/llvm/test/CodeGen/X86/invalid-operand-bundle-call.ll
index ac4963f..17065a4 100644
--- a/llvm/test/CodeGen/X86/invalid-operand-bundle-call.ll
+++ b/llvm/test/CodeGen/X86/invalid-operand-bundle-call.ll
@@ -1,10 +1,10 @@
; RUN: not llc -mtriple=x86_64-unknown-linux-gnu < %s 2>&1 | FileCheck %s
-; CHECK: LLVM ERROR: cannot lower calls with arbitrary operand bundles: foo
+; CHECK: LLVM ERROR: cannot lower calls with arbitrary operand bundles: foo, bar, baz
declare void @g()
define void @f(i32 %arg) {
- call void @g() [ "foo"(i32 %arg) ]
+ call void @g() [ "foo"(i32 %arg), "bar"(i32 %arg), "baz"(i32 %arg) ]
ret void
}
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
index f51d709..f9e217d 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
@@ -28,6 +28,51 @@ v_mov_b64 v[4:5], 0.5
v_mov_b64 v[254:255], 0xaf123456
// GFX1250: v_mov_b64_e32 v[254:255], lit64(0xaf123456) ; encoding: [0xfe,0x3a,0xfc,0x7f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+v_tanh_f32 v5, v1
+// GFX1250: v_tanh_f32_e32 v5, v1 ; encoding: [0x01,0x3d,0x0a,0x7e]
+
+v_tanh_f32 v5, v255
+// GFX1250: v_tanh_f32_e32 v5, v255 ; encoding: [0xff,0x3d,0x0a,0x7e]
+
+v_tanh_f32 v5, s1
+// GFX1250: v_tanh_f32_e32 v5, s1 ; encoding: [0x01,0x3c,0x0a,0x7e]
+
+v_tanh_f32 v5, s105
+// GFX1250: v_tanh_f32_e32 v5, s105 ; encoding: [0x69,0x3c,0x0a,0x7e]
+
+v_tanh_f32 v5, vcc_lo
+// GFX1250: v_tanh_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x3c,0x0a,0x7e]
+
+v_tanh_f32 v5, vcc_hi
+// GFX1250: v_tanh_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x3c,0x0a,0x7e]
+
+v_tanh_f32 v5, ttmp15
+// GFX1250: v_tanh_f32_e32 v5, ttmp15 ; encoding: [0x7b,0x3c,0x0a,0x7e]
+
+v_tanh_f32 v5, m0
+// GFX1250: v_tanh_f32_e32 v5, m0 ; encoding: [0x7d,0x3c,0x0a,0x7e]
+
+v_tanh_f32 v5, exec_lo
+// GFX1250: v_tanh_f32_e32 v5, exec_lo ; encoding: [0x7e,0x3c,0x0a,0x7e]
+
+v_tanh_f32 v5, exec_hi
+// GFX1250: v_tanh_f32_e32 v5, exec_hi ; encoding: [0x7f,0x3c,0x0a,0x7e]
+
+v_tanh_f32 v5, null
+// GFX1250: v_tanh_f32_e32 v5, null ; encoding: [0x7c,0x3c,0x0a,0x7e]
+
+v_tanh_f32 v5, -1
+// GFX1250: v_tanh_f32_e32 v5, -1 ; encoding: [0xc1,0x3c,0x0a,0x7e]
+
+v_tanh_f32 v5, 0.5
+// GFX1250: v_tanh_f32_e32 v5, 0.5 ; encoding: [0xf0,0x3c,0x0a,0x7e]
+
+v_tanh_f32 v5, src_scc
+// GFX1250: v_tanh_f32_e32 v5, src_scc ; encoding: [0xfd,0x3c,0x0a,0x7e]
+
+v_tanh_f32 v255, 0xaf123456
+// GFX1250: v_tanh_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x3c,0xfe,0x7f,0x56,0x34,0x12,0xaf]
+
v_tanh_bf16 v5, v1
// GFX1250: v_tanh_bf16_e32 v5, v1 ; encoding: [0x01,0x95,0x0a,0x7e]
@@ -343,6 +388,51 @@ v_sin_bf16 v5, src_scc
v_sin_bf16 v127, 0x8000
// GFX1250: v_sin_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xfc,0xfe,0x7e,0x00,0x80,0x00,0x00]
+v_cos_bf16 v5, v1
+// GFX1250: v_cos_bf16_e32 v5, v1 ; encoding: [0x01,0xff,0x0a,0x7e]
+
+v_cos_bf16 v5, v127
+// GFX1250: v_cos_bf16_e32 v5, v127 ; encoding: [0x7f,0xff,0x0a,0x7e]
+
+v_cos_bf16 v5, s1
+// GFX1250: v_cos_bf16_e32 v5, s1 ; encoding: [0x01,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, s105
+// GFX1250: v_cos_bf16_e32 v5, s105 ; encoding: [0x69,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, vcc_lo
+// GFX1250: v_cos_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, vcc_hi
+// GFX1250: v_cos_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, ttmp15
+// GFX1250: v_cos_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, m0
+// GFX1250: v_cos_bf16_e32 v5, m0 ; encoding: [0x7d,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, exec_lo
+// GFX1250: v_cos_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, exec_hi
+// GFX1250: v_cos_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, null
+// GFX1250: v_cos_bf16_e32 v5, null ; encoding: [0x7c,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, -1
+// GFX1250: v_cos_bf16_e32 v5, -1 ; encoding: [0xc1,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, 0.5
+// GFX1250: v_cos_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, src_scc
+// GFX1250: v_cos_bf16_e32 v5, src_scc ; encoding: [0xfd,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v127, 0x8000
+// GFX1250: v_cos_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xfe,0xfe,0x7e,0x00,0x80,0x00,0x00]
+
v_cvt_f32_bf16 v5, v1
// GFX1250: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xe5,0x0a,0x7e]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
index 39fc73d..d51ef68b 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
@@ -28,6 +28,51 @@ v_mov_b64 v[4:5], 0.5
v_mov_b64 v[254:255], 0xaf123456
// GFX1250: v_mov_b64_e32 v[254:255], lit64(0xaf123456) ; encoding: [0xfe,0x3a,0xfc,0x7f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+v_tanh_f32 v5, v1
+// GFX1250: v_tanh_f32_e32 v5, v1 ; encoding: [0x01,0x3d,0x0a,0x7e]
+
+v_tanh_f32 v5, v255
+// GFX1250: v_tanh_f32_e32 v5, v255 ; encoding: [0xff,0x3d,0x0a,0x7e]
+
+v_tanh_f32 v5, s1
+// GFX1250: v_tanh_f32_e32 v5, s1 ; encoding: [0x01,0x3c,0x0a,0x7e]
+
+v_tanh_f32 v5, s105
+// GFX1250: v_tanh_f32_e32 v5, s105 ; encoding: [0x69,0x3c,0x0a,0x7e]
+
+v_tanh_f32 v5, vcc_lo
+// GFX1250: v_tanh_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x3c,0x0a,0x7e]
+
+v_tanh_f32 v5, vcc_hi
+// GFX1250: v_tanh_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x3c,0x0a,0x7e]
+
+v_tanh_f32 v5, ttmp15
+// GFX1250: v_tanh_f32_e32 v5, ttmp15 ; encoding: [0x7b,0x3c,0x0a,0x7e]
+
+v_tanh_f32 v5, m0
+// GFX1250: v_tanh_f32_e32 v5, m0 ; encoding: [0x7d,0x3c,0x0a,0x7e]
+
+v_tanh_f32 v5, exec_lo
+// GFX1250: v_tanh_f32_e32 v5, exec_lo ; encoding: [0x7e,0x3c,0x0a,0x7e]
+
+v_tanh_f32 v5, exec_hi
+// GFX1250: v_tanh_f32_e32 v5, exec_hi ; encoding: [0x7f,0x3c,0x0a,0x7e]
+
+v_tanh_f32 v5, null
+// GFX1250: v_tanh_f32_e32 v5, null ; encoding: [0x7c,0x3c,0x0a,0x7e]
+
+v_tanh_f32 v5, -1
+// GFX1250: v_tanh_f32_e32 v5, -1 ; encoding: [0xc1,0x3c,0x0a,0x7e]
+
+v_tanh_f32 v5, 0.5
+// GFX1250: v_tanh_f32_e32 v5, 0.5 ; encoding: [0xf0,0x3c,0x0a,0x7e]
+
+v_tanh_f32 v5, src_scc
+// GFX1250: v_tanh_f32_e32 v5, src_scc ; encoding: [0xfd,0x3c,0x0a,0x7e]
+
+v_tanh_f32 v255, 0xaf123456
+// GFX1250: v_tanh_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x3c,0xfe,0x7f,0x56,0x34,0x12,0xaf]
+
v_tanh_bf16 v5, v1
// GFX1250: v_tanh_bf16_e32 v5, v1 ; encoding: [0x01,0x95,0x0a,0x7e]
@@ -364,6 +409,54 @@ v_sin_bf16 v127, 0x8000
v_sin_bf16 v5.h, v1.h
// GFX1250: v_sin_bf16_e32 v5.h, v1.h ; encoding: [0x81,0xfd,0x0a,0x7f]
+v_cos_bf16 v5, v1
+// GFX1250: v_cos_bf16_e32 v5, v1 ; encoding: [0x01,0xff,0x0a,0x7e]
+
+v_cos_bf16 v5, v127
+// GFX1250: v_cos_bf16_e32 v5, v127 ; encoding: [0x7f,0xff,0x0a,0x7e]
+
+v_cos_bf16 v5, s1
+// GFX1250: v_cos_bf16_e32 v5, s1 ; encoding: [0x01,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, s105
+// GFX1250: v_cos_bf16_e32 v5, s105 ; encoding: [0x69,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, vcc_lo
+// GFX1250: v_cos_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, vcc_hi
+// GFX1250: v_cos_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, ttmp15
+// GFX1250: v_cos_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, m0
+// GFX1250: v_cos_bf16_e32 v5, m0 ; encoding: [0x7d,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, exec_lo
+// GFX1250: v_cos_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, exec_hi
+// GFX1250: v_cos_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, null
+// GFX1250: v_cos_bf16_e32 v5, null ; encoding: [0x7c,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, -1
+// GFX1250: v_cos_bf16_e32 v5, -1 ; encoding: [0xc1,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, 0.5
+// GFX1250: v_cos_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, src_scc
+// GFX1250: v_cos_bf16_e32 v5, src_scc ; encoding: [0xfd,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v127, 0x8000
+// GFX1250: v_cos_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xfe,0xfe,0x7e,0x00,0x80,0x00,0x00]
+
+v_cos_bf16 v5.h, v1.h
+// GFX1250: v_cos_bf16_e32 v5.h, v1.h ; encoding: [0x81,0xff,0x0a,0x7f]
+
v_cvt_f32_bf16 v5, v1
// GFX1250: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xe5,0x0a,0x7e]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
index 97058eb..ae22f68 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
@@ -2,6 +2,62 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX1250 %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
+v_tanh_f32 v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_tanh_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32 v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_tanh_f32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32 v5, v1 row_mirror
+// GFX1250: v_tanh_f32_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32 v5, v1 row_half_mirror
+// GFX1250: v_tanh_f32_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32 v5, v1 row_shl:1
+// GFX1250: v_tanh_f32_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32 v5, v1 row_shl:15
+// GFX1250: v_tanh_f32_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32 v5, v1 row_shr:1
+// GFX1250: v_tanh_f32_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32 v5, v1 row_shr:15
+// GFX1250: v_tanh_f32_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32 v5, v1 row_ror:1
+// GFX1250: v_tanh_f32_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32 v5, v1 row_ror:15
+// GFX1250: v_tanh_f32_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_tanh_f32_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_tanh_f32_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_tanh_f32_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX1250: v_tanh_f32_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x3c,0xfe,0x7f,0xff,0x6f,0x35,0x30]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_tanh_bf16 v5, v1 quad_perm:[3,2,1,0]
// GFX1250: v_tanh_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x94,0x0a,0x7e,0x01,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
@@ -394,6 +450,62 @@ v_sin_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi
// GFX1250: v_sin_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfc,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_cos_bf16 v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_cos_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_cos_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_mirror
+// GFX1250: v_cos_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_half_mirror
+// GFX1250: v_cos_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_shl:1
+// GFX1250: v_cos_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_shl:15
+// GFX1250: v_cos_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_shr:1
+// GFX1250: v_cos_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_shr:15
+// GFX1250: v_cos_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_ror:1
+// GFX1250: v_cos_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_ror:15
+// GFX1250: v_cos_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_cos_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_cos_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_cos_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX1250: v_cos_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_cvt_f32_bf16 v5, v1 quad_perm:[3,2,1,0]
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
index 6a293c1..37ecb66 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
@@ -2,6 +2,62 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX1250 %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
+v_tanh_f32 v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_tanh_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32 v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_tanh_f32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32 v5, v1 row_mirror
+// GFX1250: v_tanh_f32_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32 v5, v1 row_half_mirror
+// GFX1250: v_tanh_f32_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32 v5, v1 row_shl:1
+// GFX1250: v_tanh_f32_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32 v5, v1 row_shl:15
+// GFX1250: v_tanh_f32_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32 v5, v1 row_shr:1
+// GFX1250: v_tanh_f32_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32 v5, v1 row_shr:15
+// GFX1250: v_tanh_f32_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32 v5, v1 row_ror:1
+// GFX1250: v_tanh_f32_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32 v5, v1 row_ror:15
+// GFX1250: v_tanh_f32_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_tanh_f32_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_tanh_f32_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_tanh_f32_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX1250: v_tanh_f32_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x3c,0xfe,0x7f,0xff,0x6f,0x35,0x30]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_tanh_bf16 v5, v1 quad_perm:[3,2,1,0]
// GFX1250: v_tanh_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x94,0x0a,0x7e,0x01,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
@@ -422,6 +478,66 @@ v_sin_bf16 v5.h, v1.h quad_perm:[3,2,1,0]
// GFX1250: v_sin_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7f,0x81,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_cos_bf16 v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_cos_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_cos_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_mirror
+// GFX1250: v_cos_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_half_mirror
+// GFX1250: v_cos_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_shl:1
+// GFX1250: v_cos_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_shl:15
+// GFX1250: v_cos_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_shr:1
+// GFX1250: v_cos_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_shr:15
+// GFX1250: v_cos_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_ror:1
+// GFX1250: v_cos_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_ror:15
+// GFX1250: v_cos_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_cos_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_cos_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_cos_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX1250: v_cos_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5.h, v1.h quad_perm:[3,2,1,0]
+// GFX1250: v_cos_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7f,0x81,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_cvt_f32_bf16 v5, v1 quad_perm:[3,2,1,0]
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
index d1f53c7..f24122e 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
@@ -2,6 +2,18 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX1250 %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
+v_tanh_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_tanh_f32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x3c,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_tanh_f32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x3c,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_tanh_f32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x3c,0xfe,0x7f,0xff,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_tanh_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_tanh_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x94,0x0a,0x7e,0x01,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
@@ -86,6 +98,18 @@ v_sin_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX1250: v_sin_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfc,0xfe,0x7e,0x7f,0x00,0x00,0x00]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_cos_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_cos_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_cos_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_cos_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfe,0x7e,0x7f,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_cvt_f32_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
index dbee9f3..34abc82 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
@@ -2,6 +2,18 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX1250 %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
+v_tanh_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_tanh_f32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x3c,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_tanh_f32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x3c,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_tanh_f32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x3c,0xfe,0x7f,0xff,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_tanh_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_tanh_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x94,0x0a,0x7e,0x01,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
@@ -114,6 +126,22 @@ v_sin_bf16 v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_sin_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfc,0x0a,0x7f,0x81,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_cos_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_cos_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_cos_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_cos_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfe,0x7e,0x7f,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_cos_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0a,0x7f,0x81,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_cvt_f32_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
index 4257334..340a785 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
@@ -127,6 +127,51 @@ v_cvt_f32_fp8 v1, v3 byte_sel:1 clamp
v_cvt_f32_fp8 v1, v3 byte_sel:2 clamp
// GFX1250: v_cvt_f32_fp8_e64 v1, v3 byte_sel:2 clamp ; encoding: [0x01,0x88,0xec,0xd5,0x03,0x01,0x00,0x00]
+v_tanh_f32_e64 v5, v1
+// GFX1250: v_tanh_f32_e64 v5, v1 ; encoding: [0x05,0x00,0x9e,0xd5,0x01,0x01,0x00,0x00]
+
+v_tanh_f32_e64 v5, v255
+// GFX1250: v_tanh_f32_e64 v5, v255 ; encoding: [0x05,0x00,0x9e,0xd5,0xff,0x01,0x00,0x00]
+
+v_tanh_f32_e64 v5, s1
+// GFX1250: v_tanh_f32_e64 v5, s1 ; encoding: [0x05,0x00,0x9e,0xd5,0x01,0x00,0x00,0x00]
+
+v_tanh_f32_e64 v5, s105
+// GFX1250: v_tanh_f32_e64 v5, s105 ; encoding: [0x05,0x00,0x9e,0xd5,0x69,0x00,0x00,0x00]
+
+v_tanh_f32_e64 v5, vcc_lo
+// GFX1250: v_tanh_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x9e,0xd5,0x6a,0x00,0x00,0x00]
+
+v_tanh_f32_e64 v5, vcc_hi
+// GFX1250: v_tanh_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x9e,0xd5,0x6b,0x00,0x00,0x00]
+
+v_tanh_f32_e64 v5, ttmp15
+// GFX1250: v_tanh_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x9e,0xd5,0x7b,0x00,0x00,0x00]
+
+v_tanh_f32_e64 v5, m0
+// GFX1250: v_tanh_f32_e64 v5, m0 ; encoding: [0x05,0x00,0x9e,0xd5,0x7d,0x00,0x00,0x00]
+
+v_tanh_f32_e64 v5, exec_lo
+// GFX1250: v_tanh_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x9e,0xd5,0x7e,0x00,0x00,0x00]
+
+v_tanh_f32_e64 v5, exec_hi
+// GFX1250: v_tanh_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x9e,0xd5,0x7f,0x00,0x00,0x00]
+
+v_tanh_f32_e64 v5, null
+// GFX1250: v_tanh_f32_e64 v5, null ; encoding: [0x05,0x00,0x9e,0xd5,0x7c,0x00,0x00,0x00]
+
+v_tanh_f32_e64 v5, -1
+// GFX1250: v_tanh_f32_e64 v5, -1 ; encoding: [0x05,0x00,0x9e,0xd5,0xc1,0x00,0x00,0x00]
+
+v_tanh_f32_e64 v5, 0.5 mul:2
+// GFX1250: v_tanh_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x9e,0xd5,0xf0,0x00,0x00,0x08]
+
+v_tanh_f32_e64 v5, src_scc mul:4
+// GFX1250: v_tanh_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x9e,0xd5,0xfd,0x00,0x00,0x10]
+
+v_tanh_f32_e64 v255, -|0xaf123456| clamp div:2
+// GFX1250: v_tanh_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0x9e,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf]
+
v_rcp_bf16_e64 v5, v1
// GFX1250: v_rcp_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xf9,0xd5,0x01,0x01,0x00,0x00]
@@ -397,6 +442,51 @@ v_sin_bf16_e64 v5, src_scc mul:4
v_sin_bf16_e64 v255, -|0x8000| clamp div:2
// GFX1250: v_sin_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfe,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
+v_cos_bf16_e64 v5, v1
+// GFX1250: v_cos_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xff,0xd5,0x01,0x01,0x00,0x00]
+
+v_cos_bf16_e64 v5, v255
+// GFX1250: v_cos_bf16_e64 v5, v255 ; encoding: [0x05,0x00,0xff,0xd5,0xff,0x01,0x00,0x00]
+
+v_cos_bf16_e64 v5, s1
+// GFX1250: v_cos_bf16_e64 v5, s1 ; encoding: [0x05,0x00,0xff,0xd5,0x01,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, s105
+// GFX1250: v_cos_bf16_e64 v5, s105 ; encoding: [0x05,0x00,0xff,0xd5,0x69,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, vcc_lo
+// GFX1250: v_cos_bf16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xff,0xd5,0x6a,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, vcc_hi
+// GFX1250: v_cos_bf16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xff,0xd5,0x6b,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, ttmp15
+// GFX1250: v_cos_bf16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xff,0xd5,0x7b,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, m0
+// GFX1250: v_cos_bf16_e64 v5, m0 ; encoding: [0x05,0x00,0xff,0xd5,0x7d,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, exec_lo
+// GFX1250: v_cos_bf16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xff,0xd5,0x7e,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, exec_hi
+// GFX1250: v_cos_bf16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xff,0xd5,0x7f,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, null
+// GFX1250: v_cos_bf16_e64 v5, null ; encoding: [0x05,0x00,0xff,0xd5,0x7c,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, -1
+// GFX1250: v_cos_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xff,0xd5,0xc1,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, 0.5 mul:2
+// GFX1250: v_cos_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xff,0xd5,0xf0,0x00,0x00,0x08]
+
+v_cos_bf16_e64 v5, src_scc mul:4
+// GFX1250: v_cos_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xff,0xd5,0xfd,0x00,0x00,0x10]
+
+v_cos_bf16_e64 v255, -|0x8000| clamp div:2
+// GFX1250: v_cos_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xff,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
+
v_cvt_f32_bf16_e64 v5, v1
// GFX1250: v_cvt_f32_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xf2,0xd5,0x01,0x01,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s
index 83986a6..579a467 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s
@@ -130,6 +130,51 @@ v_cvt_f32_fp8 v1, v3 byte_sel:1 clamp
v_cvt_f32_fp8 v1, v3 byte_sel:2 clamp
// GFX1250: v_cvt_f32_fp8_e64 v1, v3 byte_sel:2 clamp ; encoding: [0x01,0x88,0xec,0xd5,0x03,0x01,0x00,0x00]
+v_tanh_f32_e64 v5, v1
+// GFX1250: v_tanh_f32_e64 v5, v1 ; encoding: [0x05,0x00,0x9e,0xd5,0x01,0x01,0x00,0x00]
+
+v_tanh_f32_e64 v5, v255
+// GFX1250: v_tanh_f32_e64 v5, v255 ; encoding: [0x05,0x00,0x9e,0xd5,0xff,0x01,0x00,0x00]
+
+v_tanh_f32_e64 v5, s1
+// GFX1250: v_tanh_f32_e64 v5, s1 ; encoding: [0x05,0x00,0x9e,0xd5,0x01,0x00,0x00,0x00]
+
+v_tanh_f32_e64 v5, s105
+// GFX1250: v_tanh_f32_e64 v5, s105 ; encoding: [0x05,0x00,0x9e,0xd5,0x69,0x00,0x00,0x00]
+
+v_tanh_f32_e64 v5, vcc_lo
+// GFX1250: v_tanh_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x9e,0xd5,0x6a,0x00,0x00,0x00]
+
+v_tanh_f32_e64 v5, vcc_hi
+// GFX1250: v_tanh_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x9e,0xd5,0x6b,0x00,0x00,0x00]
+
+v_tanh_f32_e64 v5, ttmp15
+// GFX1250: v_tanh_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x9e,0xd5,0x7b,0x00,0x00,0x00]
+
+v_tanh_f32_e64 v5, m0
+// GFX1250: v_tanh_f32_e64 v5, m0 ; encoding: [0x05,0x00,0x9e,0xd5,0x7d,0x00,0x00,0x00]
+
+v_tanh_f32_e64 v5, exec_lo
+// GFX1250: v_tanh_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x9e,0xd5,0x7e,0x00,0x00,0x00]
+
+v_tanh_f32_e64 v5, exec_hi
+// GFX1250: v_tanh_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x9e,0xd5,0x7f,0x00,0x00,0x00]
+
+v_tanh_f32_e64 v5, null
+// GFX1250: v_tanh_f32_e64 v5, null ; encoding: [0x05,0x00,0x9e,0xd5,0x7c,0x00,0x00,0x00]
+
+v_tanh_f32_e64 v5, -1
+// GFX1250: v_tanh_f32_e64 v5, -1 ; encoding: [0x05,0x00,0x9e,0xd5,0xc1,0x00,0x00,0x00]
+
+v_tanh_f32_e64 v5, 0.5 mul:2
+// GFX1250: v_tanh_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x9e,0xd5,0xf0,0x00,0x00,0x08]
+
+v_tanh_f32_e64 v5, src_scc mul:4
+// GFX1250: v_tanh_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x9e,0xd5,0xfd,0x00,0x00,0x10]
+
+v_tanh_f32_e64 v255, -|0xaf123456| clamp div:2
+// GFX1250: v_tanh_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0x9e,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf]
+
v_rcp_bf16_e64 v5, v1
// GFX1250: v_rcp_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xf9,0xd5,0x01,0x01,0x00,0x00]
@@ -418,6 +463,54 @@ v_sin_bf16_e64 v255, -|0x8000| clamp div:2
v_sin_bf16 v5.h, v128.h
// GFX1250: v_sin_bf16_e64 v5.h, v128.h op_sel:[1,1] ; encoding: [0x05,0x48,0xfe,0xd5,0x80,0x01,0x00,0x00]
+v_cos_bf16_e64 v5, v1
+// GFX1250: v_cos_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xff,0xd5,0x01,0x01,0x00,0x00]
+
+v_cos_bf16_e64 v5, v255
+// GFX1250: v_cos_bf16_e64 v5, v255 ; encoding: [0x05,0x00,0xff,0xd5,0xff,0x01,0x00,0x00]
+
+v_cos_bf16_e64 v5, s1
+// GFX1250: v_cos_bf16_e64 v5, s1 ; encoding: [0x05,0x00,0xff,0xd5,0x01,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, s105
+// GFX1250: v_cos_bf16_e64 v5, s105 ; encoding: [0x05,0x00,0xff,0xd5,0x69,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, vcc_lo
+// GFX1250: v_cos_bf16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xff,0xd5,0x6a,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, vcc_hi
+// GFX1250: v_cos_bf16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xff,0xd5,0x6b,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, ttmp15
+// GFX1250: v_cos_bf16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xff,0xd5,0x7b,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, m0
+// GFX1250: v_cos_bf16_e64 v5, m0 ; encoding: [0x05,0x00,0xff,0xd5,0x7d,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, exec_lo
+// GFX1250: v_cos_bf16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xff,0xd5,0x7e,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, exec_hi
+// GFX1250: v_cos_bf16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xff,0xd5,0x7f,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, null
+// GFX1250: v_cos_bf16_e64 v5, null ; encoding: [0x05,0x00,0xff,0xd5,0x7c,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, -1
+// GFX1250: v_cos_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xff,0xd5,0xc1,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, 0.5 mul:2
+// GFX1250: v_cos_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xff,0xd5,0xf0,0x00,0x00,0x08]
+
+v_cos_bf16_e64 v5, src_scc mul:4
+// GFX1250: v_cos_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xff,0xd5,0xfd,0x00,0x00,0x10]
+
+v_cos_bf16_e64 v255, -|0x8000| clamp div:2
+// GFX1250: v_cos_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xff,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
+
+v_cos_bf16_e64 v5.h, v128.h
+// GFX1250: v_cos_bf16_e64 v5.h, v128.h op_sel:[1,1] ; encoding: [0x05,0x48,0xff,0xd5,0x80,0x01,0x00,0x00]
+
v_cvt_f32_bf16_e64 v5, v1
// GFX1250: v_cvt_f32_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xf2,0xd5,0x01,0x01,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s
index bb6739e..423340c 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s
@@ -2,6 +2,62 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
+v_tanh_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_tanh_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_tanh_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32_e64_dpp v5, v1 row_mirror
+// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32_e64_dpp v5, v1 row_half_mirror
+// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32_e64_dpp v5, v1 row_shl:1
+// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32_e64_dpp v5, v1 row_shl:15
+// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32_e64_dpp v5, v1 row_shr:1
+// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32_e64_dpp v5, v1 row_shr:15
+// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32_e64_dpp v5, v1 row_ror:1
+// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32_e64_dpp v5, v1 row_ror:15
+// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_tanh_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_tanh_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX1250: v_tanh_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0x9e,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_tanh_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0]
// GFX1250: v_tanh_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xca,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
@@ -394,6 +450,62 @@ v_sin_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask
// GFX1250: v_sin_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xfe,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_cos_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_mirror
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_half_mirror
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_shl:1
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_shl:15
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_shr:1
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_shr:15
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_ror:1
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_ror:15
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX1250: v_cos_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xff,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0]
// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s
index 5f6f28e..7968b39 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s
@@ -2,6 +2,62 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
+v_tanh_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_tanh_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_tanh_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32_e64_dpp v5, v1 row_mirror
+// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32_e64_dpp v5, v1 row_half_mirror
+// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32_e64_dpp v5, v1 row_shl:1
+// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32_e64_dpp v5, v1 row_shl:15
+// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32_e64_dpp v5, v1 row_shr:1
+// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32_e64_dpp v5, v1 row_shr:15
+// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32_e64_dpp v5, v1 row_ror:1
+// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32_e64_dpp v5, v1 row_ror:15
+// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_tanh_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_tanh_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_tanh_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX1250: v_tanh_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0x9e,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_tanh_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0]
// GFX1250: v_tanh_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xca,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
@@ -422,6 +478,66 @@ v_sin_bf16_e64_dpp v5.h, v128.h quad_perm:[3,2,1,0]
// GFX1250: v_sin_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0xfe,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_cos_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_mirror
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_half_mirror
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_shl:1
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_shl:15
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_shr:1
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_shr:15
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_ror:1
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_ror:15
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX1250: v_cos_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xff,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5.h, v128.h quad_perm:[3,2,1,0]
+// GFX1250: v_cos_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0xff,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0]
// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s
index 037e7d6..dd469c2 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s
@@ -2,6 +2,22 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
+v_tanh_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_tanh_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x9e,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_tanh_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x9e,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_tanh_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x9e,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_tanh_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0x9e,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_tanh_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_tanh_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xca,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
@@ -114,6 +130,22 @@ v_sin_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX1250: v_sin_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xfe,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_cos_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0xff,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_cos_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xff,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s
index 53fb0eb..9fce779 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s
@@ -2,6 +2,22 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
+v_tanh_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_tanh_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x9e,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_tanh_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x9e,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_tanh_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x9e,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_tanh_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_tanh_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0x9e,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_tanh_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_tanh_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xca,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
@@ -142,6 +158,26 @@ v_sin_bf16_e64_dpp v5.h, v128.h dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_sin_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x48,0xfe,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_cos_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0xff,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_cos_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xff,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5.h, v128.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_cos_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x48,0xff,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
index fec2207..0a6fc39 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
@@ -29,6 +29,51 @@
0x6a,0x3a,0x08,0x7e
# GFX1250: v_mov_b64_e32 v[4:5], vcc ; encoding: [0x6a,0x3a,0x08,0x7e]
+0xff,0x3c,0xfe,0x7f,0x56,0x34,0x12,0xaf
+# GFX1250: v_tanh_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x3c,0xfe,0x7f,0x56,0x34,0x12,0xaf]
+
+0xc1,0x3c,0x0a,0x7e
+# GFX1250: v_tanh_f32_e32 v5, -1 ; encoding: [0xc1,0x3c,0x0a,0x7e]
+
+0xf0,0x3c,0x0a,0x7e
+# GFX1250: v_tanh_f32_e32 v5, 0.5 ; encoding: [0xf0,0x3c,0x0a,0x7e]
+
+0x7f,0x3c,0x0a,0x7e
+# GFX1250: v_tanh_f32_e32 v5, exec_hi ; encoding: [0x7f,0x3c,0x0a,0x7e]
+
+0x7e,0x3c,0x0a,0x7e
+# GFX1250: v_tanh_f32_e32 v5, exec_lo ; encoding: [0x7e,0x3c,0x0a,0x7e]
+
+0x7d,0x3c,0x0a,0x7e
+# GFX1250: v_tanh_f32_e32 v5, m0 ; encoding: [0x7d,0x3c,0x0a,0x7e]
+
+0x7c,0x3c,0x0a,0x7e
+# GFX1250: v_tanh_f32_e32 v5, null ; encoding: [0x7c,0x3c,0x0a,0x7e]
+
+0x01,0x3c,0x0a,0x7e
+# GFX1250: v_tanh_f32_e32 v5, s1 ; encoding: [0x01,0x3c,0x0a,0x7e]
+
+0x69,0x3c,0x0a,0x7e
+# GFX1250: v_tanh_f32_e32 v5, s105 ; encoding: [0x69,0x3c,0x0a,0x7e]
+
+0xfd,0x3c,0x0a,0x7e
+# GFX1250: v_tanh_f32_e32 v5, src_scc ; encoding: [0xfd,0x3c,0x0a,0x7e]
+
+0x7b,0x3c,0x0a,0x7e
+# GFX1250: v_tanh_f32_e32 v5, ttmp15 ; encoding: [0x7b,0x3c,0x0a,0x7e]
+
+0x01,0x3d,0x0a,0x7e
+# GFX1250: v_tanh_f32_e32 v5, v1 ; encoding: [0x01,0x3d,0x0a,0x7e]
+
+0xff,0x3d,0x0a,0x7e
+# GFX1250: v_tanh_f32_e32 v5, v255 ; encoding: [0xff,0x3d,0x0a,0x7e]
+
+0x6b,0x3c,0x0a,0x7e
+# GFX1250: v_tanh_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x3c,0x0a,0x7e]
+
+0x6a,0x3c,0x0a,0x7e
+# GFX1250: v_tanh_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x3c,0x0a,0x7e]
+
0xff,0x94,0xfe,0x7e,0x00,0x80,0x00,0x00
# GFX1250-REAL16: v_tanh_bf16_e32 v127.l, 0x8000 ; encoding: [0xff,0x94,0xfe,0x7e,0x00,0x80,0x00,0x00]
# GFX1250-FAKE16: v_tanh_bf16_e32 v127, 0x8000 ; encoding: [0xff,0x94,0xfe,0x7e,0x00,0x80,0x00,0x00]
@@ -470,6 +515,69 @@
0x81,0xfd,0x0a,0x7f
# GFX1250-REAL16: v_sin_bf16_e32 v5.h, v1.h ; encoding: [0x81,0xfd,0x0a,0x7f]
+0xff,0xfe,0xfe,0x7e,0x00,0x80,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_e32 v127.l, 0x8000 ; encoding: [0xff,0xfe,0xfe,0x7e,0x00,0x80,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xfe,0xfe,0x7e,0x00,0x80,0x00,0x00]
+
+0xc1,0xfe,0x0a,0x7e
+# GFX1250-REAL16: v_cos_bf16_e32 v5.l, -1 ; encoding: [0xc1,0xfe,0x0a,0x7e]
+# GFX1250-FAKE16: v_cos_bf16_e32 v5, -1 ; encoding: [0xc1,0xfe,0x0a,0x7e]
+
+0xf0,0xfe,0x0a,0x7e
+# GFX1250-REAL16: v_cos_bf16_e32 v5.l, 0.5 ; encoding: [0xf0,0xfe,0x0a,0x7e]
+# GFX1250-FAKE16: v_cos_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xfe,0x0a,0x7e]
+
+0x7f,0xfe,0x0a,0x7e
+# GFX1250-REAL16: v_cos_bf16_e32 v5.l, exec_hi ; encoding: [0x7f,0xfe,0x0a,0x7e]
+# GFX1250-FAKE16: v_cos_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xfe,0x0a,0x7e]
+
+0x7e,0xfe,0x0a,0x7e
+# GFX1250-REAL16: v_cos_bf16_e32 v5.l, exec_lo ; encoding: [0x7e,0xfe,0x0a,0x7e]
+# GFX1250-FAKE16: v_cos_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xfe,0x0a,0x7e]
+
+0x7d,0xfe,0x0a,0x7e
+# GFX1250-REAL16: v_cos_bf16_e32 v5.l, m0 ; encoding: [0x7d,0xfe,0x0a,0x7e]
+# GFX1250-FAKE16: v_cos_bf16_e32 v5, m0 ; encoding: [0x7d,0xfe,0x0a,0x7e]
+
+0x7c,0xfe,0x0a,0x7e
+# GFX1250-REAL16: v_cos_bf16_e32 v5.l, null ; encoding: [0x7c,0xfe,0x0a,0x7e]
+# GFX1250-FAKE16: v_cos_bf16_e32 v5, null ; encoding: [0x7c,0xfe,0x0a,0x7e]
+
+0x01,0xfe,0x0a,0x7e
+# GFX1250-REAL16: v_cos_bf16_e32 v5.l, s1 ; encoding: [0x01,0xfe,0x0a,0x7e]
+# GFX1250-FAKE16: v_cos_bf16_e32 v5, s1 ; encoding: [0x01,0xfe,0x0a,0x7e]
+
+0x69,0xfe,0x0a,0x7e
+# GFX1250-REAL16: v_cos_bf16_e32 v5.l, s105 ; encoding: [0x69,0xfe,0x0a,0x7e]
+# GFX1250-FAKE16: v_cos_bf16_e32 v5, s105 ; encoding: [0x69,0xfe,0x0a,0x7e]
+
+0xfd,0xfe,0x0a,0x7e
+# GFX1250-REAL16: v_cos_bf16_e32 v5.l, src_scc ; encoding: [0xfd,0xfe,0x0a,0x7e]
+# GFX1250-FAKE16: v_cos_bf16_e32 v5, src_scc ; encoding: [0xfd,0xfe,0x0a,0x7e]
+
+0x7b,0xfe,0x0a,0x7e
+# GFX1250-REAL16: v_cos_bf16_e32 v5.l, ttmp15 ; encoding: [0x7b,0xfe,0x0a,0x7e]
+# GFX1250-FAKE16: v_cos_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xfe,0x0a,0x7e]
+
+0x01,0xff,0x0a,0x7e
+# GFX1250-REAL16: v_cos_bf16_e32 v5.l, v1.l ; encoding: [0x01,0xff,0x0a,0x7e]
+# GFX1250-FAKE16: v_cos_bf16_e32 v5, v1 ; encoding: [0x01,0xff,0x0a,0x7e]
+
+0x7f,0xff,0x0a,0x7e
+# GFX1250-REAL16: v_cos_bf16_e32 v5.l, v127.l ; encoding: [0x7f,0xff,0x0a,0x7e]
+# GFX1250-FAKE16: v_cos_bf16_e32 v5, v127 ; encoding: [0x7f,0xff,0x0a,0x7e]
+
+0x6b,0xfe,0x0a,0x7e
+# GFX1250-REAL16: v_cos_bf16_e32 v5.l, vcc_hi ; encoding: [0x6b,0xfe,0x0a,0x7e]
+# GFX1250-FAKE16: v_cos_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xfe,0x0a,0x7e]
+
+0x6a,0xfe,0x0a,0x7e
+# GFX1250-REAL16: v_cos_bf16_e32 v5.l, vcc_lo ; encoding: [0x6a,0xfe,0x0a,0x7e]
+# GFX1250-FAKE16: v_cos_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xfe,0x0a,0x7e]
+
+0x81,0xff,0x0a,0x7f
+# GFX1250-REAL16: v_cos_bf16_e32 v5.h, v1.h ; encoding: [0x81,0xff,0x0a,0x7f]
+
0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00
# GFX1250: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt
index dc8c6b1..f099ffc 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt
@@ -2,6 +2,48 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-REAL16 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s
+0xfa,0x3c,0xfe,0x7f,0xff,0x6f,0x35,0x30
+# GFX1250: v_tanh_f32_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x3c,0xfe,0x7f,0xff,0x6f,0x35,0x30]
+
+0xfa,0x3c,0x0a,0x7e,0x01,0xe4,0x00,0xff
+# GFX1250: v_tanh_f32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+
+0xfa,0x3c,0x0a,0x7e,0x01,0x1b,0x00,0xff
+# GFX1250: v_tanh_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+
+0xfa,0x3c,0x0a,0x7e,0x01,0x41,0x01,0xff
+# GFX1250: v_tanh_f32_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x41,0x01,0xff]
+
+0xfa,0x3c,0x0a,0x7e,0x01,0x40,0x01,0xff
+# GFX1250: v_tanh_f32_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x40,0x01,0xff]
+
+0xfa,0x3c,0x0a,0x7e,0x01,0x21,0x01,0xff
+# GFX1250: v_tanh_f32_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x21,0x01,0xff]
+
+0xfa,0x3c,0x0a,0x7e,0x01,0x2f,0x01,0xff
+# GFX1250: v_tanh_f32_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+
+0xfa,0x3c,0x0a,0x7e,0x01,0x50,0x01,0xff
+# GFX1250: v_tanh_f32_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x50,0x01,0xff]
+
+0xfa,0x3c,0x0a,0x7e,0x01,0x5f,0x01,0x01
+# GFX1250: v_tanh_f32_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+
+0xfa,0x3c,0x0a,0x7e,0x01,0x01,0x01,0xff
+# GFX1250: v_tanh_f32_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x01,0x01,0xff]
+
+0xfa,0x3c,0x0a,0x7e,0x01,0x0f,0x01,0xff
+# GFX1250: v_tanh_f32_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+
+0xfa,0x3c,0x0a,0x7e,0x01,0x11,0x01,0xff
+# GFX1250: v_tanh_f32_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x11,0x01,0xff]
+
+0xfa,0x3c,0x0a,0x7e,0x01,0x1f,0x01,0xff
+# GFX1250: v_tanh_f32_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+
+0xfa,0x3c,0x0a,0x7e,0x01,0x60,0x09,0x13
+# GFX1250: v_tanh_f32_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x60,0x09,0x13]
+
0xfa,0x94,0xfe,0x7e,0x7f,0x6f,0x35,0x30
# GFX1250-REAL16: v_tanh_bf16_dpp v127.l, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x94,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
# GFX1250-FAKE16: v_tanh_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x94,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
@@ -415,6 +457,65 @@
0xfa,0xfc,0x0a,0x7f,0x81,0x1b,0x00,0xff
# GFX1250-REAL16: v_sin_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7f,0x81,0x1b,0x00,0xff]
+0xfa,0xfe,0xfe,0x7e,0x7f,0x6f,0x35,0x30
+# GFX1250-REAL16: v_cos_bf16_dpp v127.l, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
+# GFX1250-FAKE16: v_cos_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
+
+0xfa,0xfe,0x0a,0x7e,0x01,0xe4,0x00,0xff
+# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+
+0xfa,0xfe,0x0a,0x7e,0x01,0x1b,0x00,0xff
+# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+
+0xfa,0xfe,0x0a,0x7e,0x01,0x41,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x41,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x41,0x01,0xff]
+
+0xfa,0xfe,0x0a,0x7e,0x01,0x40,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x40,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x40,0x01,0xff]
+
+0xfa,0xfe,0x0a,0x7e,0x01,0x21,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x21,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x21,0x01,0xff]
+
+0xfa,0xfe,0x0a,0x7e,0x01,0x2f,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+
+0xfa,0xfe,0x0a,0x7e,0x01,0x50,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x50,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x50,0x01,0xff]
+
+0xfa,0xfe,0x0a,0x7e,0x01,0x5f,0x01,0x01
+# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+
+0xfa,0xfe,0x0a,0x7e,0x01,0x01,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x01,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x01,0x01,0xff]
+
+0xfa,0xfe,0x0a,0x7e,0x01,0x0f,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+
+0xfa,0xfe,0x0a,0x7e,0x01,0x11,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x11,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x11,0x01,0xff]
+
+0xfa,0xfe,0x0a,0x7e,0x01,0x1f,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+
+0xfa,0xfe,0x0a,0x7e,0x01,0x60,0x09,0x13
+# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x60,0x09,0x13]
+# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x60,0x09,0x13]
+
+0xfa,0xfe,0x0a,0x7f,0x81,0x1b,0x00,0xff
+# GFX1250-REAL16: v_cos_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7f,0x81,0x1b,0x00,0xff]
+
0xfa,0xe4,0xfe,0x7e,0x7f,0x6f,0x35,0x30
# GFX1250: v_cvt_f32_bf16_dpp v127, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xe4,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt
index 741bf3f..d86d463 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt
@@ -2,6 +2,15 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-REAL16 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s
+0xe9,0x3c,0xfe,0x7f,0xff,0x00,0x00,0x00
+# GFX1250: v_tanh_f32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x3c,0xfe,0x7f,0xff,0x00,0x00,0x00]
+
+0xe9,0x3c,0x0a,0x7e,0x01,0x77,0x39,0x05
+# GFX1250: v_tanh_f32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x3c,0x0a,0x7e,0x01,0x77,0x39,0x05]
+
+0xea,0x3c,0x0a,0x7e,0x01,0x77,0x39,0x05
+# GFX1250: v_tanh_f32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x3c,0x0a,0x7e,0x01,0x77,0x39,0x05]
+
0xe9,0x94,0xfe,0x7e,0x7f,0x00,0x00,0x00
# GFX1250-REAL16: v_tanh_bf16_dpp v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x94,0xfe,0x7e,0x7f,0x00,0x00,0x00]
# GFX1250-FAKE16: v_tanh_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x94,0xfe,0x7e,0x7f,0x00,0x00,0x00]
@@ -110,6 +119,21 @@
0xe9,0xfc,0x0a,0x7f,0x81,0x77,0x39,0x05
# GFX1250-REAL16: v_sin_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfc,0x0a,0x7f,0x81,0x77,0x39,0x05]
+0xe9,0xfe,0xfe,0x7e,0x7f,0x00,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_dpp v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfe,0x7e,0x7f,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfe,0x7e,0x7f,0x00,0x00,0x00]
+
+0xe9,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05
+# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05]
+
+0xea,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05
+# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05]
+
+0xe9,0xfe,0x0a,0x7f,0x81,0x77,0x39,0x05
+# GFX1250-REAL16: v_cos_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0a,0x7f,0x81,0x77,0x39,0x05]
+
0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00
# GFX1250: v_cvt_f32_bf16_dpp v127, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt
index cd9b712..4dc7ed4 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt
@@ -2,6 +2,51 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-REAL16 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s
+0xff,0x81,0x9e,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf
+# GFX1250: v_tanh_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0x9e,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf]
+
+0x05,0x00,0x9e,0xd5,0xc1,0x00,0x00,0x00
+# GFX1250: v_tanh_f32_e64 v5, -1 ; encoding: [0x05,0x00,0x9e,0xd5,0xc1,0x00,0x00,0x00]
+
+0x05,0x00,0x9e,0xd5,0xf0,0x00,0x00,0x08
+# GFX1250: v_tanh_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x9e,0xd5,0xf0,0x00,0x00,0x08]
+
+0x05,0x00,0x9e,0xd5,0x7f,0x00,0x00,0x00
+# GFX1250: v_tanh_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x9e,0xd5,0x7f,0x00,0x00,0x00]
+
+0x05,0x00,0x9e,0xd5,0x7e,0x00,0x00,0x00
+# GFX1250: v_tanh_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x9e,0xd5,0x7e,0x00,0x00,0x00]
+
+0x05,0x00,0x9e,0xd5,0x7d,0x00,0x00,0x00
+# GFX1250: v_tanh_f32_e64 v5, m0 ; encoding: [0x05,0x00,0x9e,0xd5,0x7d,0x00,0x00,0x00]
+
+0x05,0x00,0x9e,0xd5,0x7c,0x00,0x00,0x00
+# GFX1250: v_tanh_f32_e64 v5, null ; encoding: [0x05,0x00,0x9e,0xd5,0x7c,0x00,0x00,0x00]
+
+0x05,0x00,0x9e,0xd5,0x01,0x00,0x00,0x00
+# GFX1250: v_tanh_f32_e64 v5, s1 ; encoding: [0x05,0x00,0x9e,0xd5,0x01,0x00,0x00,0x00]
+
+0x05,0x00,0x9e,0xd5,0x69,0x00,0x00,0x00
+# GFX1250: v_tanh_f32_e64 v5, s105 ; encoding: [0x05,0x00,0x9e,0xd5,0x69,0x00,0x00,0x00]
+
+0x05,0x00,0x9e,0xd5,0xfd,0x00,0x00,0x10
+# GFX1250: v_tanh_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x9e,0xd5,0xfd,0x00,0x00,0x10]
+
+0x05,0x00,0x9e,0xd5,0x7b,0x00,0x00,0x00
+# GFX1250: v_tanh_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x9e,0xd5,0x7b,0x00,0x00,0x00]
+
+0x05,0x00,0x9e,0xd5,0x01,0x01,0x00,0x00
+# GFX1250: v_tanh_f32_e64 v5, v1 ; encoding: [0x05,0x00,0x9e,0xd5,0x01,0x01,0x00,0x00]
+
+0x05,0x00,0x9e,0xd5,0xff,0x01,0x00,0x00
+# GFX1250: v_tanh_f32_e64 v5, v255 ; encoding: [0x05,0x00,0x9e,0xd5,0xff,0x01,0x00,0x00]
+
+0x05,0x00,0x9e,0xd5,0x6b,0x00,0x00,0x00
+# GFX1250: v_tanh_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x9e,0xd5,0x6b,0x00,0x00,0x00]
+
+0x05,0x00,0x9e,0xd5,0x6a,0x00,0x00,0x00
+# GFX1250: v_tanh_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x9e,0xd5,0x6a,0x00,0x00,0x00]
+
0xff,0x81,0xca,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00
# GFX1250-REAL16: v_tanh_bf16_e64 v255.l, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xca,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
# GFX1250-FAKE16: v_tanh_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xca,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
@@ -450,6 +495,70 @@
# GFX1250-REAL16: v_sin_bf16_e64 v5.h, v128.h op_sel:[1,1] ; encoding: [0x05,0x48,0xfe,0xd5,0x80,0x01,0x00,0x00]
# GFX1250-FAKE16: v_sin_bf16_e64 v5, v128 ; encoding: [0x05,0x00,0xfe,0xd5,0x80,0x01,0x00,0x00]
+0xff,0x81,0xff,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_e64 v255.l, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xff,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xff,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
+
+0x05,0x00,0xff,0xd5,0xc1,0x00,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_e64 v5.l, -1 ; encoding: [0x05,0x00,0xff,0xd5,0xc1,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xff,0xd5,0xc1,0x00,0x00,0x00]
+
+0x05,0x00,0xff,0xd5,0xf0,0x00,0x00,0x08
+# GFX1250-REAL16: v_cos_bf16_e64 v5.l, 0.5 mul:2 ; encoding: [0x05,0x00,0xff,0xd5,0xf0,0x00,0x00,0x08]
+# GFX1250-FAKE16: v_cos_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xff,0xd5,0xf0,0x00,0x00,0x08]
+
+0x05,0x00,0xff,0xd5,0x7f,0x00,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_e64 v5.l, exec_hi ; encoding: [0x05,0x00,0xff,0xd5,0x7f,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xff,0xd5,0x7f,0x00,0x00,0x00]
+
+0x05,0x00,0xff,0xd5,0x7e,0x00,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_e64 v5.l, exec_lo ; encoding: [0x05,0x00,0xff,0xd5,0x7e,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xff,0xd5,0x7e,0x00,0x00,0x00]
+
+0x05,0x00,0xff,0xd5,0x7d,0x00,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_e64 v5.l, m0 ; encoding: [0x05,0x00,0xff,0xd5,0x7d,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_e64 v5, m0 ; encoding: [0x05,0x00,0xff,0xd5,0x7d,0x00,0x00,0x00]
+
+0x05,0x00,0xff,0xd5,0x7c,0x00,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_e64 v5.l, null ; encoding: [0x05,0x00,0xff,0xd5,0x7c,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_e64 v5, null ; encoding: [0x05,0x00,0xff,0xd5,0x7c,0x00,0x00,0x00]
+
+0x05,0x00,0xff,0xd5,0x01,0x00,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_e64 v5.l, s1 ; encoding: [0x05,0x00,0xff,0xd5,0x01,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_e64 v5, s1 ; encoding: [0x05,0x00,0xff,0xd5,0x01,0x00,0x00,0x00]
+
+0x05,0x00,0xff,0xd5,0x69,0x00,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_e64 v5.l, s105 ; encoding: [0x05,0x00,0xff,0xd5,0x69,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_e64 v5, s105 ; encoding: [0x05,0x00,0xff,0xd5,0x69,0x00,0x00,0x00]
+
+0x05,0x00,0xff,0xd5,0xfd,0x00,0x00,0x10
+# GFX1250-REAL16: v_cos_bf16_e64 v5.l, src_scc mul:4 ; encoding: [0x05,0x00,0xff,0xd5,0xfd,0x00,0x00,0x10]
+# GFX1250-FAKE16: v_cos_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xff,0xd5,0xfd,0x00,0x00,0x10]
+
+0x05,0x00,0xff,0xd5,0x7b,0x00,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_e64 v5.l, ttmp15 ; encoding: [0x05,0x00,0xff,0xd5,0x7b,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xff,0xd5,0x7b,0x00,0x00,0x00]
+
+0x05,0x00,0xff,0xd5,0x01,0x01,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_e64 v5.l, v1.l ; encoding: [0x05,0x00,0xff,0xd5,0x01,0x01,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xff,0xd5,0x01,0x01,0x00,0x00]
+
+0x05,0x00,0xff,0xd5,0xff,0x01,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_e64 v5.l, v255.l ; encoding: [0x05,0x00,0xff,0xd5,0xff,0x01,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_e64 v5, v255 ; encoding: [0x05,0x00,0xff,0xd5,0xff,0x01,0x00,0x00]
+
+0x05,0x00,0xff,0xd5,0x6b,0x00,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_e64 v5.l, vcc_hi ; encoding: [0x05,0x00,0xff,0xd5,0x6b,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xff,0xd5,0x6b,0x00,0x00,0x00]
+
+0x05,0x00,0xff,0xd5,0x6a,0x00,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_e64 v5.l, vcc_lo ; encoding: [0x05,0x00,0xff,0xd5,0x6a,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xff,0xd5,0x6a,0x00,0x00,0x00]
+
+0x05,0x48,0xff,0xd5,0x80,0x01,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_e64 v5.h, v128.h op_sel:[1,1] ; encoding: [0x05,0x48,0xff,0xd5,0x80,0x01,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_e64 v5, v128 ; encoding: [0x05,0x00,0xff,0xd5,0x80,0x01,0x00,0x00]
+
0x01,0x00,0xed,0xd5,0x83,0x00,0x00,0x00
# GFX1250: v_cvt_f32_bf8_e64 v1, 3 ; encoding: [0x01,0x00,0xed,0xd5,0x83,0x00,0x00,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt
index ed07393..1f03a43 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt
@@ -2,6 +2,48 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250-REAL16 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250-FAKE16 %s
+0xff,0x81,0x9e,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30
+# GFX1250: v_tanh_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0x9e,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
+
+0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01
+# GFX1250: v_tanh_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01]
+
+0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13
+# GFX1250: v_tanh_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13]
+
+0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff
+# GFX1250: v_tanh_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
+
+0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
+# GFX1250: v_tanh_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff
+# GFX1250: v_tanh_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
+
+0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+# GFX1250: v_tanh_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+
+0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff
+# GFX1250: v_tanh_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
+
+0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff
+# GFX1250: v_tanh_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
+
+0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff
+# GFX1250: v_tanh_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
+
+0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff
+# GFX1250: v_tanh_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
+
+0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff
+# GFX1250: v_tanh_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
+
+0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff
+# GFX1250: v_tanh_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
+
+0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff
+# GFX1250: v_tanh_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
+
0xff,0x81,0xfb,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30
# GFX1250-REAL16: v_rsq_bf16_e64_dpp v255.l, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xfb,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
# GFX1250-FAKE16: v_rsq_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xfb,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
@@ -242,6 +284,66 @@
# GFX1250-REAL16: v_sin_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0xfe,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff]
# GFX1250-FAKE16: v_sin_bf16_e64_dpp v5, v128 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfe,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff]
+0xff,0x81,0xff,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v255.l, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xff,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xff,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
+
+0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01]
+
+0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13]
+
+0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
+
+0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
+
+0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+
+0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
+
+0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
+
+0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
+
+0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
+
+0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
+
+0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
+
+0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
+
+0x05,0x48,0xff,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0xff,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v128 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff]
+
0xff,0x81,0xf9,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30
# GFX1250-REAL16: v_rcp_bf16_e64_dpp v255.l, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xf9,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
# GFX1250-FAKE16: v_rcp_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xf9,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt
index a6d6713..e673f9f 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt
@@ -2,6 +2,18 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250-REAL16 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250-FAKE16 %s
+0xff,0x81,0x9e,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00
+# GFX1250: v_tanh_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0x9e,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+
+0x05,0x00,0x9e,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+# GFX1250: v_tanh_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x9e,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+
+0x05,0x00,0x9e,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05
+# GFX1250: v_tanh_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x9e,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05]
+
+0x05,0x00,0x9e,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05
+# GFX1250: v_tanh_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x9e,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05]
+
0xff,0x81,0xfb,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00
# GFX1250-REAL16: v_rsq_bf16_e64_dpp v255.l, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xfb,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
# GFX1250-FAKE16: v_rsq_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xfb,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
@@ -82,6 +94,26 @@
# GFX1250-REAL16: v_sin_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x48,0xfe,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
# GFX1250-FAKE16: v_sin_bf16_e64_dpp v5, v128 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xfe,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
+0xff,0x81,0xff,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v255.l, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xff,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xff,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+
+0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+
+0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05]
+
+0x05,0x00,0xff,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0xff,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0xff,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05]
+
+0x05,0x48,0xff,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x48,0xff,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v128 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
+
0xff,0x81,0xf9,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00
# GFX1250-REAL16: v_rcp_bf16_e64_dpp v255.l, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xf9,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
# GFX1250-FAKE16: v_rcp_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xf9,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 1076456..143cc38 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -93,6 +93,13 @@ config.substitutions.append(("%pluginext", config.llvm_plugin_ext))
config.substitutions.append(("%exeext", config.llvm_exe_ext))
config.substitutions.append(("%llvm_src_root", config.llvm_src_root))
+# Add IR2Vec test vocabulary path substitution
+config.substitutions.append(
+ (
+ "%ir2vec_test_vocab_dir",
+ os.path.join(config.test_source_root, "Analysis", "IR2Vec", "Inputs"),
+ )
+)
lli_args = []
# The target triple used by default by lli is the process target triple (some
@@ -197,6 +204,7 @@ tools.extend(
"llvm-dlltool",
"llvm-exegesis",
"llvm-extract",
+ "llvm-ir2vec",
"llvm-isel-fuzzer",
"llvm-ifs",
"llvm-install-name-tool",
diff --git a/llvm/test/tools/llvm-ir2vec/embeddings.ll b/llvm/test/tools/llvm-ir2vec/embeddings.ll
new file mode 100644
index 0000000..993ea86
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/embeddings.ll
@@ -0,0 +1,73 @@
+; RUN: llvm-ir2vec --mode=embeddings --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-DEFAULT
+; RUN: llvm-ir2vec --mode=embeddings --level=func --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-FUNC-LEVEL
+; RUN: llvm-ir2vec --mode=embeddings --level=func --function=abc --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-FUNC-LEVEL-ABC
+; RUN: not llvm-ir2vec --mode=embeddings --level=func --function=def --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s 2>&1 | FileCheck %s -check-prefix=CHECK-FUNC-DEF
+; RUN: llvm-ir2vec --mode=embeddings --level=bb --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-BB-LEVEL
+; RUN: llvm-ir2vec --mode=embeddings --level=bb --function=abc_repeat --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-BB-LEVEL-ABC-REPEAT
+; RUN: llvm-ir2vec --mode=embeddings --level=inst --function=abc_repeat --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-INST-LEVEL-ABC-REPEAT
+
+define dso_local noundef float @abc(i32 noundef %a, float noundef %b) #0 {
+entry:
+ %a.addr = alloca i32, align 4
+ %b.addr = alloca float, align 4
+ store i32 %a, ptr %a.addr, align 4
+ store float %b, ptr %b.addr, align 4
+ %0 = load i32, ptr %a.addr, align 4
+ %1 = load i32, ptr %a.addr, align 4
+ %mul = mul nsw i32 %0, %1
+ %conv = sitofp i32 %mul to float
+ %2 = load float, ptr %b.addr, align 4
+ %add = fadd float %conv, %2
+ ret float %add
+}
+
+define dso_local noundef float @abc_repeat(i32 noundef %a, float noundef %b) #0 {
+entry:
+ %a.addr = alloca i32, align 4
+ %b.addr = alloca float, align 4
+ store i32 %a, ptr %a.addr, align 4
+ store float %b, ptr %b.addr, align 4
+ %0 = load i32, ptr %a.addr, align 4
+ %1 = load i32, ptr %a.addr, align 4
+ %mul = mul nsw i32 %0, %1
+ %conv = sitofp i32 %mul to float
+ %2 = load float, ptr %b.addr, align 4
+ %add = fadd float %conv, %2
+ ret float %add
+}
+
+; CHECK-DEFAULT: Function: abc
+; CHECK-DEFAULT-NEXT: [ 878.00 889.00 900.00 ]
+; CHECK-DEFAULT-NEXT: Function: abc_repeat
+; CHECK-DEFAULT-NEXT: [ 878.00 889.00 900.00 ]
+
+; CHECK-FUNC-LEVEL: Function: abc
+; CHECK-FUNC-LEVEL-NEXT: [ 878.00 889.00 900.00 ]
+; CHECK-FUNC-LEVEL-NEXT: Function: abc_repeat
+; CHECK-FUNC-LEVEL-NEXT: [ 878.00 889.00 900.00 ]
+
+; CHECK-FUNC-LEVEL-ABC: Function: abc
+; CHECK-FUNC-LEVEL-NEXT-ABC: [ 878.00 889.00 900.00 ]
+
+; CHECK-FUNC-DEF: Error: Function 'def' not found
+
+; CHECK-BB-LEVEL: Function: abc
+; CHECK-BB-LEVEL-NEXT: entry: [ 878.00 889.00 900.00 ]
+; CHECK-BB-LEVEL-NEXT: Function: abc_repeat
+; CHECK-BB-LEVEL-NEXT: entry: [ 878.00 889.00 900.00 ]
+
+; CHECK-BB-LEVEL-ABC-REPEAT: Function: abc_repeat
+; CHECK-BB-LEVEL-ABC-REPEAT-NEXT: entry: [ 878.00 889.00 900.00 ]
+
+; CHECK-INST-LEVEL-ABC-REPEAT: Function: abc_repeat
+; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: %a.addr = alloca i32, align 4 [ 91.00 92.00 93.00 ]
+; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: %b.addr = alloca float, align 4 [ 91.00 92.00 93.00 ]
+; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: store i32 %a, ptr %a.addr, align 4 [ 97.00 98.00 99.00 ]
+; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: store float %b, ptr %b.addr, align 4 [ 97.00 98.00 99.00 ]
+; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: %0 = load i32, ptr %a.addr, align 4 [ 94.00 95.00 96.00 ]
+; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: %1 = load i32, ptr %a.addr, align 4 [ 94.00 95.00 96.00 ]
+; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: %mul = mul nsw i32 %0, %1 [ 49.00 50.00 51.00 ]
+; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: %conv = sitofp i32 %mul to float [ 130.00 131.00 132.00 ]
+; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: %2 = load float, ptr %b.addr, align 4 [ 94.00 95.00 96.00 ]
+; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: %add = fadd float %conv, %2 [ 40.00 41.00 42.00 ]
+; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: ret float %add [ 1.00 2.00 3.00 ]
diff --git a/llvm/test/tools/llvm-ir2vec/triplets.ll b/llvm/test/tools/llvm-ir2vec/triplets.ll
new file mode 100644
index 0000000..d1ef5b3
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/triplets.ll
@@ -0,0 +1,38 @@
+; RUN: llvm-ir2vec --mode=triplets %s | FileCheck %s -check-prefix=TRIPLETS
+
+define i32 @simple_add(i32 %a, i32 %b) {
+entry:
+ %add = add i32 %a, %b
+ ret i32 %add
+}
+
+define i32 @simple_mul(i32 %x, i32 %y) {
+entry:
+ %mul = mul i32 %x, %y
+ ret i32 %mul
+}
+
+define i32 @test_function(i32 %arg1, i32 %arg2) {
+entry:
+ %local1 = alloca i32, align 4
+ %local2 = alloca i32, align 4
+ store i32 %arg1, ptr %local1, align 4
+ store i32 %arg2, ptr %local2, align 4
+ %load1 = load i32, ptr %local1, align 4
+ %load2 = load i32, ptr %local2, align 4
+ %result = add i32 %load1, %load2
+ ret i32 %result
+}
+
+; TRIPLETS: Add IntegerTy Variable Variable
+; TRIPLETS-NEXT: Ret VoidTy Variable
+; TRIPLETS-NEXT: Mul IntegerTy Variable Variable
+; TRIPLETS-NEXT: Ret VoidTy Variable
+; TRIPLETS-NEXT: Alloca PointerTy Constant
+; TRIPLETS-NEXT: Alloca PointerTy Constant
+; TRIPLETS-NEXT: Store VoidTy Variable Pointer
+; TRIPLETS-NEXT: Store VoidTy Variable Pointer
+; TRIPLETS-NEXT: Load IntegerTy Pointer
+; TRIPLETS-NEXT: Load IntegerTy Pointer
+; TRIPLETS-NEXT: Add IntegerTy Variable Variable
+; TRIPLETS-NEXT: Ret VoidTy Variable