diff options
author | Brox Chen <guochen2@amd.com> | 2025-04-16 10:03:08 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-04-16 10:03:08 -0400 |
commit | 181872ffcc7dc7f20ed2b84e8fa39beba41cb6d3 (patch) | |
tree | e758e4844d48ad3edc748c8a195811d66870fcb6 | |
parent | 1bfd44462886b167f0d82e44e6a9856a830c1f8b (diff) | |
download | llvm-181872ffcc7dc7f20ed2b84e8fa39beba41cb6d3.zip llvm-181872ffcc7dc7f20ed2b84e8fa39beba41cb6d3.tar.gz llvm-181872ffcc7dc7f20ed2b84e8fa39beba41cb6d3.tar.bz2 |
[AMDGPU][True16][MC] update a few mc test for true16 (#135816)
This is another NFC patch.
Update the MC tests for a few true16 instructions by duplicating each file into a
fake16 version and updating the `mattr` flag with +/-real-true16. Also add
some fake16 files that were not properly created before.
-rw-r--r-- | llvm/test/MC/AMDGPU/bf16_imm-fake16.s | 114 | ||||
-rw-r--r-- | llvm/test/MC/AMDGPU/bf16_imm.s | 64 | ||||
-rw-r--r-- | llvm/test/MC/AMDGPU/gfx11-promotions-fake16.s | 353 | ||||
-rw-r--r-- | llvm/test/MC/AMDGPU/gfx11-promotions.s | 66 | ||||
-rw-r--r-- | llvm/test/MC/AMDGPU/gfx1150_asm_features-fake16.s | 48 | ||||
-rw-r--r-- | llvm/test/MC/AMDGPU/gfx1150_asm_features.s | 20 | ||||
-rw-r--r-- | llvm/test/MC/AMDGPU/gfx11_asm_vinterp_err-fake16.s | 43 | ||||
-rw-r--r-- | llvm/test/MC/AMDGPU/gfx11_asm_vop1.s | 30 | ||||
-rw-r--r-- | llvm/test/MC/AMDGPU/gfx11_asm_vop3_alias-fake16.s | 15 | ||||
-rw-r--r-- | llvm/test/MC/AMDGPU/gfx11_asm_vop3_alias.s | 12 | ||||
-rw-r--r-- | llvm/test/MC/AMDGPU/gfx11_asm_vop3_features.s | 77 | ||||
-rw-r--r-- | llvm/test/MC/AMDGPU/gfx12_asm_vop1.s | 31 |
12 files changed, 792 insertions, 81 deletions
diff --git a/llvm/test/MC/AMDGPU/bf16_imm-fake16.s b/llvm/test/MC/AMDGPU/bf16_imm-fake16.s new file mode 100644 index 0000000..ee697be --- /dev/null +++ b/llvm/test/MC/AMDGPU/bf16_imm-fake16.s @@ -0,0 +1,114 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique --version 5 +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -show-encoding %s | FileCheck %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -show-encoding %s | FileCheck %s + +v_dot2_bf16_bf16 v5, v1, v2, 100.0 +// CHECK: v_dot2_bf16_bf16 v5, v1, v2, 0x42c8 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xfe,0x03,0xc8,0x42,0x00,0x00] + +v_dot2_bf16_bf16 v2, v0, 1.0, v2 +// CHECK: v_dot2_bf16_bf16 v2, v0, 1.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe5,0x09,0x04] + +v_dot2_bf16_bf16 v2, 1.0, v0, v2 +// CHECK: v_dot2_bf16_bf16 v2, 1.0, v0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0xf2,0x00,0x0a,0x04] + +v_dot2_bf16_bf16 v5, v1, v2, 1.0 +// CHECK: v_dot2_bf16_bf16 v5, v1, v2, 1.0 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xca,0x03] + +v_dot2_bf16_bf16 v2, v0, -1.0, v2 +// CHECK: v_dot2_bf16_bf16 v2, v0, -1.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe7,0x09,0x04] + +v_dot2_bf16_bf16 v2, v0, 0.5, v2 +// CHECK: v_dot2_bf16_bf16 v2, v0, 0.5, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe1,0x09,0x04] + +v_dot2_bf16_bf16 v2, v0, -0.5, v2 +// CHECK: v_dot2_bf16_bf16 v2, v0, -0.5, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe3,0x09,0x04] + +v_dot2_bf16_bf16 v2, v0, 2.0, v2 +// CHECK: v_dot2_bf16_bf16 v2, v0, 2.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe9,0x09,0x04] + +v_dot2_bf16_bf16 v2, v0, -2.0, v2 +// CHECK: v_dot2_bf16_bf16 v2, v0, -2.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xeb,0x09,0x04] + +v_dot2_bf16_bf16 v2, v0, 4.0, v2 +// CHECK: v_dot2_bf16_bf16 v2, v0, 4.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xed,0x09,0x04] + +v_dot2_bf16_bf16 v2, v0, -4.0, v2 +// CHECK: v_dot2_bf16_bf16 v2, v0, -4.0, v2 ; encoding: 
[0x02,0x00,0x67,0xd6,0x00,0xef,0x09,0x04] + +// Check 1/(2*pi) rounded value and ideomatic fp32 0.15915494 value +// which cannot be accurately represented in bf16. + +v_dot2_bf16_bf16 v2, v0, 0.158203125, v2 +// CHECK: v_dot2_bf16_bf16 v2, v0, 0.15915494, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04] + +v_dot2_bf16_bf16 v2, v0, 0.15915494, v2 +// CHECK: v_dot2_bf16_bf16 v2, v0, 0.15915494, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04] + +v_dot2_bf16_bf16 v2, v0, 0x3e22, v2 +// CHECK: v_dot2_bf16_bf16 v2, v0, 0.15915494, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04] + +v_dot2_bf16_bf16 v2, v0, v2, 0.15915494 +// CHECK: v_dot2_bf16_bf16 v2, v0, v2, 0.15915494 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0x05,0xe2,0x03] + +v_dot2_f32_bf16 v2, v1, 0, v2 +// CHECK: v_dot2_f32_bf16 v2, v1, 0, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0x01,0x09,0x1c] + +v_dot2_f32_bf16 v2, v1, 0.5, v2 +// CHECK: v_dot2_f32_bf16 v2, v1, 0.5, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xe1,0x09,0x1c] + +v_dot2_f32_bf16 v2, v1, -0.5, v2 +// CHECK: v_dot2_f32_bf16 v2, v1, -0.5, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xe3,0x09,0x1c] + +v_dot2_f32_bf16 v2, v1, 1.0, v2 +// CHECK: v_dot2_f32_bf16 v2, v1, 1.0, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xe5,0x09,0x1c] + +v_dot2_f32_bf16 v2, v1, -1.0, v2 +// CHECK: v_dot2_f32_bf16 v2, v1, -1.0, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xe7,0x09,0x1c] + +v_dot2_f32_bf16 v2, v1, 2.0, v2 +// CHECK: v_dot2_f32_bf16 v2, v1, 2.0, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xe9,0x09,0x1c] + +v_dot2_f32_bf16 v2, v1, -2.0, v2 +// CHECK: v_dot2_f32_bf16 v2, v1, -2.0, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xeb,0x09,0x1c] + +v_dot2_f32_bf16 v2, v1, 4.0, v2 +// CHECK: v_dot2_f32_bf16 v2, v1, 4.0, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xed,0x09,0x1c] + +v_dot2_f32_bf16 v2, v1, -4.0, v2 +// CHECK: v_dot2_f32_bf16 v2, v1, -4.0, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xef,0x09,0x1c] + +v_dot2_f32_bf16 v2, v1, 0.15915494, v2 +// CHECK: 
v_dot2_f32_bf16 v2, v1, 0.15915494, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xf1,0x09,0x1c] + +v_dot2_f32_bf16 v2, v1, 0x3e22, v2 +// CHECK: v_dot2_f32_bf16 v2, v1, 0.15915494, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xf1,0x09,0x1c] + +v_dot2_f32_bf16 v2, 0.5, v1, v2 +// CHECK: v_dot2_f32_bf16 v2, 0.5, v1, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0xf0,0x02,0x0a,0x1c] + +v_dot2_f32_bf16 v2, -0.5, v1, v2 +// CHECK: v_dot2_f32_bf16 v2, -0.5, v1, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0xf1,0x02,0x0a,0x1c] + +v_dot2_f32_bf16 v2, 1.0, v1, v2 +// CHECK: v_dot2_f32_bf16 v2, 1.0, v1, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0xf2,0x02,0x0a,0x1c] + +v_dot2_f32_bf16 v2, -1.0, v1, v2 +// CHECK: v_dot2_f32_bf16 v2, -1.0, v1, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0xf3,0x02,0x0a,0x1c] + +v_dot2_f32_bf16 v2, 2.0, v1, v2 +// CHECK: v_dot2_f32_bf16 v2, 2.0, v1, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0xf4,0x02,0x0a,0x1c] + +v_dot2_f32_bf16 v2, -2.0, v1, v2 +// CHECK: v_dot2_f32_bf16 v2, -2.0, v1, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0xf5,0x02,0x0a,0x1c] + +v_dot2_f32_bf16 v2, 4.0, v1, v2 +// CHECK: v_dot2_f32_bf16 v2, 4.0, v1, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0xf6,0x02,0x0a,0x1c] + +v_dot2_f32_bf16 v2, -4.0, v1, v2 +// CHECK: v_dot2_f32_bf16 v2, -4.0, v1, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0xf7,0x02,0x0a,0x1c] + +v_dot2_f32_bf16 v2, 100.0, v1, v2 +// CHECK: v_dot2_f32_bf16 v2, 0x42c8, v1, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0xff,0x02,0x0a,0x1c,0xc8,0x42,0x00,0x00] + +v_dot2_f32_bf16 v2, v1, 100.0, v2 +// CHECK: v_dot2_f32_bf16 v2, v1, 0x42c8, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xff,0x09,0x1c,0xc8,0x42,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/bf16_imm.s b/llvm/test/MC/AMDGPU/bf16_imm.s index 7cf1810..d796490 100644 --- a/llvm/test/MC/AMDGPU/bf16_imm.s +++ b/llvm/test/MC/AMDGPU/bf16_imm.s @@ -1,54 +1,54 @@ // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique --version 5 -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck %s 
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -show-encoding %s | FileCheck %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -show-encoding %s | FileCheck %s -v_dot2_bf16_bf16 v5, v1, v2, 100.0 -// CHECK: v_dot2_bf16_bf16 v5, v1, v2, 0x42c8 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xfe,0x03,0xc8,0x42,0x00,0x00] +v_dot2_bf16_bf16 v5.l, v1, v2, 100.0 +// CHECK: v_dot2_bf16_bf16 v5.l, v1, v2, 0x42c8 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xfe,0x03,0xc8,0x42,0x00,0x00] -v_dot2_bf16_bf16 v2, v0, 1.0, v2 -// CHECK: v_dot2_bf16_bf16 v2, v0, 1.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe5,0x09,0x04] +v_dot2_bf16_bf16 v2.l, v0, 1.0, v2.l +// CHECK: v_dot2_bf16_bf16 v2.l, v0, 1.0, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe5,0x09,0x04] -v_dot2_bf16_bf16 v2, 1.0, v0, v2 -// CHECK: v_dot2_bf16_bf16 v2, 1.0, v0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0xf2,0x00,0x0a,0x04] +v_dot2_bf16_bf16 v2.l, 1.0, v0, v2.l +// CHECK: v_dot2_bf16_bf16 v2.l, 1.0, v0, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0xf2,0x00,0x0a,0x04] -v_dot2_bf16_bf16 v5, v1, v2, 1.0 -// CHECK: v_dot2_bf16_bf16 v5, v1, v2, 1.0 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xca,0x03] +v_dot2_bf16_bf16 v5.l, v1, v2, 1.0 +// CHECK: v_dot2_bf16_bf16 v5.l, v1, v2, 1.0 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xca,0x03] -v_dot2_bf16_bf16 v2, v0, -1.0, v2 -// CHECK: v_dot2_bf16_bf16 v2, v0, -1.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe7,0x09,0x04] +v_dot2_bf16_bf16 v2.l, v0, -1.0, v2.l +// CHECK: v_dot2_bf16_bf16 v2.l, v0, -1.0, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe7,0x09,0x04] -v_dot2_bf16_bf16 v2, v0, 0.5, v2 -// CHECK: v_dot2_bf16_bf16 v2, v0, 0.5, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe1,0x09,0x04] +v_dot2_bf16_bf16 v2.l, v0, 0.5, v2.l +// CHECK: v_dot2_bf16_bf16 v2.l, v0, 0.5, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe1,0x09,0x04] -v_dot2_bf16_bf16 v2, v0, -0.5, v2 -// 
CHECK: v_dot2_bf16_bf16 v2, v0, -0.5, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe3,0x09,0x04] +v_dot2_bf16_bf16 v2.l, v0, -0.5, v2.l +// CHECK: v_dot2_bf16_bf16 v2.l, v0, -0.5, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe3,0x09,0x04] -v_dot2_bf16_bf16 v2, v0, 2.0, v2 -// CHECK: v_dot2_bf16_bf16 v2, v0, 2.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe9,0x09,0x04] +v_dot2_bf16_bf16 v2.l, v0, 2.0, v2.l +// CHECK: v_dot2_bf16_bf16 v2.l, v0, 2.0, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xe9,0x09,0x04] -v_dot2_bf16_bf16 v2, v0, -2.0, v2 -// CHECK: v_dot2_bf16_bf16 v2, v0, -2.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xeb,0x09,0x04] +v_dot2_bf16_bf16 v2.l, v0, -2.0, v2.l +// CHECK: v_dot2_bf16_bf16 v2.l, v0, -2.0, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xeb,0x09,0x04] -v_dot2_bf16_bf16 v2, v0, 4.0, v2 -// CHECK: v_dot2_bf16_bf16 v2, v0, 4.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xed,0x09,0x04] +v_dot2_bf16_bf16 v2.l, v0, 4.0, v2.l +// CHECK: v_dot2_bf16_bf16 v2.l, v0, 4.0, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xed,0x09,0x04] -v_dot2_bf16_bf16 v2, v0, -4.0, v2 -// CHECK: v_dot2_bf16_bf16 v2, v0, -4.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xef,0x09,0x04] +v_dot2_bf16_bf16 v2.l, v0, -4.0, v2.l +// CHECK: v_dot2_bf16_bf16 v2.l, v0, -4.0, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xef,0x09,0x04] // Check 1/(2*pi) rounded value and ideomatic fp32 0.15915494 value // which cannot be accurately represented in bf16. 
-v_dot2_bf16_bf16 v2, v0, 0.158203125, v2 -// CHECK: v_dot2_bf16_bf16 v2, v0, 0.15915494, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04] +v_dot2_bf16_bf16 v2.l, v0, 0.158203125, v2.l +// CHECK: v_dot2_bf16_bf16 v2.l, v0, 0.15915494, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04] -v_dot2_bf16_bf16 v2, v0, 0.15915494, v2 -// CHECK: v_dot2_bf16_bf16 v2, v0, 0.15915494, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04] +v_dot2_bf16_bf16 v2.l, v0, 0.15915494, v2.l +// CHECK: v_dot2_bf16_bf16 v2.l, v0, 0.15915494, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04] -v_dot2_bf16_bf16 v2, v0, 0x3e22, v2 -// CHECK: v_dot2_bf16_bf16 v2, v0, 0.15915494, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04] +v_dot2_bf16_bf16 v2.l, v0, 0x3e22, v2.l +// CHECK: v_dot2_bf16_bf16 v2.l, v0, 0.15915494, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04] -v_dot2_bf16_bf16 v2, v0, v2, 0.15915494 -// CHECK: v_dot2_bf16_bf16 v2, v0, v2, 0.15915494 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0x05,0xe2,0x03] +v_dot2_bf16_bf16 v2.l, v0, v2, 0.15915494 +// CHECK: v_dot2_bf16_bf16 v2.l, v0, v2, 0.15915494 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0x05,0xe2,0x03] v_dot2_f32_bf16 v2, v1, 0, v2 // CHECK: v_dot2_f32_bf16 v2, v1, 0, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0x01,0x09,0x1c] diff --git a/llvm/test/MC/AMDGPU/gfx11-promotions-fake16.s b/llvm/test/MC/AMDGPU/gfx11-promotions-fake16.s new file mode 100644 index 0000000..95a52ff --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11-promotions-fake16.s @@ -0,0 +1,353 @@ +// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 %s | FileCheck --check-prefix=GFX11 %s + +// Check opcode promotions and forced suffices. +// 1. When a suffix is optional, check that it may be omitted. +// 2. When a suffix is optional, check that it may be specified w/o any effect. +// 3. When a suffix is required, check that specifying it enforces opcode promotion. +// 4. 
When a suffix is required, check that omitting the suffix results in a different encoding. + +//===----------------------------------------------------------------------===// +// VOP1. +//===----------------------------------------------------------------------===// + +v_mov_b32 v0, v1 +// GFX11: v_mov_b32_e32 v0, v1 ; encoding: [0x01,0x03,0x00,0x7e] + +v_mov_b32_e32 v0, v1 +// GFX11: v_mov_b32_e32 v0, v1 ; encoding: [0x01,0x03,0x00,0x7e] + +//===----------------------------------------------------------------------===// +// VOP2. +//===----------------------------------------------------------------------===// + +v_add_f16 v5, v1, v2 +// GFX11: v_add_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x64] + +v_add_f16_e32 v5, v1, v2 +// GFX11: v_add_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x64] + +//===----------------------------------------------------------------------===// +// VOPC. +//===----------------------------------------------------------------------===// + +v_cmp_lt_f32 vcc_lo, v1, v2 +// GFX11: v_cmp_lt_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x22,0x7c] + +v_cmp_lt_f32_e32 vcc_lo, v1, v2 +// GFX11: v_cmp_lt_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x22,0x7c] + +//===----------------------------------------------------------------------===// +// VOPCX. +//===----------------------------------------------------------------------===// + +v_cmpx_class_f16 v1, v2 +// GFX11: v_cmpx_class_f16_e32 v1, v2 ; encoding: [0x01,0x05,0xfa,0x7d] + +v_cmpx_class_f16_e32 v1, v2 +// GFX11: v_cmpx_class_f16_e32 v1, v2 ; encoding: [0x01,0x05,0xfa,0x7d] + +//===----------------------------------------------------------------------===// +// VOP1.DPP8. 
+//===----------------------------------------------------------------------===// + +v_bfrev_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_bfrev_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x70,0x0a,0x7e,0x01,0x77,0x39,0x05] + +v_bfrev_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_bfrev_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x70,0x0a,0x7e,0x01,0x77,0x39,0x05] + +//===----------------------------------------------------------------------===// +// VOP1.DPP16. +//===----------------------------------------------------------------------===// + +v_bfrev_b32 v5, v1 quad_perm:[3,2,1,0] +// GFX11: v_bfrev_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_bfrev_b32_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: v_bfrev_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +//===----------------------------------------------------------------------===// +// VOP2.DPP8. +//===----------------------------------------------------------------------===// + +v_add_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_add_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x64,0x01,0x77,0x39,0x05] + +v_add_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_add_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x64,0x01,0x77,0x39,0x05] + +//===----------------------------------------------------------------------===// +// VOP2.DPP16. 
+//===----------------------------------------------------------------------===// + +v_add_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_add_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1b,0x00,0xff] + +v_add_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_add_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1b,0x00,0xff] + +//===----------------------------------------------------------------------===// +// VOPC.DPP8. +//===----------------------------------------------------------------------===// + +v_cmp_le_u16 v1, v2 dpp8:[7,7,7,3,4,4,6,7] fi:1 +// GFX11: v_cmp_le_u16 vcc_lo, v1, v2 dpp8:[7,7,7,3,4,4,6,7] fi:1 ; encoding: [0xea,0x04,0x76,0x7c,0x01,0xff,0x47,0xfa] + +v_cmp_le_u16_dpp v1, v2 dpp8:[7,7,7,3,4,4,6,7] fi:1 +// GFX11: v_cmp_le_u16 vcc_lo, v1, v2 dpp8:[7,7,7,3,4,4,6,7] fi:1 ; encoding: [0xea,0x04,0x76,0x7c,0x01,0xff,0x47,0xfa] + +//===----------------------------------------------------------------------===// +// VOPC.DPP16. +//===----------------------------------------------------------------------===// + +v_cmp_gt_u16 v1, v2 row_shl:0x7 row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: v_cmp_gt_u16 vcc_lo, v1, v2 row_shl:7 row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x78,0x7c,0x01,0x07,0x05,0x00] + +v_cmp_gt_u16_dpp v1, v2 row_shl:0x7 row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: v_cmp_gt_u16 vcc_lo, v1, v2 row_shl:7 row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x78,0x7c,0x01,0x07,0x05,0x00] + +//===----------------------------------------------------------------------===// +// VOPCX.DPP8. 
+//===----------------------------------------------------------------------===// + +v_cmpx_class_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_class_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_class_f16_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_class_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05] + +//===----------------------------------------------------------------------===// +// VOPCX.DPP16. +//===----------------------------------------------------------------------===// + +v_cmpx_class_f16 v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_class_f16 v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_class_f16_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_class_f16 v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x1b,0x00,0xff] + +//===----------------------------------------------------------------------===// +// VOP1 -> VOP3. +//===----------------------------------------------------------------------===// + +v_sin_f32 v5, 0.5 mul:2 +// GFX11: v_sin_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xb5,0xd5,0xf0,0x00,0x00,0x08] + +v_sin_f32_e64 v5, 0.5 mul:2 +// GFX11: v_sin_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xb5,0xd5,0xf0,0x00,0x00,0x08] + +v_sin_f32_e64 v5, v1 +// GFX11: v_sin_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xb5,0xd5,0x01,0x01,0x00,0x00] + +v_sin_f32 v5, v1 +// GFX11: v_sin_f32_e32 v5, v1 ; encoding: [0x01,0x6b,0x0a,0x7e] + +//===----------------------------------------------------------------------===// +// VOP2 -> VOP3. 
+//===----------------------------------------------------------------------===// + +v_add_f32 v5, v1, -v2 +// GFX11: v_add_f32_e64 v5, v1, -v2 ; encoding: [0x05,0x00,0x03,0xd5,0x01,0x05,0x02,0x40] + +v_add_f32_e64 v5, v1, -v2 +// GFX11: v_add_f32_e64 v5, v1, -v2 ; encoding: [0x05,0x00,0x03,0xd5,0x01,0x05,0x02,0x40] + +v_add_f32_e64 v5, v1, v2 +// GFX11: v_add_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x03,0xd5,0x01,0x05,0x02,0x00] + +v_add_f32 v5, v1, v2 +// GFX11: v_add_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x06] + +//===----------------------------------------------------------------------===// +// VOPC -> VOP3. +//===----------------------------------------------------------------------===// + +v_cmp_f_f32 s10, -v1, v2 +// GFX11: v_cmp_f_f32_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0x05,0x02,0x20] + +v_cmp_f_f32_e64 s10, -v1, v2 +// GFX11: v_cmp_f_f32_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0x05,0x02,0x20] + +v_cmp_f_f32_e64 vcc_lo, v1, v2 +// GFX11: v_cmp_f_f32_e64 vcc_lo, v1, v2 ; encoding: [0x6a,0x00,0x10,0xd4,0x01,0x05,0x02,0x00] + +v_cmp_f_f32 vcc_lo, v1, v2 +// GFX11: v_cmp_f_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x20,0x7c] + +//===----------------------------------------------------------------------===// +// VOPCX -> VOP3. +//===----------------------------------------------------------------------===// + +v_cmpx_f_f32 -v1, v2 +// GFX11: v_cmpx_f_f32_e64 -v1, v2 ; encoding: [0x7e,0x00,0x90,0xd4,0x01,0x05,0x02,0x20] + +v_cmpx_f_f32_e64 -v1, v2 +// GFX11: v_cmpx_f_f32_e64 -v1, v2 ; encoding: [0x7e,0x00,0x90,0xd4,0x01,0x05,0x02,0x20] + +v_cmpx_f_f32_e64 v1, v2 +// GFX11: v_cmpx_f_f32_e64 v1, v2 ; encoding: [0x7e,0x00,0x90,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_f_f32 v1, v2 +// GFX11: v_cmpx_f_f32_e32 v1, v2 ; encoding: [0x01,0x05,0x20,0x7d] + +//===----------------------------------------------------------------------===// +// VOP3. 
+//===----------------------------------------------------------------------===// + +v_add3_u32 v5, v1, v2, s3 +// GFX11: v_add3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0x0e,0x00] + +v_add3_u32_e64 v5, v1, v2, s3 +// GFX11: v_add3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0x0e,0x00] + +//===----------------------------------------------------------------------===// +// VOP1 -> VOP3.DPP8. +//===----------------------------------------------------------------------===// + +v_sin_f32 v5, v1 div:2 dpp8:[0,0,0,0,0,0,0,0] +// GFX11: v_sin_f32_e64_dpp v5, v1 div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x18,0x01,0x00,0x00,0x00] + +v_sin_f32_e64_dpp v5, v1 div:2 dpp8:[0,0,0,0,0,0,0,0] +// GFX11: v_sin_f32_e64_dpp v5, v1 div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x18,0x01,0x00,0x00,0x00] + +v_sin_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_sin_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_sin_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_sin_f32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x6a,0x0a,0x7e,0x01,0x77,0x39,0x05] + +//===----------------------------------------------------------------------===// +// VOP2 -> VOP3.DPP8. 
+//===----------------------------------------------------------------------===// + +v_add_f32 v5, v1, v2 mul:4 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_add_f32_e64_dpp v5, v1, v2 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd5,0xe9,0x04,0x02,0x10,0x01,0x77,0x39,0x05] + +v_add_f32_e64_dpp v5, v1, v2 mul:4 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_add_f32_e64_dpp v5, v1, v2 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd5,0xe9,0x04,0x02,0x10,0x01,0x77,0x39,0x05] + +v_add_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_add_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_add_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x06,0x01,0x77,0x39,0x05] + +//===----------------------------------------------------------------------===// +// VOPC -> VOP3.DPP8. +//===----------------------------------------------------------------------===// + +v_cmp_class_f32 s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_class_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmp_class_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_class_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmp_class_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_class_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmp_class_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_class_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfc,0x7c,0x01,0x77,0x39,0x05] + +//===----------------------------------------------------------------------===// +// VOPCX -> VOP3.DPP8. 
+//===----------------------------------------------------------------------===// + +v_cmpx_class_f32 -v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_class_f32_e64_dpp -v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xfe,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_class_f32_e64_dpp -v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_class_f32_e64_dpp -v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xfe,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_class_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_class_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xfe,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_class_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_class_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfc,0x7d,0x01,0x77,0x39,0x05] + +//===----------------------------------------------------------------------===// +// VOP1 -> VOP3.DPP16. +//===----------------------------------------------------------------------===// + +v_sin_f32 v5, v1 div:2 row_xmask:15 +// GFX11: v_sin_f32_e64_dpp v5, v1 div:2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x18,0x01,0x6f,0x01,0xff] + +v_sin_f32_e64_dpp v5, v1 div:2 row_xmask:15 +// GFX11: v_sin_f32_e64_dpp v5, v1 div:2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x18,0x01,0x6f,0x01,0xff] + +v_sin_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: v_sin_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_sin_f32 v5, v1 quad_perm:[3,2,1,0] +// GFX11: v_sin_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x6a,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +//===----------------------------------------------------------------------===// +// VOP2 -> VOP3.DPP16. 
+//===----------------------------------------------------------------------===// + +v_add_f32 v5, v1, v2 div:2 quad_perm:[3,2,1,0] +// GFX11: v_add_f32_e64_dpp v5, v1, v2 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x18,0x01,0x1b,0x00,0xff] + +v_add_f32_e64_dpp v5, v1, v2 div:2 quad_perm:[3,2,1,0] +// GFX11: v_add_f32_e64_dpp v5, v1, v2 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x18,0x01,0x1b,0x00,0xff] + +v_add_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_add_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_add_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x1b,0x00,0xff] + +//===----------------------------------------------------------------------===// +// VOPC -> VOP3.DPP16. 
+//===----------------------------------------------------------------------===// + +v_cmp_class_f32 s5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_class_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmp_class_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_class_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmp_class_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_cmp_class_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmp_class_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_cmp_class_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x50,0x01,0xff] + +//===----------------------------------------------------------------------===// +// VOPCX -> VOP3.DPP16. +//===----------------------------------------------------------------------===// + +v_cmpx_class_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_class_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_class_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_class_f32 v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x1b,0x00,0xff] + +//===----------------------------------------------------------------------===// +// VOP3P. 
+//===----------------------------------------------------------------------===// + +v_dot2_f32_f16 v0, v1, v2, v3 +// GFX11: v_dot2_f32_f16 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x13,0xcc,0x01,0x05,0x0e,0x1c] + +v_dot2_f32_f16_e64 v0, v1, v2, v3 +// GFX11: v_dot2_f32_f16 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x13,0xcc,0x01,0x05,0x0e,0x1c] + +//===----------------------------------------------------------------------===// +// VOP3P.DPP8. +//===----------------------------------------------------------------------===// + +v_dot2_f32_f16 v0, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_dot2_f32_f16_e64_dpp v0, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x00,0x40,0x13,0xcc,0xe9,0x04,0x0e,0x1c,0x01,0x77,0x39,0x05] + +v_dot2_f32_f16_e64_dpp v0, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_dot2_f32_f16_e64_dpp v0, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x00,0x40,0x13,0xcc,0xe9,0x04,0x0e,0x1c,0x01,0x77,0x39,0x05] + +//===----------------------------------------------------------------------===// +// VOP3P.DPP16. 
+//===----------------------------------------------------------------------===// + +v_dot2_f32_f16 v0, v1, v2, v3 quad_perm:[1,2,3,0] +// GFX11: v_dot2_f32_f16_e64_dpp v0, v1, v2, v3 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x40,0x13,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x39,0x00,0xff] + +v_dot2_f32_f16_e64_dpp v0, v1, v2, v3 quad_perm:[1,2,3,0] +// GFX11: v_dot2_f32_f16_e64_dpp v0, v1, v2, v3 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x40,0x13,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x39,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx11-promotions.s b/llvm/test/MC/AMDGPU/gfx11-promotions.s index 0d1568e..d9499b3 100644 --- a/llvm/test/MC/AMDGPU/gfx11-promotions.s +++ b/llvm/test/MC/AMDGPU/gfx11-promotions.s @@ -1,5 +1,5 @@ // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique --version 5 -// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1100 -mattr=+wavefrontsize32 %s | FileCheck --check-prefix=GFX11 %s +// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1100 -mattr=+real-true16 %s | FileCheck --check-prefix=GFX11 %s // Check opcode promotions and forced suffices. // 1. When a suffix is optional, check that it may be omitted. @@ -21,11 +21,11 @@ v_mov_b32_e32 v0, v1 // VOP2. //===----------------------------------------------------------------------===// -v_add_f16 v5, v1, v2 -// GFX11: v_add_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x64] +v_add_f16 v5.l, v1.l, v2.l +// GFX11: v_add_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x64] -v_add_f16_e32 v5, v1, v2 -// GFX11: v_add_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x64] +v_add_f16_e32 v5.l, v1.l, v2.l +// GFX11: v_add_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x64] //===----------------------------------------------------------------------===// // VOPC. @@ -41,11 +41,11 @@ v_cmp_lt_f32_e32 vcc_lo, v1, v2 // VOPCX. 
//===----------------------------------------------------------------------===// -v_cmpx_class_f16 v1, v2 -// GFX11: v_cmpx_class_f16_e32 v1, v2 ; encoding: [0x01,0x05,0xfa,0x7d] +v_cmpx_class_f16 v1.l, v2.l +// GFX11: v_cmpx_class_f16_e32 v1.l, v2.l ; encoding: [0x01,0x05,0xfa,0x7d] -v_cmpx_class_f16_e32 v1, v2 -// GFX11: v_cmpx_class_f16_e32 v1, v2 ; encoding: [0x01,0x05,0xfa,0x7d] +v_cmpx_class_f16_e32 v1.l, v2.l +// GFX11: v_cmpx_class_f16_e32 v1.l, v2.l ; encoding: [0x01,0x05,0xfa,0x7d] //===----------------------------------------------------------------------===// // VOP1.DPP8. @@ -71,61 +71,61 @@ v_bfrev_b32_dpp v5, v1 quad_perm:[3,2,1,0] // VOP2.DPP8. //===----------------------------------------------------------------------===// -v_add_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_add_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x64,0x01,0x77,0x39,0x05] +v_add_f16 v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_add_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x64,0x01,0x77,0x39,0x05] -v_add_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_add_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x64,0x01,0x77,0x39,0x05] +v_add_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_add_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x64,0x01,0x77,0x39,0x05] //===----------------------------------------------------------------------===// // VOP2.DPP16. 
//===----------------------------------------------------------------------===// -v_add_f16 v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: v_add_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1b,0x00,0xff] +v_add_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX11: v_add_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1b,0x00,0xff] -v_add_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: v_add_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1b,0x00,0xff] +v_add_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX11: v_add_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1b,0x00,0xff] //===----------------------------------------------------------------------===// // VOPC.DPP8. //===----------------------------------------------------------------------===// -v_cmp_le_u16 v1, v2 dpp8:[7,7,7,3,4,4,6,7] fi:1 -// GFX11: v_cmp_le_u16 vcc_lo, v1, v2 dpp8:[7,7,7,3,4,4,6,7] fi:1 ; encoding: [0xea,0x04,0x76,0x7c,0x01,0xff,0x47,0xfa] +v_cmp_le_u16 v1.l, v2.l dpp8:[7,7,7,3,4,4,6,7] fi:1 +// GFX11: v_cmp_le_u16 vcc_lo, v1.l, v2.l dpp8:[7,7,7,3,4,4,6,7] fi:1 ; encoding: [0xea,0x04,0x76,0x7c,0x01,0xff,0x47,0xfa] -v_cmp_le_u16_dpp v1, v2 dpp8:[7,7,7,3,4,4,6,7] fi:1 -// GFX11: v_cmp_le_u16 vcc_lo, v1, v2 dpp8:[7,7,7,3,4,4,6,7] fi:1 ; encoding: [0xea,0x04,0x76,0x7c,0x01,0xff,0x47,0xfa] +v_cmp_le_u16_dpp v1.l, v2.l dpp8:[7,7,7,3,4,4,6,7] fi:1 +// GFX11: v_cmp_le_u16 vcc_lo, v1.l, v2.l dpp8:[7,7,7,3,4,4,6,7] fi:1 ; encoding: [0xea,0x04,0x76,0x7c,0x01,0xff,0x47,0xfa] //===----------------------------------------------------------------------===// // VOPC.DPP16. 
//===----------------------------------------------------------------------===// -v_cmp_gt_u16 v1, v2 row_shl:0x7 row_mask:0x0 bank_mask:0x0 fi:1 -// GFX11: v_cmp_gt_u16 vcc_lo, v1, v2 row_shl:7 row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x78,0x7c,0x01,0x07,0x05,0x00] +v_cmp_gt_u16 v1.l, v2.l row_shl:0x7 row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: v_cmp_gt_u16 vcc_lo, v1.l, v2.l row_shl:7 row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x78,0x7c,0x01,0x07,0x05,0x00] -v_cmp_gt_u16_dpp v1, v2 row_shl:0x7 row_mask:0x0 bank_mask:0x0 fi:1 -// GFX11: v_cmp_gt_u16 vcc_lo, v1, v2 row_shl:7 row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x78,0x7c,0x01,0x07,0x05,0x00] +v_cmp_gt_u16_dpp v1.l, v2.l row_shl:0x7 row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: v_cmp_gt_u16 vcc_lo, v1.l, v2.l row_shl:7 row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x78,0x7c,0x01,0x07,0x05,0x00] //===----------------------------------------------------------------------===// // VOPCX.DPP8. //===----------------------------------------------------------------------===// -v_cmpx_class_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_class_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05] +v_cmpx_class_f16 v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_class_f16 v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05] -v_cmpx_class_f16_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_class_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05] +v_cmpx_class_f16_dpp v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_class_f16 v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05] //===----------------------------------------------------------------------===// // VOPCX.DPP16. 
//===----------------------------------------------------------------------===// -v_cmpx_class_f16 v1, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_class_f16 v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x1b,0x00,0xff] +v_cmpx_class_f16 v1.l, v2.l quad_perm:[3,2,1,0] +// GFX11: v_cmpx_class_f16 v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x1b,0x00,0xff] -v_cmpx_class_f16_dpp v1, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_class_f16 v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x1b,0x00,0xff] +v_cmpx_class_f16_dpp v1.l, v2.l quad_perm:[3,2,1,0] +// GFX11: v_cmpx_class_f16 v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x1b,0x00,0xff] //===----------------------------------------------------------------------===// // VOP1 -> VOP3. diff --git a/llvm/test/MC/AMDGPU/gfx1150_asm_features-fake16.s b/llvm/test/MC/AMDGPU/gfx1150_asm_features-fake16.s new file mode 100644 index 0000000..b329737 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1150_asm_features-fake16.s @@ -0,0 +1,48 @@ +// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1150 -mattr=-real-true16 %s | FileCheck --check-prefix=GFX1150 %s +// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1151 -mattr=-real-true16 %s | FileCheck --check-prefix=GFX1150 %s +// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1152 -mattr=-real-true16 %s | FileCheck --check-prefix=GFX1150 %s +// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1153 -mattr=-real-true16 %s | FileCheck --check-prefix=GFX1150 %s + +// +// Subtargets allow src1 of VOP3 DPP instructions to be SGPR or inlinable +// constant. 
+// + +v_add3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX1150: encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, 42, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX1150: encoding: [0x05,0x00,0x55,0xd6,0xfa,0x54,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, s2, v0 dpp8:[7,6,5,4,3,2,1,0] +// GFX1150: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x00,0x04,0x01,0x77,0x39,0x05] + +v_add3_u32_e64_dpp v5, v1, 42, v0 dpp8:[7,6,5,4,3,2,1,0] +// GFX1150: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x54,0x01,0x04,0x01,0x77,0x39,0x05] + +v_add3_u32_e64_dpp v5, v1, s2, s3 dpp8:[7,6,5,4,3,2,1,0] +// GFX1150: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x0c,0x00,0x01,0x77,0x39,0x05] + +v_cmp_ne_i32_e64_dpp vcc_lo, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1150: encoding: [0x6a,0x00,0x45,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_add_f32_e64_dpp v5, v1, s2 row_mirror +// GFX1150: encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x00,0x00,0x01,0x40,0x01,0xff] + +v_min3_f16 v5, v1, s2, 2.0 op_sel:[1,1,0,1] quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf +// GFX1150: encoding: [0x05,0x58,0x49,0xd6,0xfa,0x04,0xd0,0x03,0x01,0x55,0x00,0xff] + +v_cmp_le_f32 vcc_lo, v1, v2 row_mirror +// GFX1150: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff] + +v_cmp_le_f32 vcc_lo, v1, s2 row_mirror +// GFX1150: encoding: [0x6a,0x00,0x13,0xd4,0xfa,0x04,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cmp_le_f32 vcc_lo, v1, s2 quad_perm:[1,1,1,1] +// GFX1150: encoding: [0x6a,0x00,0x13,0xd4,0xfa,0x04,0x00,0x00,0x01,0x55,0x00,0xff] + +v_cmpx_neq_f16 v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX1150: encoding: [0x7e,0x00,0x8d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_class_f16 v1, 2.0 quad_perm:[1,1,1,1] +// GFX1150: encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x55,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx1150_asm_features.s b/llvm/test/MC/AMDGPU/gfx1150_asm_features.s index 5c498a2..6e5bda3 100644 
--- a/llvm/test/MC/AMDGPU/gfx1150_asm_features.s +++ b/llvm/test/MC/AMDGPU/gfx1150_asm_features.s @@ -1,8 +1,8 @@ // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique --version 5 -// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1150 %s | FileCheck --check-prefix=GFX1150 %s -// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1151 %s | FileCheck --check-prefix=GFX1150 %s -// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1152 %s | FileCheck --check-prefix=GFX1150 %s -// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1153 %s | FileCheck --check-prefix=GFX1150 %s +// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1150 -mattr=+real-true16 %s | FileCheck --check-prefix=GFX1150 %s +// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1151 -mattr=+real-true16 %s | FileCheck --check-prefix=GFX1150 %s +// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1152 -mattr=+real-true16 %s | FileCheck --check-prefix=GFX1150 %s +// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1153 -mattr=+real-true16 %s | FileCheck --check-prefix=GFX1150 %s // Subtargets allow src1 of VOP3 DPP instructions to be SGPR or inlinable // constant. 
@@ -28,8 +28,8 @@ v_cmp_ne_i32_e64_dpp vcc_lo, v1, s2 dpp8:[7,6,5,4,3,2,1,0] v_add_f32_e64_dpp v5, v1, s2 row_mirror // GFX1150: v_add_f32_e64_dpp v5, v1, s2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x00,0x00,0x01,0x40,0x01,0xff] -v_min3_f16 v5, v1, s2, 2.0 op_sel:[1,1,0,1] quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf -// GFX1150: v_min3_f16_e64_dpp v5, v1, s2, 2.0 op_sel:[1,1,0,1] quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x49,0xd6,0xfa,0x04,0xd0,0x03,0x01,0x55,0x00,0xff] +v_min3_f16 v5.h, v1.h, s2, 2.0 op_sel:[1,1,0,1] quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf +// GFX1150: v_min3_f16_e64_dpp v5.h, v1.h, s2, 2.0 op_sel:[1,1,0,1] quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x49,0xd6,0xfa,0x04,0xd0,0x03,0x01,0x55,0x00,0xff] v_cmp_le_f32 vcc_lo, v1, v2 row_mirror // GFX1150: v_cmp_le_f32 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff] @@ -40,8 +40,8 @@ v_cmp_le_f32 vcc_lo, v1, s2 row_mirror v_cmp_le_f32 vcc_lo, v1, s2 quad_perm:[1,1,1,1] // GFX1150: v_cmp_le_f32_e64_dpp vcc_lo, v1, s2 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x13,0xd4,0xfa,0x04,0x00,0x00,0x01,0x55,0x00,0xff] -v_cmpx_neq_f16 v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] -// GFX1150: v_cmpx_neq_f16_e64_dpp v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +v_cmpx_neq_f16 v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX1150: v_cmpx_neq_f16_e64_dpp v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] -v_cmpx_class_f16 v1, 2.0 quad_perm:[1,1,1,1] -// GFX1150: v_cmpx_class_f16_e64_dpp v1, 2.0 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x55,0x00,0xff] +v_cmpx_class_f16 v1.l, 2.0 quad_perm:[1,1,1,1] +// GFX1150: v_cmpx_class_f16_e64_dpp v1.l, 2.0 quad_perm:[1,1,1,1] 
row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x55,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vinterp_err-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vinterp_err-fake16.s new file mode 100644 index 0000000..7450d4b --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vinterp_err-fake16.s @@ -0,0 +1,43 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 %s 2>&1 | FileCheck %s -check-prefix=GCN-ERR --implicit-check-not=error: --strict-whitespace +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 %s 2>&1 | FileCheck %s -check-prefix=GCN-ERR --implicit-check-not=error: --strict-whitespace + +//===----------------------------------------------------------------------===// +// VINTERP src operands must be VGPRs. +// Check that other operand kinds are rejected by assembler. +//===----------------------------------------------------------------------===// + +v_interp_p10_f32 v0, s1, v2, v3 +// GCN-ERR: :[[@LINE-1]]:22: error: invalid operand for instruction + +v_interp_p10_f32 v0, v1, s2, v3 +// GCN-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction + +v_interp_p10_f32 v0, v1, v2, s3 +// GCN-ERR: :[[@LINE-1]]:30: error: invalid operand for instruction + +v_interp_p2_f32 v0, 1, v2, v3 +// GCN-ERR: :[[@LINE-1]]:21: error: invalid operand for instruction + +v_interp_p2_f32 v0, v1, 2, v3 +// GCN-ERR: :[[@LINE-1]]:25: error: invalid operand for instruction + +v_interp_p2_f32 v0, v1, v2, 3 +// GCN-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction + +v_interp_p10_f16_f32 v0, s1, v2, v3 +// GCN-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction + +v_interp_p10_f16_f32 v0, v1, s2, v3 +// GCN-ERR: :[[@LINE-1]]:30: error: invalid operand for instruction + +v_interp_p10_f16_f32 v0, v1, v2, s3 +// GCN-ERR: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_interp_p2_f16_f32 v0, 1, v2, v3 +// GCN-ERR: :[[@LINE-1]]:25: error: invalid operand for instruction + 
+v_interp_p2_f16_f32 v0, v1, 2, v3 +// GCN-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction + +v_interp_p2_f16_f32 v0, v1, v2, 3 +// GCN-ERR: :[[@LINE-1]]:33: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s index 1aefd1f..5864133 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s @@ -2630,6 +2630,36 @@ v_log_f32 v5, src_scc v_log_f32 v255, 0xaf123456 // GFX11: v_log_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x4e,0xfe,0x7f,0x56,0x34,0x12,0xaf] +v_mov_b16_e32 v0.l, v1.l +// GFX11: v_mov_b16_e32 v0.l, v1.l ; encoding: [0x01,0x39,0x00,0x7e] + +v_mov_b16_e32 v0.l, s1 +// GFX11: v_mov_b16_e32 v0.l, s1 ; encoding: [0x01,0x38,0x00,0x7e] + +v_mov_b16_e32 v0.h, 0 +// GFX11: v_mov_b16_e32 v0.h, 0 ; encoding: [0x80,0x38,0x00,0x7f] + +v_mov_b16_e32 v0.h, 1.0 +// GFX11: v_mov_b16_e32 v0.h, 1.0 ; encoding: [0xf2,0x38,0x00,0x7f] + +v_mov_b16_e32 v0.l, 0x1234 +// GFX11: v_mov_b16_e32 v0.l, 0x1234 ; encoding: [0xff,0x38,0x00,0x7e,0x34,0x12,0x00,0x00] + +v_mov_b16_e64 v0.l, v1.l +// GFX11: v_mov_b16_e64 v0.l, v1.l ; encoding: [0x00,0x00,0x9c,0xd5,0x01,0x01,0x00,0x00] + +v_mov_b16_e64 v200.l, v1.h +// GFX11: v_mov_b16_e64 v200.l, v1.h op_sel:[1,0] ; encoding: [0xc8,0x08,0x9c,0xd5,0x01,0x01,0x00,0x00] + +v_mov_b16_e64 v0.l, s1 +// GFX11: v_mov_b16_e64 v0.l, s1 ; encoding: [0x00,0x00,0x9c,0xd5,0x01,0x00,0x00,0x00] + +v_mov_b16_e64 v200.h, 1 +// GFX11: v_mov_b16_e64 v200.h, 1 op_sel:[0,1] ; encoding: [0xc8,0x40,0x9c,0xd5,0x81,0x00,0x00,0x00] + +v_mov_b16_e64 v0.l, 0x1234 +// GFX11: v_mov_b16_e64 v0.l, 0x1234 ; encoding: [0x00,0x00,0x9c,0xd5,0xff,0x00,0x00,0x00,0x34,0x12,0x00,0x00] + v_mov_b32 v5, v1 // GFX11: v_mov_b32_e32 v5, v1 ; encoding: [0x01,0x03,0x0a,0x7e] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_alias-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_alias-fake16.s new file mode 100644 index 0000000..34f519e --- /dev/null +++ 
b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_alias-fake16.s @@ -0,0 +1,15 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique --version 5 +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck -check-prefix=GFX11 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck -check-prefix=GFX11 %s + +v_cvt_pknorm_i16_f16 v5, v1, v2 +// GFX11: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pknorm_u16_f16 v5, v1, v2 +// GFX11: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] + +v_add3_nc_u32 v5, v1, v2, s3 +// GFX11: v_add3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0x0e,0x00] + +v_xor_add_u32 v5, v1, v2, s3 +// GFX11: v_xad_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x45,0xd6,0x01,0x05,0x0e,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_alias.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_alias.s index f6ac190..b34c94d 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_alias.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_alias.s @@ -1,12 +1,12 @@ // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique --version 5 -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck -check-prefix=GFX11 %s -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck -check-prefix=GFX11 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck -check-prefix=GFX11 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck -check-prefix=GFX11 %s -v_cvt_pknorm_i16_f16 v5, v1, v2 -// GFX11: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] +v_cvt_pknorm_i16_f16 v5, v1.l, 
v2.l +// GFX11: v_cvt_pk_norm_i16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] -v_cvt_pknorm_u16_f16 v5, v1, v2 -// GFX11: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] +v_cvt_pknorm_u16_f16 v5, v1.l, v2.l +// GFX11: v_cvt_pk_norm_u16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] v_add3_nc_u32 v5, v1, v2, s3 // GFX11: v_add3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0x0e,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_features.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_features.s new file mode 100644 index 0000000..e15a48a --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_features.s @@ -0,0 +1,77 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique --version 5 +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefixes=W32-ERR,GFX11-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefixes=W64-ERR,GFX11-ERR --implicit-check-not=error: %s + +//===----------------------------------------------------------------------===// +// HW correctly handles fp inline constants for src2 (they have f16 type). +// Check that inline constants are not converted to literals. 
+//===----------------------------------------------------------------------===// + +v_cmp_class_f16_e64 s[10:11], v1.l, 0.5 +// W64: v_cmp_class_f16_e64 s[10:11], v1.l, 0.5 ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0xe1,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_class_f16_e64 s10, v1.l, 0.5 +// W32: v_cmp_class_f16_e64 s10, v1.l, 0.5 ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0xe1,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmpx_class_f16_e64 v1.l, 0.5 +// GFX11: v_cmpx_class_f16_e64 v1.l, 0.5 ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0xe1,0x01,0x00] + +//===----------------------------------------------------------------------===// +// src0 and src1 are packed operands. +// Check that op_sel is not allowed with these operands. +//===----------------------------------------------------------------------===// + +v_dot2_f16_f16_e64 v0.l, v1.h, v2, v3.l +// GFX11-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2.h, v3.l dpp8:[0,1,2,3,4,4,4,4] +// GFX11-ERR: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_dot2_f16_f16_e64_dpp v0.l, v1.h, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11-ERR: :[[@LINE-1]]:30: error: invalid operand for instruction + +v_dot2_bf16_bf16_e64 v0.l, v1, v2.h, v3.l +// GFX11-ERR: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_dot2_bf16_bf16_e64_dpp v0.l, v1.h, v2, v3.l dpp8:[0,1,2,3,4,4,4,4] +// GFX11-ERR: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2.h, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11-ERR: :[[@LINE-1]]:36: error: invalid operand for instruction + +//===----------------------------------------------------------------------===// +// src0 and src1 are vector operands. +// Check that SGPRs are not allowed for these operands. 
+//===----------------------------------------------------------------------===// + +v_dot2_f16_f16_e64_dpp v0.l, s1, v2, v3.l dpp8:[0,1,2,3,4,4,4,4] +// GFX11-ERR: :[[@LINE-1]]:30: error: invalid operand for instruction + +v_dot2_f16_f16_e64_dpp v0.l, v1, s2, v3.l dpp8:[0,1,2,3,4,4,4,4] +// GFX11-ERR: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_dot2_f16_f16_e64_dpp v0.l, s1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11-ERR: :[[@LINE-1]]:30: error: invalid operand for instruction + +v_dot2_f16_f16_e64_dpp v0.l, v1, s2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11-ERR: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_dot2_bf16_bf16_e64_dpp v0.l, s1, v2, v3.l dpp8:[0,1,2,3,4,4,4,4] +// GFX11-ERR: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, s2, v3.l dpp8:[0,1,2,3,4,4,4,4] +// GFX11-ERR: :[[@LINE-1]]:36: error: invalid operand for instruction + +v_dot2_bf16_bf16_e64_dpp v0.l, s1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// GFX11-ERR: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, s2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// GFX11-ERR: :[[@LINE-1]]:36: error: invalid operand for instruction + +// Ensure bits 8-15 are not zeroed out and that the .h suffixes expected on src0 and dst are present. 
+v_mul_f16_e64 v5.h, v1.h, v2.l +// GFX11: v_mul_f16_e64 v5.h, v1.h, v2.l op_sel:[1,0,1] ; encoding: [0x05,0x48,0x35,0xd5,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s index e21e5bf..fe8858d 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s @@ -2696,6 +2696,37 @@ v_log_f32 v5, src_scc v_log_f32 v255, 0xaf123456 // GFX12: v_log_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x4e,0xfe,0x7f,0x56,0x34,0x12,0xaf] +v_mov_b16_e32 v0.l, v1.l +// GFX12: v_mov_b16_e32 v0.l, v1.l ; encoding: [0x01,0x39,0x00,0x7e] + +v_mov_b16_e32 v0.l, s1 +// GFX12: v_mov_b16_e32 v0.l, s1 ; encoding: [0x01,0x38,0x00,0x7e] + +v_mov_b16_e32 v0.h, 0 +// GFX12: v_mov_b16_e32 v0.h, 0 ; encoding: [0x80,0x38,0x00,0x7f] + +v_mov_b16_e32 v0.h, 1.0 +// GFX12-ASM: v_mov_b16_e32 v0.h, 1.0 ; encoding: [0xf2,0x38,0x00,0x7f] +// GFX12-DIS: v_mov_b16_e32 v0.h, 0x3c00 ; encoding: [0xff,0x38,0x00,0x7f,0x00,0x3c,0x00,0x00] + +v_mov_b16_e32 v0.l, 0x1234 +// GFX12: v_mov_b16_e32 v0.l, 0x1234 ; encoding: [0xff,0x38,0x00,0x7e,0x34,0x12,0x00,0x00] + +v_mov_b16_e64 v0.l, v1.l +// GFX12: v_mov_b16_e64 v0.l, v1.l ; encoding: [0x00,0x00,0x9c,0xd5,0x01,0x01,0x00,0x00] + +v_mov_b16_e64 v200.l, v1.h +// GFX12: v_mov_b16_e64 v200.l, v1.h op_sel:[1,0] ; encoding: [0xc8,0x08,0x9c,0xd5,0x01,0x01,0x00,0x00] + +v_mov_b16_e64 v0.l, s1 +// GFX12: v_mov_b16_e64 v0.l, s1 ; encoding: [0x00,0x00,0x9c,0xd5,0x01,0x00,0x00,0x00] + +v_mov_b16_e64 v200.h, 1 +// GFX12: v_mov_b16_e64 v200.h, 1 op_sel:[0,1] ; encoding: [0xc8,0x40,0x9c,0xd5,0x81,0x00,0x00,0x00] + +v_mov_b16_e64 v0.l, 0x1234 +// GFX12: v_mov_b16_e64 v0.l, 0x1234 ; encoding: [0x00,0x00,0x9c,0xd5,0xff,0x00,0x00,0x00,0x34,0x12,0x00,0x00] + v_mov_b32 v5, v1 // GFX12: v_mov_b32_e32 v5, v1 ; encoding: [0x01,0x03,0x0a,0x7e] |