aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAbhay Kandpal <abhay@linux.ibm.com>2025-09-02 23:36:42 +0000
committerPeter Bergner <bergner@tenstorrent.com>2025-09-02 23:36:42 +0000
commitd419a1b472ffd9479ae9a561af1f1d8691152b15 (patch)
tree6b21d232538bd6e9c9e7533a0db0ae555a3161be
parent4520d8dd1cf8ef91b090f1af73b48874d6804445 (diff)
downloadbinutils-d419a1b472ffd9479ae9a561af1f1d8691152b15.zip
binutils-d419a1b472ffd9479ae9a561af1f1d8691152b15.tar.gz
binutils-d419a1b472ffd9479ae9a561af1f1d8691152b15.tar.bz2
PowerPC: Vector Instructions for Deeply Compressed Weight for AI (RFC02691)
opcodes/ * ppc-opc.c (VXSEL5, VXSEL4, VXSEL3, VXSEL2, UIMM1, VXUIMM1_MASK): New defines. (powerpc_opcodes) <vucmprhn, vucmprln, vucmprhb, vucmprlb, vucmprhh, vucmprlh, vupkhsntob, vupklsntob, vupkint4tobf16, vupkint8tobf16, vupkint4tofp32, vupkint8tofp32>: New instructions. gas/ * testsuite/gas/ppc/future.s: Add new testcases. * testsuite/gas/ppc/future.d: Likewise.
-rw-r--r--gas/testsuite/gas/ppc/future.d12
-rw-r--r--gas/testsuite/gas/ppc/future.s12
-rw-r--r--opcodes/ppc-opc.c22
3 files changed, 46 insertions, 0 deletions
diff --git a/gas/testsuite/gas/ppc/future.d b/gas/testsuite/gas/ppc/future.d
index 5c008b5..645efd2 100644
--- a/gas/testsuite/gas/ppc/future.d
+++ b/gas/testsuite/gas/ppc/future.d
@@ -92,4 +92,16 @@ Disassembly of section \.text:
.* (3d e0 ff ff|ff ff e0 3d)
.*: (06 10 7f ff|ff 7f 10 06) paddis r15,0,2147483647,1 # 148
.* (3d e0 ff ff|ff ff e0 3d)
+.*: (03 a8 b8 11|11 b8 a8 03) vucmprhn v13,v24,v21
+.*: (43 b0 b8 11|11 b8 b0 43) vucmprln v13,v24,v22
+.*: (83 b0 d8 11|11 d8 b0 83) vucmprhb v14,v24,v22
+.*: (c3 a8 d8 11|11 d8 a8 c3) vucmprlb v14,v24,v21
+.*: (03 b1 f8 11|11 f8 b1 03) vucmprhh v15,v24,v22
+.*: (43 a9 f8 11|11 f8 a9 43) vucmprlh v15,v24,v21
+.*: (83 b1 a0 12|12 a0 b1 83) vupkhsntob v21,v22
+.*: (83 a9 81 12|12 81 a9 83) vupklsntob v20,v21
+.*: (83 91 2b 12|12 2b 91 83) vupkint4tobf16 v17,v18,3
+.*: (83 c9 e3 11|11 e3 c9 83) vupkint8tobf16 v15,v25,1
+.*: (83 c1 17 12|12 17 c1 83) vupkint4tofp32 v16,v24,7
+.*: (83 b9 2f 12|12 2f b9 83) vupkint8tofp32 v17,v23,3
#pass
diff --git a/gas/testsuite/gas/ppc/future.s b/gas/testsuite/gas/ppc/future.s
index 55867de..0f00299 100644
--- a/gas/testsuite/gas/ppc/future.s
+++ b/gas/testsuite/gas/ppc/future.s
@@ -66,4 +66,16 @@ _start:
psubis 30, 10, -2147483647, 0
paddis 15, 0, 2147483647, 1
psubis 15, 0, -2147483647, 1
+ vucmprhn 13, 24, 21
+ vucmprln 13, 24, 22
+ vucmprhb 14, 24, 22
+ vucmprlb 14, 24, 21
+ vucmprhh 15, 24, 22
+ vucmprlh 15, 24, 21
+ vupkhsntob 21, 22
+ vupklsntob 20, 21
+ vupkint4tobf16 17, 18, 3
+ vupkint8tobf16 15, 25, 1
+ vupkint4tofp32 16, 24, 7
+ vupkint8tofp32 17, 23, 3
diff --git a/opcodes/ppc-opc.c b/opcodes/ppc-opc.c
index 712cd31..028295a 100644
--- a/opcodes/ppc-opc.c
+++ b/opcodes/ppc-opc.c
@@ -3813,6 +3813,7 @@ const struct powerpc_operand powerpc_operands[] =
#define R RMC + 1
#define MP R
+#define UIMM1 R
#define P1 R
{ 0x1, 16, NULL, NULL, 0 },
@@ -4406,6 +4407,12 @@ const unsigned int num_powerpc_operands = ARRAY_SIZE (powerpc_operands);
/* A VX form instruction. */
#define VX(op, xop) (OP (op) | (((uint64_t)(xop)) & 0x7ff))
+/* A VX form instruction with a selector field. */
+#define VXSEL5(op, xop, sel) (VX(op, xop) | (((sel) & 0x1f) << 16))
+#define VXSEL4(op, xop, sel) (VX(op, xop) | (((sel) & 0xf) << 17))
+#define VXSEL3(op, xop, sel) (VX(op, xop) | (((sel) & 0x7) << 18))
+#define VXSEL2(op, xop, sel) (VX(op, xop) | (((sel) & 0x3) << 19))
+
/* The mask for an VX form instruction. */
#define VX_MASK VX(0x3f, 0x7ff)
@@ -4497,6 +4504,9 @@ const unsigned int num_powerpc_operands = ARRAY_SIZE (powerpc_operands);
/* A VX_MASK with a UIMM2 field. */
#define VXUIMM2_MASK (VX_MASK | (0x7 << 18))
+/* A VX_MASK with a UIMM1 field. */
+#define VXUIMM1_MASK (VX_MASK | (0xf << 17))
+
/* A VX_MASK with a PS field. */
#define VXPS_MASK (VX_MASK & ~(0x1 << 9))
@@ -5212,6 +5222,7 @@ const struct powerpc_opcode powerpc_opcodes[] = {
{"vaddubm", VX (4, 0), VX_MASK, PPCVEC, 0, {VD, VA, VB}},
{"vmul10cuq", VX (4, 1), VXVB_MASK, PPCVEC3, 0, {VD, VA}},
{"vmaxub", VX (4, 2), VX_MASK, PPCVEC, 0, {VD, VA, VB}},
+{"vucmprhn", VX (4, 3), VX_MASK, FUTURE, 0, {VD, VA, VB}},
{"vrlb", VX (4, 4), VX_MASK, PPCVEC, 0, {VD, VA, VB}},
{"vrlq", VX (4, 5), VX_MASK, POWER10, 0, {VD, VA, VB}},
{"vcmpequb", VXR(4, 6,0), VXR_MASK, PPCVEC, 0, {VD, VA, VB}},
@@ -5305,6 +5316,7 @@ const struct powerpc_opcode powerpc_opcodes[] = {
{"vadduhm", VX (4, 64), VX_MASK, PPCVEC, 0, {VD, VA, VB}},
{"vmul10ecuq", VX (4, 65), VX_MASK, PPCVEC3, 0, {VD, VA, VB}},
{"vmaxuh", VX (4, 66), VX_MASK, PPCVEC, 0, {VD, VA, VB}},
+{"vucmprln", VX (4, 67), VX_MASK, FUTURE, 0, {VD, VA, VB}},
{"vrlh", VX (4, 68), VX_MASK, PPCVEC, 0, {VD, VA, VB}},
{"vrlqmi", VX (4, 69), VX_MASK, POWER10, 0, {VD, VA, VB}},
{"vcmpequh", VXR(4, 70,0), VXR_MASK, PPCVEC, 0, {VD, VA, VB}},
@@ -5327,6 +5339,7 @@ const struct powerpc_opcode powerpc_opcodes[] = {
{"ps_cmpu1", X (4, 64), XBF_MASK, PPCPS, 0, {BF, FRA, FRB}},
{"vadduwm", VX (4, 128), VX_MASK, PPCVEC, 0, {VD, VA, VB}},
{"vmaxuw", VX (4, 130), VX_MASK, PPCVEC, 0, {VD, VA, VB}},
+{"vucmprhb", VX (4, 131), VX_MASK, FUTURE, 0, {VD, VA, VB}},
{"vrlw", VX (4, 132), VX_MASK, PPCVEC, 0, {VD, VA, VB}},
{"vrlwmi", VX (4, 133), VX_MASK, PPCVEC3, 0, {VD, VA, VB}},
{"vcmpequw", VXR(4, 134,0), VXR_MASK, PPCVEC, 0, {VD, VA, VB}},
@@ -5344,6 +5357,7 @@ const struct powerpc_opcode powerpc_opcodes[] = {
{"ps_cmpo1", X (4, 96), XBF_MASK, PPCPS, 0, {BF, FRA, FRB}},
{"vaddudm", VX (4, 192), VX_MASK, PPCVEC2, 0, {VD, VA, VB}},
{"vmaxud", VX (4, 194), VX_MASK, PPCVEC2, 0, {VD, VA, VB}},
+{"vucmprlb", VX (4, 195), VX_MASK, FUTURE, 0, {VD, VA, VB}},
{"vrld", VX (4, 196), VX_MASK, PPCVEC2, 0, {VD, VA, VB}},
{"vrldmi", VX (4, 197), VX_MASK, PPCVEC3, 0, {VD, VA, VB}},
{"vcmpeqfp", VXR(4, 198,0), VXR_MASK, PPCVEC, 0, {VD, VA, VB}},
@@ -5359,6 +5373,7 @@ const struct powerpc_opcode powerpc_opcodes[] = {
{"vadduqm", VX (4, 256), VX_MASK, PPCVEC2, 0, {VD, VA, VB}},
{"vcmpuq", VX (4, 257), VXBF_MASK, POWER10, 0, {BF, VA, VB}},
{"vmaxsb", VX (4, 258), VX_MASK, PPCVEC, 0, {VD, VA, VB}},
+{"vucmprhh", VX (4, 259), VX_MASK, FUTURE, 0, {VD, VA, VB}},
{"vslb", VX (4, 260), VX_MASK, PPCVEC, 0, {VD, VA, VB}},
{"vslq", VX (4, 261), VX_MASK, POWER10, 0, {VD, VA, VB}},
{"vcmpnezb", VXR(4, 263,0), VXR_MASK, PPCVEC3, 0, {VD, VA, VB}},
@@ -5377,6 +5392,7 @@ const struct powerpc_opcode powerpc_opcodes[] = {
{"vaddcuq", VX (4, 320), VX_MASK, PPCVEC2, 0, {VD, VA, VB}},
{"vcmpsq", VX (4, 321), VXBF_MASK, POWER10, 0, {BF, VA, VB}},
{"vmaxsh", VX (4, 322), VX_MASK, PPCVEC, 0, {VD, VA, VB}},
+{"vucmprlh", VX (4, 323), VX_MASK, FUTURE, 0, {VD, VA, VB}},
{"vslh", VX (4, 324), VX_MASK, PPCVEC, 0, {VD, VA, VB}},
{"vrlqnm", VX (4, 325), VX_MASK, POWER10, 0, {VD, VA, VB}},
{"vcmpnezh", VXR(4, 327,0), VXR_MASK, PPCVEC3, 0, {VD, VA, VB}},
@@ -5393,6 +5409,12 @@ const struct powerpc_opcode powerpc_opcodes[] = {
{"nmacchw.", XO (4, 174,0,1), XO_MASK, MULHW, 0, {RT, RA, RB}},
{"vaddcuw", VX (4, 384), VX_MASK, PPCVEC, 0, {VD, VA, VB}},
{"vmaxsw", VX (4, 386), VX_MASK, PPCVEC, 0, {VD, VA, VB}},
+{"vupkhsntob", VXSEL5 (4, 387,0), VXVA_MASK, FUTURE, 0, {VD, VB}},
+{"vupklsntob", VXSEL5 (4, 387,1), VXVA_MASK, FUTURE, 0, {VD, VB}},
+{"vupkint8tobf16", VXSEL4 (4, 387,1), VXUIMM1_MASK, FUTURE, 0, {VD, VB, UIMM1}},
+{"vupkint4tobf16", VXSEL3 (4, 387,2), VXUIMM2_MASK, FUTURE, 0, {VD, VB, UIMM2}},
+{"vupkint8tofp32", VXSEL3 (4, 387,3), VXUIMM2_MASK, FUTURE, 0, {VD, VB, UIMM2}},
+{"vupkint4tofp32", VXSEL2 (4, 387,2), VXUIMM3_MASK, FUTURE, 0, {VD, VB, UIMM3}},
{"vslw", VX (4, 388), VX_MASK, PPCVEC, 0, {VD, VA, VB}},
{"vrlwnm", VX (4, 389), VX_MASK, PPCVEC3, 0, {VD, VA, VB}},
{"vcmpnezw", VXR(4, 391,0), VXR_MASK, PPCVEC3, 0, {VD, VA, VB}},