diff options
author | Abhay Kandpal <abhay@linux.ibm.com> | 2025-09-02 23:36:42 +0000 |
---|---|---|
committer | Peter Bergner <bergner@tenstorrent.com> | 2025-09-02 23:36:42 +0000 |
commit | d419a1b472ffd9479ae9a561af1f1d8691152b15 (patch) | |
tree | 6b21d232538bd6e9c9e7533a0db0ae555a3161be | |
parent | 4520d8dd1cf8ef91b090f1af73b48874d6804445 (diff) | |
download | binutils-d419a1b472ffd9479ae9a561af1f1d8691152b15.zip binutils-d419a1b472ffd9479ae9a561af1f1d8691152b15.tar.gz binutils-d419a1b472ffd9479ae9a561af1f1d8691152b15.tar.bz2 |
PowerPC: Vector Instructions for Deeply Compressed Weight for AI (RFC02691)
opcodes/
* ppc-opc.c (VXSEL5, VXSEL4, VXSEL3, VXSEL2, UIMM1, VXUIMM1_MASK): New defines.
(powerpc_opcodes): <vucmprhn, vucmprln, vucmprhb, vucmprlb,
vucmprhh, vucmprlh, vupkhsntob, vupklsntob, vupkint4tobf16,
vupkint8tobf16, vupkint4tofp32, vupkint8tofp32>: New instructions.
gas/
* testsuite/gas/ppc/future.s: Add new testcases.
* testsuite/gas/ppc/future.d: Likewise.
-rw-r--r-- | gas/testsuite/gas/ppc/future.d | 12 | ||||
-rw-r--r-- | gas/testsuite/gas/ppc/future.s | 12 | ||||
-rw-r--r-- | opcodes/ppc-opc.c | 22 |
3 files changed, 46 insertions, 0 deletions
diff --git a/gas/testsuite/gas/ppc/future.d b/gas/testsuite/gas/ppc/future.d index 5c008b5..645efd2 100644 --- a/gas/testsuite/gas/ppc/future.d +++ b/gas/testsuite/gas/ppc/future.d @@ -92,4 +92,16 @@ Disassembly of section \.text: .* (3d e0 ff ff|ff ff e0 3d) .*: (06 10 7f ff|ff 7f 10 06) paddis r15,0,2147483647,1 # 148 .* (3d e0 ff ff|ff ff e0 3d) +.*: (03 a8 b8 11|11 b8 a8 03) vucmprhn v13,v24,v21 +.*: (43 b0 b8 11|11 b8 b0 43) vucmprln v13,v24,v22 +.*: (83 b0 d8 11|11 d8 b0 83) vucmprhb v14,v24,v22 +.*: (c3 a8 d8 11|11 d8 a8 c3) vucmprlb v14,v24,v21 +.*: (03 b1 f8 11|11 f8 b1 03) vucmprhh v15,v24,v22 +.*: (43 a9 f8 11|11 f8 a9 43) vucmprlh v15,v24,v21 +.*: (83 b1 a0 12|12 a0 b1 83) vupkhsntob v21,v22 +.*: (83 a9 81 12|12 81 a9 83) vupklsntob v20,v21 +.*: (83 91 2b 12|12 2b 91 83) vupkint4tobf16 v17,v18,3 +.*: (83 c9 e3 11|11 e3 c9 83) vupkint8tobf16 v15,v25,1 +.*: (83 c1 17 12|12 17 c1 83) vupkint4tofp32 v16,v24,7 +.*: (83 b9 2f 12|12 2f b9 83) vupkint8tofp32 v17,v23,3 #pass diff --git a/gas/testsuite/gas/ppc/future.s b/gas/testsuite/gas/ppc/future.s index 55867de..0f00299 100644 --- a/gas/testsuite/gas/ppc/future.s +++ b/gas/testsuite/gas/ppc/future.s @@ -66,4 +66,16 @@ _start: psubis 30, 10, -2147483647, 0 paddis 15, 0, 2147483647, 1 psubis 15, 0, -2147483647, 1 + vucmprhn 13, 24, 21 + vucmprln 13, 24, 22 + vucmprhb 14, 24, 22 + vucmprlb 14, 24, 21 + vucmprhh 15, 24, 22 + vucmprlh 15, 24, 21 + vupkhsntob 21, 22 + vupklsntob 20, 21 + vupkint4tobf16 17, 18, 3 + vupkint8tobf16 15, 25, 1 + vupkint4tofp32 16, 24, 7 + vupkint8tofp32 17, 23, 3 diff --git a/opcodes/ppc-opc.c b/opcodes/ppc-opc.c index 712cd31..028295a 100644 --- a/opcodes/ppc-opc.c +++ b/opcodes/ppc-opc.c @@ -3813,6 +3813,7 @@ const struct powerpc_operand powerpc_operands[] = #define R RMC + 1 #define MP R +#define UIMM1 R #define P1 R { 0x1, 16, NULL, NULL, 0 }, @@ -4406,6 +4407,12 @@ const unsigned int num_powerpc_operands = ARRAY_SIZE (powerpc_operands); /* A VX form instruction. 
*/ #define VX(op, xop) (OP (op) | (((uint64_t)(xop)) & 0x7ff)) +/* A VX form instruction with selector bit */ +#define VXSEL5(op, xop, sel) (VX(op, xop) | (((sel) & 0x1f) << 16)) +#define VXSEL4(op, xop, sel) (VX(op, xop) | (((sel) & 0xf) << 17)) +#define VXSEL3(op, xop, sel) (VX(op, xop) | (((sel) & 0x7) << 18)) +#define VXSEL2(op, xop, sel) (VX(op, xop) | (((sel) & 0x3) << 19)) + /* The mask for an VX form instruction. */ #define VX_MASK VX(0x3f, 0x7ff) @@ -4497,6 +4504,9 @@ const unsigned int num_powerpc_operands = ARRAY_SIZE (powerpc_operands); /* A VX_MASK with a UIMM2 field. */ #define VXUIMM2_MASK (VX_MASK | (0x7 << 18)) +/* A VX_MASK with a UIMM1 field. */ +#define VXUIMM1_MASK (VX_MASK | (0xf << 17)) + /* A VX_MASK with a PS field. */ #define VXPS_MASK (VX_MASK & ~(0x1 << 9)) @@ -5212,6 +5222,7 @@ const struct powerpc_opcode powerpc_opcodes[] = { {"vaddubm", VX (4, 0), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, {"vmul10cuq", VX (4, 1), VXVB_MASK, PPCVEC3, 0, {VD, VA}}, {"vmaxub", VX (4, 2), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vucmprhn", VX (4, 3), VX_MASK, FUTURE, 0, {VD, VA, VB}}, {"vrlb", VX (4, 4), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, {"vrlq", VX (4, 5), VX_MASK, POWER10, 0, {VD, VA, VB}}, {"vcmpequb", VXR(4, 6,0), VXR_MASK, PPCVEC, 0, {VD, VA, VB}}, @@ -5305,6 +5316,7 @@ const struct powerpc_opcode powerpc_opcodes[] = { {"vadduhm", VX (4, 64), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, {"vmul10ecuq", VX (4, 65), VX_MASK, PPCVEC3, 0, {VD, VA, VB}}, {"vmaxuh", VX (4, 66), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vucmprln", VX (4, 67), VX_MASK, FUTURE, 0, {VD, VA, VB}}, {"vrlh", VX (4, 68), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, {"vrlqmi", VX (4, 69), VX_MASK, POWER10, 0, {VD, VA, VB}}, {"vcmpequh", VXR(4, 70,0), VXR_MASK, PPCVEC, 0, {VD, VA, VB}}, @@ -5327,6 +5339,7 @@ const struct powerpc_opcode powerpc_opcodes[] = { {"ps_cmpu1", X (4, 64), XBF_MASK, PPCPS, 0, {BF, FRA, FRB}}, {"vadduwm", VX (4, 128), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, {"vmaxuw", VX (4, 130), VX_MASK, PPCVEC, 
0, {VD, VA, VB}}, +{"vucmprhb", VX (4, 131), VX_MASK, FUTURE, 0, {VD, VA, VB}}, {"vrlw", VX (4, 132), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, {"vrlwmi", VX (4, 133), VX_MASK, PPCVEC3, 0, {VD, VA, VB}}, {"vcmpequw", VXR(4, 134,0), VXR_MASK, PPCVEC, 0, {VD, VA, VB}}, @@ -5344,6 +5357,7 @@ const struct powerpc_opcode powerpc_opcodes[] = { {"ps_cmpo1", X (4, 96), XBF_MASK, PPCPS, 0, {BF, FRA, FRB}}, {"vaddudm", VX (4, 192), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, {"vmaxud", VX (4, 194), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"vucmprlb", VX (4, 195), VX_MASK, FUTURE, 0, {VD, VA, VB}}, {"vrld", VX (4, 196), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, {"vrldmi", VX (4, 197), VX_MASK, PPCVEC3, 0, {VD, VA, VB}}, {"vcmpeqfp", VXR(4, 198,0), VXR_MASK, PPCVEC, 0, {VD, VA, VB}}, @@ -5359,6 +5373,7 @@ const struct powerpc_opcode powerpc_opcodes[] = { {"vadduqm", VX (4, 256), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, {"vcmpuq", VX (4, 257), VXBF_MASK, POWER10, 0, {BF, VA, VB}}, {"vmaxsb", VX (4, 258), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vucmprhh", VX (4, 259), VX_MASK, FUTURE, 0, {VD, VA, VB}}, {"vslb", VX (4, 260), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, {"vslq", VX (4, 261), VX_MASK, POWER10, 0, {VD, VA, VB}}, {"vcmpnezb", VXR(4, 263,0), VXR_MASK, PPCVEC3, 0, {VD, VA, VB}}, @@ -5377,6 +5392,7 @@ const struct powerpc_opcode powerpc_opcodes[] = { {"vaddcuq", VX (4, 320), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, {"vcmpsq", VX (4, 321), VXBF_MASK, POWER10, 0, {BF, VA, VB}}, {"vmaxsh", VX (4, 322), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vucmprlh", VX (4, 323), VX_MASK, FUTURE, 0, {VD, VA, VB}}, {"vslh", VX (4, 324), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, {"vrlqnm", VX (4, 325), VX_MASK, POWER10, 0, {VD, VA, VB}}, {"vcmpnezh", VXR(4, 327,0), VXR_MASK, PPCVEC3, 0, {VD, VA, VB}}, @@ -5393,6 +5409,12 @@ const struct powerpc_opcode powerpc_opcodes[] = { {"nmacchw.", XO (4, 174,0,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, {"vaddcuw", VX (4, 384), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, {"vmaxsw", VX (4, 386), VX_MASK, PPCVEC, 0, {VD, 
VA, VB}}, +{"vupkhsntob", VXSEL5 (4, 387,0), VXVA_MASK, FUTURE, 0, {VD, VB}}, +{"vupklsntob", VXSEL5 (4, 387,1), VXVA_MASK, FUTURE, 0, {VD, VB}}, +{"vupkint8tobf16", VXSEL4 (4, 387,1), VXUIMM1_MASK, FUTURE, 0, {VD, VB, UIMM1}}, +{"vupkint4tobf16", VXSEL3 (4, 387,2), VXUIMM2_MASK, FUTURE, 0, {VD, VB, UIMM2}}, +{"vupkint8tofp32", VXSEL3 (4, 387,3), VXUIMM2_MASK, FUTURE, 0, {VD, VB, UIMM2}}, +{"vupkint4tofp32", VXSEL2 (4, 387,2), VXUIMM3_MASK, FUTURE, 0, {VD, VB, UIMM3}}, {"vslw", VX (4, 388), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, {"vrlwnm", VX (4, 389), VX_MASK, PPCVEC3, 0, {VD, VA, VB}}, {"vcmpnezw", VXR(4, 391,0), VXR_MASK, PPCVEC3, 0, {VD, VA, VB}}, |