From 51d543ed936c9ea7d045ecf80030e6bc8ffff29f Mon Sep 17 00:00:00 2001 From: Matthew Wahab Date: Mon, 14 Dec 2015 16:44:02 +0000 Subject: [AArch64][PATCH 2/14] Support ARMv8.2 FP16 Vector Three Same instructions. ARMv8.2 adds 16-bit floating point operations as an optional extension to floating point and Adv.SIMD support. This patch adds FP16 instructions to the group Vector Three Register Same, making them available when +simd+fp16 is enabled. The instructions added are: FMAXNM, FMAXNMP, FMINNM, FMINNMP, FMLA, FMLS, FADD, FADDP, FSUB, FABD, FMULX, FMUL, FCMEQ, FCMGE, FCMGT, FACGE, FACGT, FMAX, FMAXP, FMIN, FMINP, FRECPS, FDIV and FRSQRTS. The general form for these instructions is <OP> <Vd>.<T>, <Vn>.<T>, <Vm>.<T> where T is 4h or 8h. gas/testsuite/ 2015-12-14 Matthew Wahab * gas/aarch64/advsimd-fp16.d: New. * gas/aarch64/advsimd-fp16.s: New. opcodes/ 2015-12-14 Matthew Wahab * aarch64-asm-2.c: Regenerate. * aarch64-dis-2.c: Regenerate. * aarch64-opc-2.c: Regenerate. * aarch64-tbl.h (QL_V3SAMEH): New. (aarch64_opcode_table): Add fp16 versions of fmaxnm, fmla, fadd, fmulx, fcmeq, fmax, frecps, fminnm, fmls, fsub, fmin, frsqrts, fmaxnmp, faddp, fmul, fcmge, facge, fmaxp, fdiv, fminnmp, fabd, fcmgt, facgt and fminp to the vector three same group. Change-Id: I3f1c5fe82ca73f7a17fe5329cf2b0de03c94328c --- opcodes/aarch64-tbl.h | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'opcodes/aarch64-tbl.h') diff --git a/opcodes/aarch64-tbl.h b/opcodes/aarch64-tbl.h index a726b33..27f0f78 100644 --- a/opcodes/aarch64-tbl.h +++ b/opcodes/aarch64-tbl.h @@ -921,6 +921,13 @@ QLF3(V_2D , V_2D , V_2D ) \ } +/* e.g. FMAXNM <Vd>.<T>, <Vn>.<T>, <Vm>.<T>. */ +#define QL_V3SAMEH \ +{ \ + QLF3 (V_4H , V_4H , V_4H ), \ + QLF3 (V_8H , V_8H , V_8H ), \ +} + /* e.g. SQDMLAL ., ., ..
*/ #define QL_V3LONGHS \ { \ @@ -1584,19 +1591,43 @@ struct aarch64_opcode aarch64_opcode_table[] = {"sqdmulh", 0xe20b400, 0xbf20fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMEHS, F_SIZEQ}, {"addp", 0xe20bc00, 0xbf20fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAME, F_SIZEQ}, {"fmaxnm", 0xe20c400, 0xbfa0fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMESD, F_SIZEQ}, + {"fmaxnm", 0xe400400, 0xbfe0fc00, asimdsame, 0, SIMD_F16, + OP3 (Vd, Vn, Vm), QL_V3SAMEH, F_SIZEQ}, {"fmla", 0xe20cc00, 0xbfa0fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMESD, F_SIZEQ}, + {"fmla", 0xe400c00, 0xbfe0fc00, asimdsame, 0, SIMD_F16, + OP3 (Vd, Vn, Vm), QL_V3SAMEH, F_SIZEQ}, {"fadd", 0xe20d400, 0xbfa0fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMESD, F_SIZEQ}, + {"fadd", 0xe401400, 0xbfe0fc00, asimdsame, 0, SIMD_F16, + OP3 (Vd, Vn, Vm), QL_V3SAMEH, F_SIZEQ}, {"fmulx", 0xe20dc00, 0xbfa0fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMESD, F_SIZEQ}, + {"fmulx", 0xe401c00, 0xbfe0fc00, asimdsame, 0, SIMD_F16, + OP3 (Vd, Vn, Vm), QL_V3SAMEH, F_SIZEQ}, {"fcmeq", 0xe20e400, 0xbfa0fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMESD, F_SIZEQ}, + {"fcmeq", 0xe402400, 0xbfe0fc00, asimdsame, 0, SIMD_F16, + OP3 (Vd, Vn, Vm), QL_V3SAMEH, F_SIZEQ}, {"fmax", 0xe20f400, 0xbfa0fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMESD, F_SIZEQ}, + {"fmax", 0xe403400, 0xbfe0fc00, asimdsame, 0, SIMD_F16, + OP3 (Vd, Vn, Vm), QL_V3SAMEH, F_SIZEQ}, {"frecps", 0xe20fc00, 0xbfa0fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMESD, F_SIZEQ}, + {"frecps", 0xe403c00, 0xbfe0fc00, asimdsame, 0, SIMD_F16, + OP3 (Vd, Vn, Vm), QL_V3SAMEH, F_SIZEQ}, {"and", 0xe201c00, 0xbfe0fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMEB, F_SIZEQ}, {"bic", 0xe601c00, 0xbfe0fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMEB, F_SIZEQ}, {"fminnm", 0xea0c400, 0xbfa0fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMESD, F_SIZEQ}, + {"fminnm", 0xec00400, 0xbfe0fc00, asimdsame, 0, SIMD_F16, + OP3 (Vd, 
Vn, Vm), QL_V3SAMEH, F_SIZEQ}, {"fmls", 0xea0cc00, 0xbfa0fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMESD, F_SIZEQ}, + {"fmls", 0xec00c00, 0xbfe0fc00, asimdsame, 0, SIMD_F16, + OP3 (Vd, Vn, Vm), QL_V3SAMEH, F_SIZEQ}, {"fsub", 0xea0d400, 0xbfa0fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMESD, F_SIZEQ}, + {"fsub", 0xec01400, 0xbfe0fc00, asimdsame, 0, SIMD_F16, + OP3 (Vd, Vn, Vm), QL_V3SAMEH, F_SIZEQ}, {"fmin", 0xea0f400, 0xbfa0fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMESD, F_SIZEQ}, + {"fmin", 0xec03400, 0xbfe0fc00, asimdsame, 0, SIMD_F16, + OP3 (Vd, Vn, Vm), QL_V3SAMEH, F_SIZEQ}, {"frsqrts", 0xea0fc00, 0xbfa0fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMESD, F_SIZEQ}, + {"frsqrts", 0xec03c00, 0xbfe0fc00, asimdsame, 0, SIMD_F16, + OP3 (Vd, Vn, Vm), QL_V3SAMEH, F_SIZEQ}, {"orr", 0xea01c00, 0xbfe0fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMEB, F_HAS_ALIAS | F_SIZEQ}, {"mov", 0xea01c00, 0xbfe0fc00, asimdsame, OP_MOV_V, SIMD, OP2 (Vd, Vn), QL_V2SAMEB, F_ALIAS | F_CONV}, {"orn", 0xee01c00, 0xbfe0fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMEB, F_SIZEQ}, @@ -1623,19 +1654,43 @@ struct aarch64_opcode aarch64_opcode_table[] = {"uminp", 0x2e20ac00, 0xbf20fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMEBHS, F_SIZEQ}, {"sqrdmulh", 0x2e20b400, 0xbf20fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMEHS, F_SIZEQ}, {"fmaxnmp", 0x2e20c400, 0xbfa0fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMESD, F_SIZEQ}, + {"fmaxnmp", 0x2e400400, 0xbfe0fc00, asimdsame, 0, SIMD_F16, + OP3 (Vd, Vn, Vm), QL_V3SAMEH, F_SIZEQ}, {"faddp", 0x2e20d400, 0xbfa0fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMESD, F_SIZEQ}, + {"faddp", 0x2e401400, 0xbfe0fc00, asimdsame, 0, SIMD_F16, + OP3 (Vd, Vn, Vm), QL_V3SAMEH, F_SIZEQ}, {"fmul", 0x2e20dc00, 0xbfa0fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMESD, F_SIZEQ}, + {"fmul", 0x2e401c00, 0xbfe0fc00, asimdsame, 0, SIMD_F16, + OP3 (Vd, Vn, Vm), QL_V3SAMEH, F_SIZEQ}, {"fcmge", 0x2e20e400, 0xbfa0fc00, 
asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMESD, F_SIZEQ}, + {"fcmge", 0x2e402400, 0xbfe0fc00, asimdsame, 0, SIMD_F16, + OP3 (Vd, Vn, Vm), QL_V3SAMEH, F_SIZEQ}, {"facge", 0x2e20ec00, 0xbfa0fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMESD, F_SIZEQ}, + {"facge", 0x2e402c00, 0xbfe0fc00, asimdsame, 0, SIMD_F16, + OP3 (Vd, Vn, Vm), QL_V3SAMEH, F_SIZEQ}, {"fmaxp", 0x2e20f400, 0xbfa0fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMESD, F_SIZEQ}, + {"fmaxp", 0x2e403400, 0xbfe0fc00, asimdsame, 0, SIMD_F16, + OP3 (Vd, Vn, Vm), QL_V3SAMEH, F_SIZEQ}, {"fdiv", 0x2e20fc00, 0xbfa0fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMESD, F_SIZEQ}, + {"fdiv", 0x2e403c00, 0xbfe0fc00, asimdsame, 0, SIMD_F16, + OP3 (Vd, Vn, Vm), QL_V3SAMEH, F_SIZEQ}, {"eor", 0x2e201c00, 0xbfe0fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMEB, F_SIZEQ}, {"bsl", 0x2e601c00, 0xbfe0fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMEB, F_SIZEQ}, {"fminnmp", 0x2ea0c400, 0xbfa0fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMESD, F_SIZEQ}, + {"fminnmp", 0x2ec00400, 0xbfe0fc00, asimdsame, 0, SIMD_F16, + OP3 (Vd, Vn, Vm), QL_V3SAMEH, F_SIZEQ}, {"fabd", 0x2ea0d400, 0xbfa0fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMESD, F_SIZEQ}, + {"fabd", 0x2ec01400, 0xbfe0fc00, asimdsame, 0, SIMD_F16, + OP3 (Vd, Vn, Vm), QL_V3SAMEH, F_SIZEQ}, {"fcmgt", 0x2ea0e400, 0xbfa0fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMESD, F_SIZEQ}, + {"fcmgt", 0x2ec02400, 0xbfe0fc00, asimdsame, 0, SIMD_F16, + OP3 (Vd, Vn, Vm), QL_V3SAMEH, F_SIZEQ}, {"facgt", 0x2ea0ec00, 0xbfa0fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMESD, F_SIZEQ}, + {"facgt", 0x2ec02c00, 0xbfe0fc00, asimdsame, 0, SIMD_F16, + OP3 (Vd, Vn, Vm), QL_V3SAMEH, F_SIZEQ}, {"fminp", 0x2ea0f400, 0xbfa0fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMESD, F_SIZEQ}, + {"fminp", 0x2ec03400, 0xbfe0fc00, asimdsame, 0, SIMD_F16, + OP3 (Vd, Vn, Vm), QL_V3SAMEH, F_SIZEQ}, {"bit", 0x2ea01c00, 0xbfe0fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), 
QL_V3SAMEB, F_SIZEQ}, {"bif", 0x2ee01c00, 0xbfe0fc00, asimdsame, 0, SIMD, OP3 (Vd, Vn, Vm), QL_V3SAMEB, F_SIZEQ}, /* AdvSIMD three same extension. */ -- cgit v1.1