diff options
author | Szabolcs Nagy <szabolcs.nagy@arm.com> | 2016-12-05 14:24:17 +0000 |
---|---|---|
committer | Szabolcs Nagy <szabolcs.nagy@arm.com> | 2016-12-05 14:24:17 +0000 |
commit | c28eeff2eabbba2246799470f3713716fa629680 (patch) | |
tree | c4a81d2c79dbfcda97f355d503895918ff3bbed7 /gas/config | |
parent | 0691188992efa4afab80bfdf966479bc331ce0a4 (diff) | |
download | gdb-c28eeff2eabbba2246799470f3713716fa629680.zip gdb-c28eeff2eabbba2246799470f3713716fa629680.tar.gz gdb-c28eeff2eabbba2246799470f3713716fa629680.tar.bz2 |
[ARM] Add ARMv8.3 VCMLA and VCADD instructions
Add support for VCMLA and VCADD advanced SIMD complex number instructions.
The command line option is -march=armv8.3-a+fp16+simd for enabling all
instructions.
In arm-dis.c the formatting syntax was abused a bit to select between
0 vs 90 or 180 vs 270 or 90 vs 270 based on a bit value instead of
duplicating entries in the opcode table.
gas/
* config/tc-arm.c (do_vcmla, do_vcadd): Define.
(neon_scalar_for_vcmla): Define.
(enum operand_parse_code): Add OP_IROT1 and OP_IROT2.
(NEON_ENC_TAB): Add DDSI and QQSI variants.
(insns): Add vcmla and vcadd.
* testsuite/gas/arm/armv8_3-a-simd.d: New.
* testsuite/gas/arm/armv8_3-a-simd.s: New.
* testsuite/gas/arm/armv8_3-a-simd-bad.d: New.
* testsuite/gas/arm/armv8_3-a-simd-bad.l: New.
* testsuite/gas/arm/armv8_3-a-simd-bad.s: New.
opcodes/
* arm-dis.c (coprocessor_opcodes): Add vcmla and vcadd.
(print_insn_coprocessor): Add 'V' format for neon D or Q regs.
Diffstat (limited to 'gas/config')
-rw-r--r-- | gas/config/tc-arm.c | 80 |
1 files changed, 80 insertions, 0 deletions
diff --git a/gas/config/tc-arm.c b/gas/config/tc-arm.c index 3515fc6..41ab13e 100644 --- a/gas/config/tc-arm.c +++ b/gas/config/tc-arm.c @@ -6535,6 +6535,8 @@ enum operand_parse_code OP_EXPi, /* same, with optional immediate prefix */ OP_EXPr, /* same, with optional relocation suffix */ OP_HALF, /* 0 .. 65535 or low/high reloc. */ + OP_IROT1, /* VCADD rotate immediate: 90, 270. */ + OP_IROT2, /* VCMLA rotate immediate: 0, 90, 180, 270. */ OP_CPSF, /* CPS flags */ OP_ENDI, /* Endianness specifier */ @@ -13348,6 +13350,8 @@ NEON_ENC_TAB X(3, (D, Q, S), MIXED), \ X(4, (D, D, D, I), DOUBLE), \ X(4, (Q, Q, Q, I), QUAD), \ + X(4, (D, D, S, I), DOUBLE), \ + X(4, (Q, Q, S, I), QUAD), \ X(2, (F, F), SINGLE), \ X(3, (F, F, F), SINGLE), \ X(2, (F, I), SINGLE), \ @@ -17261,6 +17265,80 @@ do_vrintm (void) do_vrint_1 (neon_cvt_mode_m); } +static unsigned +neon_scalar_for_vcmla (unsigned opnd, unsigned elsize) +{ + unsigned regno = NEON_SCALAR_REG (opnd); + unsigned elno = NEON_SCALAR_INDEX (opnd); + + if (elsize == 16 && elno < 2 && regno < 16) + return regno | (elno << 4); + else if (elsize == 32 && elno == 0) + return regno; + + first_error (_("scalar out of range")); + return 0; +} + +static void +do_vcmla (void) +{ + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_armv8), + _(BAD_FPU)); + constraint (inst.reloc.exp.X_op != O_constant, _("expression too complex")); + unsigned rot = inst.reloc.exp.X_add_number; + constraint (rot != 0 && rot != 90 && rot != 180 && rot != 270, + _("immediate out of range")); + rot /= 90; + if (inst.operands[2].isscalar) + { + enum neon_shape rs = neon_select_shape (NS_DDSI, NS_QQSI, NS_NULL); + unsigned size = neon_check_type (3, rs, N_EQK, N_EQK, + N_KEY | N_F16 | N_F32).size; + unsigned m = neon_scalar_for_vcmla (inst.operands[2].reg, size); + inst.is_neon = 1; + inst.instruction = 0xfe000800; + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= LOW4 (inst.operands[1].reg) << 16; + inst.instruction |= HI1 (inst.operands[1].reg) << 7; + inst.instruction |= LOW4 (m); + inst.instruction |= HI1 (m) << 5; + inst.instruction |= neon_quad (rs) << 6; + inst.instruction |= rot << 20; + inst.instruction |= (size == 32) << 23; + } + else + { + enum neon_shape rs = neon_select_shape (NS_DDDI, NS_QQQI, NS_NULL); + unsigned size = neon_check_type (3, rs, N_EQK, N_EQK, + N_KEY | N_F16 | N_F32).size; + neon_three_same (neon_quad (rs), 0, -1); + inst.instruction &= 0x00ffffff; /* Undo neon_dp_fixup. */ + inst.instruction |= 0xfc200800; + inst.instruction |= rot << 23; + inst.instruction |= (size == 32) << 20; + } +} + +static void +do_vcadd (void) +{ + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_armv8), + _(BAD_FPU)); + constraint (inst.reloc.exp.X_op != O_constant, _("expression too complex")); + unsigned rot = inst.reloc.exp.X_add_number; + constraint (rot != 90 && rot != 270, _("immediate out of range")); + enum neon_shape rs = neon_select_shape (NS_DDDI, NS_QQQI, NS_NULL); + unsigned size = neon_check_type (3, rs, N_EQK, N_EQK, + N_KEY | N_F16 | N_F32).size; + neon_three_same (neon_quad (rs), 0, -1); + inst.instruction &= 0x00ffffff; /* Undo neon_dp_fixup. */ + inst.instruction |= 0xfc800800; + inst.instruction |= (rot == 270) << 24; + inst.instruction |= (size == 32) << 20; +} + /* Crypto v1 instructions. */ static void do_crypto_2op_1 (unsigned elttype, int op) @@ -19799,6 +19877,8 @@ static const struct asm_opcode insns[] = #undef THUMB_VARIANT #define THUMB_VARIANT & arm_ext_v8_3 NCE (vjcvt, eb90bc0, 2, (RVS, RVD), vjcvt), + NUF (vcmla, 0, 4, (RNDQ, RNDQ, RNDQ_RNSC, EXPi), vcmla), + NUF (vcadd, 0, 4, (RNDQ, RNDQ, RNDQ, EXPi), vcadd), #undef ARM_VARIANT #define ARM_VARIANT & fpu_fpa_ext_v1 /* Core FPA instruction set (V1). */ |