[binutils][arm] Implement Custom Datapath Extensions for MVE

Here we implement the custom datapath extensions for MVE. This required the following changes: - Adding a new register argument type (that takes either an MVE vector or a Neon S or D register). - Adding two new immediate operand types (0-127 and 0-4095). - Using the Neon type machinery to distinguish between instruction types. This required the introduction of new neon shapes to account for the coprocessor operands to these instructions. - Adding a new disassembly character to `print_insn_cde` to handle the new register types. Specification can be found at https://developer.arm.com/docs/ddi0607/latest Successfully regression tested on arm-none-eabi, and arm-wince-pe. gas/ChangeLog: 2020-02-10 Matthew Malcomson <matthew.malcomson@arm.com> * config/tc-arm.c (NEON_MAX_TYPE_ELS): Increment to account for instructions that can have 5 arguments. (enum operand_parse_code): Add new operands. (parse_operands): Account for new operands. (S5): New macro. (enum neon_shape_el): Introduce P suffixes for coprocessor. (neon_select_shape): Account for P suffix. (LOW1): Move macro to global position. (HI4): Move macro to global position. (vcx_assign_vec_d): New. (vcx_assign_vec_m): New. (vcx_assign_vec_n): New. (enum vcx_reg_type): New. (vcx_get_reg_type): New. (vcx_size_pos): New. (vcx_vec_pos): New. (vcx_handle_shape): New. (vcx_ensure_register_in_range): New. (vcx_handle_register_arguments): New. (vcx_handle_insn_block): New. (vcx_handle_common_checks): New. (do_vcx1): New. (do_vcx2): New. (do_vcx3): New. * testsuite/gas/arm/cde-missing-fp.d: New test. * testsuite/gas/arm/cde-missing-fp.l: New test. * testsuite/gas/arm/cde-missing-mve.d: New test. * testsuite/gas/arm/cde-missing-mve.l: New test. * testsuite/gas/arm/cde-mve-or-neon.d: New test. * testsuite/gas/arm/cde-mve-or-neon.s: New test. * testsuite/gas/arm/cde-mve.s: New test. 
* testsuite/gas/arm/cde-warnings.l: * testsuite/gas/arm/cde-warnings.s: * testsuite/gas/arm/cde.d: * testsuite/gas/arm/cde.s: opcodes/ChangeLog: 2020-02-10 Matthew Malcomson <matthew.malcomson@arm.com> * arm-dis.c (print_insn_cde): Define 'V' parse character. (cde_opcodes): Add VCX* instructions.
author: Matthew Malcomson <matthew.malcomson@arm.com> 2020-02-10 16:39:02 +0000
committer: Matthew Malcomson <matthew.malcomson@arm.com> 2020-02-10 16:50:14 +0000
commit: 5aae9ae97f65f6651cf91db856a1f8b4bef5e896 (patch)
tree: 47e5e20c08d9b53251ea7b624a9aeec5ac23e61c /gas/config/tc-arm.c
parent: 4934a27c8c1d5c8623366f5dbafae8af60b96bc0 (diff)
download: gdb-5aae9ae97f65f6651cf91db856a1f8b4bef5e896.zip
gdb-5aae9ae97f65f6651cf91db856a1f8b4bef5e896.tar.gz
gdb-5aae9ae97f65f6651cf91db856a1f8b4bef5e896.tar.bz2
1 files changed, 252 insertions, 4 deletions
diff --git a/gas/config/tc-arm.c b/gas/config/tc-arm.c
index 93d04ee..48b1d36 100644
--- a/gas/config/tc-arm.c
+++ b/gas/config/tc-arm.c
@@ -339,6 +339,11 @@ static const arm_feature_set mve_ext =
   ARM_FEATURE_CORE_HIGH (ARM_EXT2_MVE);
 static const arm_feature_set mve_fp_ext =
   ARM_FEATURE_CORE_HIGH (ARM_EXT2_MVE_FP);
+/* Note: This has more than one bit set, which means using it with
+   mark_feature_used (which returns true if *any* of the bits are set in the
+   current cpu variant) can give surprising results.  */
+static const arm_feature_set armv8m_fp =
+  ARM_FEATURE_COPROC (FPU_VFP_V5_SP_D16);
 #ifdef OBJ_ELF
 static const arm_feature_set fpu_vfp_fp16 =
   ARM_FEATURE_COPROC (FPU_VFP_EXT_FP16);
@@ -478,7 +483,7 @@ struct neon_type_el
   unsigned size;
 };
 
-#define NEON_MAX_TYPE_ELS 4
+#define NEON_MAX_TYPE_ELS 5
 
 struct neon_type
 {
@@ -7092,6 +7097,7 @@ enum operand_parse_code
   OP_RIWG,	/* iWMMXt wCG register */
   OP_RXA,	/* XScale accumulator register */
 
+  OP_RNSDMQ,	/* Neon single, double or MVE vector register */
   OP_RNSDQMQ,	/* Neon single, double or quad register or MVE vector register
 		 */
   OP_RNSDQMQR,	/* Neon single, double or quad register, MVE vector register or
@@ -7160,8 +7166,10 @@ enum operand_parse_code
   OP_I63s,	/*		 -64 .. 63 */
   OP_I64,	/*		   1 .. 64 */
   OP_I64z,	/*		   0 .. 64 */
+  OP_I127,	/*		   0 .. 127 */
   OP_I255,	/*		   0 .. 255 */
   OP_I511,	/*		   0 .. 511 */
+  OP_I4095,	/*		   0 .. 4095 */
   OP_I8191,	/*		   0 .. 8191 */
   OP_I4b,	/* immediate, prefix optional, 1 .. 4 */
   OP_I7b,	/*			       0 .. 7 */
@@ -7480,6 +7488,12 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
 	case OP_RVSD_COND:
 	  po_reg_or_goto (REG_TYPE_VFSD, try_cond);
 	  break;
+	case OP_RNSDMQ:
+	  po_reg_or_goto (REG_TYPE_NSD, try_mq2);
+	  break;
+	  try_mq2:
+	  po_reg_or_fail (REG_TYPE_MQ);
+	  break;
 	case OP_oRNSDQ:
 	case OP_RNSDQ: po_reg_or_fail (REG_TYPE_NSDQ);    break;
 	case OP_RNSDQMQR:
@@ -7676,8 +7690,10 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
 	case OP_I63:	 po_imm_or_fail (  0,     63, FALSE);   break;
 	case OP_I64:	 po_imm_or_fail (  1,     64, FALSE);   break;
 	case OP_I64z:	 po_imm_or_fail (  0,     64, FALSE);   break;
+	case OP_I127:	 po_imm_or_fail (  0,	 127, FALSE);	break;
 	case OP_I255:	 po_imm_or_fail (  0,	 255, FALSE);	break;
 	case OP_I511:	 po_imm_or_fail (  0,	 511, FALSE);	break;
+	case OP_I4095:	 po_imm_or_fail (  0,	 4095, FALSE);	break;
 	case OP_I8191:   po_imm_or_fail (  0,	 8191, FALSE);	break;
 	case OP_I4b:	 po_imm_or_fail (  1,	   4, TRUE);	break;
 	case OP_oI7b:
@@ -14797,6 +14813,15 @@ NEON_ENC_TAB
   X(2, (Q, R), QUAD),			\
   X(2, (D, I), DOUBLE),			\
   X(2, (Q, I), QUAD),			\
+  X(3, (P, F, I), SINGLE),		\
+  X(3, (P, D, I), DOUBLE),		\
+  X(3, (P, Q, I), QUAD),		\
+  X(4, (P, F, F, I), SINGLE),		\
+  X(4, (P, D, D, I), DOUBLE),		\
+  X(4, (P, Q, Q, I), QUAD),		\
+  X(5, (P, F, F, F, I), SINGLE),	\
+  X(5, (P, D, D, D, I), DOUBLE),	\
+  X(5, (P, Q, Q, Q, I), QUAD),		\
   X(3, (D, L, D), DOUBLE),		\
   X(2, (D, Q), MIXED),			\
   X(2, (Q, D), MIXED),			\
@@ -14845,6 +14870,7 @@ NEON_ENC_TAB
 #define S2(A,B)		NS_##A##B
 #define S3(A,B,C)	NS_##A##B##C
 #define S4(A,B,C,D)	NS_##A##B##C##D
+#define S5(A,B,C,D,E)	NS_##A##B##C##D##E
 
 #define X(N, L, C) S##N L
 
@@ -14858,6 +14884,7 @@ enum neon_shape
 #undef S2
 #undef S3
 #undef S4
+#undef S5
 
 enum neon_shape_class
 {
@@ -14886,7 +14913,8 @@ enum neon_shape_el
   SE_I,
   SE_S,
   SE_R,
-  SE_L
+  SE_L,
+  SE_P
 };
 
 /* Register widths of above.  */
@@ -14899,6 +14927,7 @@ static unsigned neon_shape_el_size[] =
   0,
   32,
   32,
+  0,
   0
 };
 
@@ -14911,6 +14940,7 @@ struct neon_shape_info
 #define S2(A,B)		{ SE_##A, SE_##B }
 #define S3(A,B,C)	{ SE_##A, SE_##B, SE_##C }
 #define S4(A,B,C,D)	{ SE_##A, SE_##B, SE_##C, SE_##D }
+#define S5(A,B,C,D,E)	{ SE_##A, SE_##B, SE_##C, SE_##D, SE_##E }
 
 #define X(N, L, C) { N, S##N L }
 
@@ -14923,6 +14953,7 @@ static struct neon_shape_info neon_shape_tab[] =
 #undef S2
 #undef S3
 #undef S4
+#undef S5
 
 /* Bit masks used in type checking given instructions.
   'N_EQK' means the type must be the same as (or based on in some way) the key
@@ -15112,6 +15143,7 @@ neon_select_shape (enum neon_shape shape, ...)
 		matches = 0;
 	      break;
 
+	    case SE_P:
 	    case SE_L:
 	      break;
 	    }
@@ -15840,6 +15872,8 @@ neon_logbits (unsigned x)
 
 #define LOW4(R) ((R) & 0xf)
 #define HI1(R) (((R) >> 4) & 1)
+#define LOW1(R) ((R) & 0x1) /* Bit 0 of R.  */
+#define HI4(R) (((R) >> 1) & 0xf) /* Bits 4:1 of R.  */
 
 static unsigned
 mve_get_vcmp_vpt_cond (struct neon_type_el et)
@@ -19728,8 +19762,6 @@ do_neon_fmac_maybe_scalar_long (int subtype)
   inst.instruction &= 0x00ffffff;
   inst.instruction |= high8;
 
-#define LOW1(R) ((R) & 0x1)
-#define HI4(R) (((R) >> 1) & 0xf)
   /* Unlike usually NEON three-same, encoding for Vn and Vm will depend on
      whether the instruction is in Q form and whether Vm is a scalar indexed
      operand.  */
@@ -21840,6 +21872,212 @@ do_cx3da (void)
   return do_custom_instruction_3 (1, 1);
 }
 
+static void
+vcx_assign_vec_d (unsigned regnum) /* OR REGNUM into the 'd' vector field.  */
+{
+  inst.instruction |= HI4 (regnum) << 12; /* regnum[4:1] -> insn[15:12].  */
+  inst.instruction |= LOW1 (regnum) << 22; /* regnum[0] -> insn[22].  */
+}
+
+static void
+vcx_assign_vec_m (unsigned regnum) /* OR REGNUM into the 'm' vector field.  */
+{
+  inst.instruction |= HI4 (regnum); /* regnum[4:1] -> insn[3:0].  */
+  inst.instruction |= LOW1 (regnum) << 5; /* regnum[0] -> insn[5].  */
+}
+
+static void
+vcx_assign_vec_n (unsigned regnum) /* OR REGNUM into the 'n' vector field.  */
+{
+  inst.instruction |= HI4 (regnum) << 16; /* regnum[4:1] -> insn[19:16].  */
+  inst.instruction |= LOW1 (regnum) << 7; /* regnum[0] -> insn[7].  */
+}
+
+enum vcx_reg_type { /* Register class shared by the vector operands of a vcx insn.  */
+    q_reg, /* Chosen for the NS_PQI, NS_PQQI and NS_PQQQI shapes.  */
+    d_reg, /* Chosen for the NS_PDI, NS_PDDI and NS_PDDDI shapes.  */
+    s_reg /* Chosen for the NS_PFI, NS_PFFI and NS_PFFFI shapes.  */
+};
+
+static enum vcx_reg_type
+vcx_get_reg_type (enum neon_shape ns) /* Map a vcx shape NS to its register class.  */
+{
+  gas_assert (ns == NS_PQI
+	      || ns == NS_PDI
+	      || ns == NS_PFI
+	      || ns == NS_PQQI
+	      || ns == NS_PDDI
+	      || ns == NS_PFFI
+	      || ns == NS_PQQQI
+	      || ns == NS_PDDDI
+	      || ns == NS_PFFFI); /* Only the nine coprocessor shapes are accepted.  */
+  if (ns == NS_PQI || ns == NS_PQQI || ns == NS_PQQQI)
+    return q_reg;
+  if (ns == NS_PDI || ns == NS_PDDI || ns == NS_PDDDI)
+    return d_reg;
+  return s_reg; /* What remains are the F (single) shapes.  */
+}
+
+#define vcx_size_pos 24 /* Instruction bit set below for D registers.  */
+#define vcx_vec_pos 6 /* Instruction bit set below for Q registers.  */
+static unsigned
+vcx_handle_shape (enum vcx_reg_type reg_type) /* Set the shape bit; return the register scale.  */
+{
+  unsigned mult = 2; /* Scale used for Q and D registers; S registers use 1.  */
+  if (reg_type == q_reg)
+    inst.instruction |= 1 << vcx_vec_pos;
+  else if (reg_type == d_reg)
+    inst.instruction |= 1 << vcx_size_pos;
+  else
+    mult = 1; /* S registers set neither bit.  */
+  /* NOTE:
+     The documentation says that the Q registers are encoded as 2*N in the D:Vd
+     bits (or equivalent for N and M registers).
+     Similarly the D registers are encoded as N in D:Vd bits.
+     While the S registers are encoded as N in the Vd:D bits.
+
+     Taking into account the maximum values of these registers we can see a
+     nicer pattern for calculation:
+       Q -> 7, D -> 15, S -> 31
+
+     If we say that everything is encoded in the Vd:D bits, then we can say
+     that Q is encoded as 4*N, and D is encoded as 2*N.
+     This way the bits will end up the same, and calculation is simpler.
+     (calculation is now:
+	1. Multiply by a number determined by the register letter.
+	2. Encode resulting number in Vd:D bits.)
+
+      This is made a little more complicated by automatic handling of 'Q'
+      registers elsewhere, which means the register number is already 2*N where
+      N is the number the user wrote after the register letter.
+     */
+  return mult; /* Callers multiply each register number by this before encoding.  */
+}
+#undef vcx_vec_pos
+#undef vcx_size_pos
+
+static void
+vcx_ensure_register_in_range (unsigned R, enum vcx_reg_type reg_type) /* Range-check register R.  */
+{
+  if (reg_type == q_reg)
+    {
+      gas_assert (R % 2 == 0); /* Q register numbers arrive here already doubled.  */
+      constraint (R >= 16, _("'q' register must be in range 0-7"));
+    }
+  else if (reg_type == d_reg)
+    constraint (R >= 16, _("'d' register must be in range 0-15"));
+  else /* Anything else is treated as an S register.  */
+    constraint (R >= 32, _("'s' register must be in range 0-31"));
+}
+
+static void (*vcx_assign_vec[3]) (unsigned) = { /* Field encoders, indexed by slot.  */
+    vcx_assign_vec_d, /* Slot 0.  */
+    vcx_assign_vec_m, /* Slot 1.  */
+    vcx_assign_vec_n /* Slot 2.  */
+};
+
+static void
+vcx_handle_register_arguments (unsigned num_registers, /* Number of vector operands.  */
+			       enum vcx_reg_type reg_type)
+{
+  unsigned R;
+  unsigned reg_mult = vcx_handle_shape (reg_type); /* Also sets the shape bit.  */
+  for (unsigned i = 0; i < num_registers; i++)
+    {
+      R = inst.operands[i+1].reg; /* Operand 0 is skipped; registers start at index 1.  */
+      vcx_ensure_register_in_range (R, reg_type);
+      if (num_registers == 3 && i > 0)
+	{ /* With three registers the 2nd uses the 'n' field and the 3rd the 'm' field.  */
+	  if (i == 2)
+	    vcx_assign_vec[1] (R * reg_mult);
+	  else
+	    vcx_assign_vec[2] (R * reg_mult);
+	  continue;
+	}
+      vcx_assign_vec[i](R * reg_mult); /* Otherwise slot i: 'd' first, then 'm'.  */
+    }
+}
+
+static void
+vcx_handle_insn_block (enum vcx_reg_type reg_type) /* Set inst.pred_insn_type or flag an error.  */
+{
+  if (reg_type == q_reg) /* The first 'else' below pairs with the inner 'if'.  */
+    if (inst.cond > COND_ALWAYS)
+      inst.pred_insn_type = INSIDE_VPT_INSN;
+    else
+      inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+  else if (inst.cond == COND_ALWAYS) /* S and D register forms from here on.  */
+    inst.pred_insn_type = OUTSIDE_PRED_INSN;
+  else /* S/D form with any condition other than COND_ALWAYS is rejected.  */
+    inst.error = BAD_NOT_IT;
+}
+
+static void
+vcx_handle_common_checks (unsigned num_args, enum neon_shape rs) /* Shared by do_vcx1/2/3.  */
+{
+  constraint (!mark_feature_used (&arm_ext_cde), _(BAD_CDE));
+  cde_handle_coproc ();
+  enum vcx_reg_type reg_type = vcx_get_reg_type (rs);
+  vcx_handle_register_arguments (num_args, reg_type); /* NUM_ARGS counts vector registers.  */
+  vcx_handle_insn_block (reg_type);
+  if (reg_type == q_reg)
+    constraint (!mark_feature_used (&mve_ext),
+		_("vcx instructions with Q registers require MVE"));
+  else /* armv8m_fp is multi-bit: require the whole set before marking it used.  */
+    constraint (!(ARM_FSET_CPU_SUBSET (armv8m_fp, cpu_variant)
+		  && mark_feature_used (&armv8m_fp))
+		&& !mark_feature_used (&mve_ext),
+		_("vcx instructions with S or D registers require either MVE"
+		  " or Armv8-M floating point extension."));
+}
+
+static void
+do_vcx1 (void) /* Encode vcx1/vcx1a: coprocessor, one vector register, immediate.  */
+{
+  enum neon_shape rs = neon_select_shape (NS_PQI, NS_PDI, NS_PFI, NS_NULL);
+  vcx_handle_common_checks (1, rs);
+
+  unsigned imm = inst.operands[2].imm;
+  inst.instruction |= (imm & 0x03f); /* imm[5:0] -> insn[5:0].  */
+  inst.instruction |= (imm & 0x040) << 1; /* imm[6] -> insn[7].  */
+  inst.instruction |= (imm & 0x780) << 9; /* imm[10:7] -> insn[19:16].  */
+  if (rs != NS_PQI) /* Only the Q-register form may use imm[11].  */
+    constraint (imm >= 2048,
+		_("vcx1 with S or D registers takes immediate within 0-2047"));
+  inst.instruction |= (imm & 0x800) << 13; /* imm[11] -> insn[24].  */
+}
+
+static void
+do_vcx2 (void) /* Encode vcx2/vcx2a: coprocessor, two vector registers, immediate.  */
+{
+  enum neon_shape rs = neon_select_shape (NS_PQQI, NS_PDDI, NS_PFFI, NS_NULL);
+  vcx_handle_common_checks (2, rs);
+
+  unsigned imm = inst.operands[3].imm;
+  inst.instruction |= (imm & 0x01) << 4; /* imm[0] -> insn[4].  */
+  inst.instruction |= (imm & 0x02) << 6; /* imm[1] -> insn[7].  */
+  inst.instruction |= (imm & 0x3c) << 14; /* imm[5:2] -> insn[19:16].  */
+  if (rs != NS_PQQI) /* Only the Q-register form may use imm[6].  */
+    constraint (imm >= 64,
+		_("vcx2 with S or D registers takes immediate within 0-63"));
+  inst.instruction |= (imm & 0x40) << 18; /* imm[6] -> insn[24].  */
+}
+
+static void
+do_vcx3 (void) /* Encode vcx3/vcx3a: coprocessor, three vector registers, immediate.  */
+{
+  enum neon_shape rs = neon_select_shape (NS_PQQQI, NS_PDDDI, NS_PFFFI, NS_NULL);
+  vcx_handle_common_checks (3, rs);
+
+  unsigned imm = inst.operands[4].imm;
+  inst.instruction |= (imm & 0x1) << 4; /* imm[0] -> insn[4].  */
+  inst.instruction |= (imm & 0x6) << 19; /* imm[2:1] -> insn[21:20].  */
+  if (rs != NS_PQQQI) /* Only the Q-register form may use imm[3].  */
+    constraint (imm >= 8,
+		_("vcx3 with S or D registers takes immediate within 0-7"));
+  inst.instruction |= (imm & 0x8) << 21; /* imm[3] -> insn[24].  */
+}
+
 /* Crypto v1 instructions.  */
 static void
 do_crypto_2op_1 (unsigned elttype, int op)
@@ -26400,7 +26638,17 @@ static const struct asm_opcode insns[] =
  ToC ("cx3a", fe800000, 5, (RCP, APSR_RR, APSR_RR, APSR_RR, I63), cx3a),
  ToC ("cx3d", ee800040, 6, (RCP, RR, APSR_RR, APSR_RR, APSR_RR, I63), cx3d),
  ToC ("cx3da", fe800040, 6, (RCP, RR, APSR_RR, APSR_RR, APSR_RR, I63), cx3da),
+
+ mToC ("vcx1", ec200000, 3, (RCP, RNSDMQ, I4095), vcx1),
+ mToC ("vcx1a", fc200000, 3, (RCP, RNSDMQ, I4095), vcx1),
+
+ mToC ("vcx2", ec300000, 4, (RCP, RNSDMQ, RNSDMQ, I127), vcx2),
+ mToC ("vcx2a", fc300000, 4, (RCP, RNSDMQ, RNSDMQ, I127), vcx2),
+
+ mToC ("vcx3", ec800000, 5, (RCP, RNSDMQ, RNSDMQ, RNSDMQ, I15), vcx3),
+ mToC ("vcx3a", fc800000, 5, (RCP, RNSDMQ, RNSDMQ, RNSDMQ, I15), vcx3),
 };
+
 #undef ARM_VARIANT
 #undef THUMB_VARIANT
 #undef TCE
author	Matthew Malcomson <matthew.malcomson@arm.com>	2020-02-10 16:39:02 +0000
committer	Matthew Malcomson <matthew.malcomson@arm.com>	2020-02-10 16:50:14 +0000
commit	5aae9ae97f65f6651cf91db856a1f8b4bef5e896 (patch)
tree	47e5e20c08d9b53251ea7b624a9aeec5ac23e61c /gas/config/tc-arm.c
parent	4934a27c8c1d5c8623366f5dbafae8af60b96bc0 (diff)
download	gdb-5aae9ae97f65f6651cf91db856a1f8b4bef5e896.zip gdb-5aae9ae97f65f6651cf91db856a1f8b4bef5e896.tar.gz gdb-5aae9ae97f65f6651cf91db856a1f8b4bef5e896.tar.bz2