[Patch][binutils][arm] Armv8.6-A Matrix Multiply extension [9/10]

Hi, This patch is part of a series that adds support for Armv8.6-A (Matrix Multiply and BFloat16 extensions) to binutils. This patch introduces the Matrix Multiply (Int8, F32, F64) extensions to the arm backend. The following Matrix Multiply instructions are added: vummla, vsmmla, vusmmla, vusdot, vsudot[1]. [1]https://developer.arm.com/docs/ddi0597/latest/simd-and-floating-point-instructions-alphabetic-order Committed on behalf of Mihail Ionescu. gas/ChangeLog: 2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com> * config/tc-arm.c (arm_ext_i8mm): New feature set. (do_vusdot): New. (do_vsudot): New. (do_vsmmla): New. (do_vummla): New. (insns): Add vsmmla, vummla, vusmmla, vusdot, vsudot mnemonics. (armv86a_ext_table): Add i8mm extension. (arm_extensions): Move bf16 extension to context sensitive table. (armv82a_ext_table, armv84a_ext_table, armv85a_ext_table): Move bf16 extension to context sensitive table. (armv86a_ext_table): Add i8mm extension. * doc/c-arm.texi: Document i8mm extension. * testsuite/gas/arm/i8mm.s: New test. * testsuite/gas/arm/i8mm.d: New test. * testsuite/gas/arm/bfloat17-cmdline-bad-3.d: Update test. include/ChangeLog: 2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com> * opcode/arm.h (ARM_EXT2_I8MM): New feature macro. opcodes/ChangeLog: 2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com> * arm-dis.c (neon_opcodes): Add i8mm SIMD instructions. Regression tested on arm-none-eabi. Is this ok for trunk? Regards, Mihail
author: Matthew Malcomson <matthew.malcomson@arm.com> 2019-11-07 17:20:08 +0000
committer: Matthew Malcomson <matthew.malcomson@arm.com> 2019-11-07 17:20:08 +0000
commit: 616ce08e1cc98c28c42bc2afed6b92df449c7b00 (patch)
tree: cbc0e78a74f954d5a40177b1887de248fa0d400f /gas/config/tc-arm.c
parent: 8382113fdb028386a335e8dee9ac04ebc8cf04a1 (diff)
download: gdb-616ce08e1cc98c28c42bc2afed6b92df449c7b00.zip
gdb-616ce08e1cc98c28c42bc2afed6b92df449c7b00.tar.gz
gdb-616ce08e1cc98c28c42bc2afed6b92df449c7b00.tar.bz2
1 files changed, 83 insertions, 4 deletions
diff --git a/gas/config/tc-arm.c b/gas/config/tc-arm.c
index 1d735a7..2a884b1 100644
--- a/gas/config/tc-arm.c
+++ b/gas/config/tc-arm.c
@@ -277,6 +277,8 @@ static const arm_feature_set arm_ext_predres =
   ARM_FEATURE_CORE_HIGH (ARM_EXT2_PREDRES);
 static const arm_feature_set arm_ext_bf16 =
   ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16);
+static const arm_feature_set arm_ext_i8mm =
+  ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM);
 
 static const arm_feature_set arm_arch_any = ARM_ANY;
 #ifdef OBJ_ELF
@@ -21483,6 +21485,79 @@ do_neon_dotproduct_u (void)
   return do_neon_dotproduct (1);
 }
 
+static void
+do_vusdot (void)
+{
+  enum neon_shape rs;
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
+  if (inst.operands[2].isscalar)
+    {
+      rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
+      neon_check_type (3, rs, N_EQK, N_EQK, N_S8 | N_KEY);
+
+      inst.instruction |= (1 << 25);
+      int index = inst.operands[2].reg & 0xf;
+      constraint ((index != 1 && index != 0), _("index must be 0 or 1"));
+      inst.operands[2].reg >>= 4;
+      constraint (!(inst.operands[2].reg < 16),
+		  _("indexed register must be less than 16"));
+      neon_three_args (rs == NS_QQS);
+      inst.instruction |= (index << 5);
+    }
+  else
+    {
+      inst.instruction |= (1 << 21);
+      rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+      neon_check_type (3, rs, N_EQK, N_EQK, N_S8 | N_KEY);
+      neon_three_args (rs == NS_QQQ);
+    }
+}
+
+static void
+do_vsudot (void)
+{
+  enum neon_shape rs;
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
+  if (inst.operands[2].isscalar)
+    {
+      rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
+      neon_check_type (3, rs, N_EQK, N_EQK, N_U8 | N_KEY);
+
+      inst.instruction |= (1 << 25);
+      int index = inst.operands[2].reg & 0xf;
+      constraint ((index != 1 && index != 0), _("index must be 0 or 1"));
+      inst.operands[2].reg >>= 4;
+      constraint (!(inst.operands[2].reg < 16),
+		  _("indexed register must be less than 16"));
+      neon_three_args (rs == NS_QQS);
+      inst.instruction |= (index << 5);
+    }
+}
+
+static void
+do_vsmmla (void)
+{
+  enum neon_shape rs = neon_select_shape (NS_QQQ, NS_NULL);
+  neon_check_type (3, rs, N_EQK, N_EQK, N_S8 | N_KEY);
+
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
+
+  neon_three_args (1);
+
+}
+
+static void
+do_vummla (void)
+{
+  enum neon_shape rs = neon_select_shape (NS_QQQ, NS_NULL);
+  neon_check_type (3, rs, N_EQK, N_EQK, N_U8 | N_KEY);
+
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
+
+  neon_three_args (1);
+
+}
+
 /* Crypto v1 instructions.  */
 static void
 do_crypto_2op_1 (unsigned elttype, int op)
@@ -26000,7 +26075,7 @@ static const struct asm_opcode insns[] =
 #define	THUMB_VARIANT &arm_ext_i8mm
  TUF ("vsmmla", c200c40, fc200c40, 3, (RNQ, RNQ, RNQ), vsmmla, vsmmla),
  TUF ("vummla", c200c50, fc200c50, 3, (RNQ, RNQ, RNQ), vummla, vummla),
- TUF ("vusmmla", ca00c40, fca00c40, 3, (RNQ, RNQ, RNQ), vummla, vummla),
+ TUF ("vusmmla", ca00c40, fca00c40, 3, (RNQ, RNQ, RNQ), vsmmla, vsmmla),
  TUF ("vusdot", c800d00, fc800d00, 3, (RNDQ, RNDQ, RNDQ_RNSC), vusdot, vusdot),
  TUF ("vsudot", c800d10, fc800d10, 3, (RNDQ, RNDQ, RNSC), vsudot, vsudot),
 };
@@ -31127,6 +31202,8 @@ static const struct arm_ext_table armv82a_ext_table[] =
   ARM_ADD ("simd", FPU_ARCH_NEON_VFP_ARMV8_1),
   ARM_ADD ("fp16", FPU_ARCH_NEON_VFP_ARMV8_2_FP16),
   ARM_ADD ("fp16fml", FPU_ARCH_NEON_VFP_ARMV8_2_FP16FML),
+  ARM_ADD ("bf16", ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16)),
+  ARM_ADD ("i8mm", ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM)),
   ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_1,
 	   ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
   ARM_ADD ("dotprod", FPU_ARCH_DOTPROD_NEON_VFP_ARMV8),
@@ -31143,6 +31220,8 @@ static const struct arm_ext_table armv84a_ext_table[] =
 {
   ARM_ADD ("simd", FPU_ARCH_DOTPROD_NEON_VFP_ARMV8),
   ARM_ADD ("fp16", FPU_ARCH_NEON_VFP_ARMV8_4_FP16FML),
+  ARM_ADD ("bf16", ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16)),
+  ARM_ADD ("i8mm", ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM)),
   ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_4,
 	   ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
 
@@ -31158,6 +31237,8 @@ static const struct arm_ext_table armv85a_ext_table[] =
 {
   ARM_ADD ("simd", FPU_ARCH_DOTPROD_NEON_VFP_ARMV8),
   ARM_ADD ("fp16", FPU_ARCH_NEON_VFP_ARMV8_4_FP16FML),
+  ARM_ADD ("bf16", ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16)),
+  ARM_ADD ("i8mm", ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM)),
   ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_4,
 	   ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
 
@@ -31169,6 +31250,7 @@ static const struct arm_ext_table armv85a_ext_table[] =
 
 static const struct arm_ext_table armv86a_ext_table[] =
 {
+  ARM_ADD ("i8mm", ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM)),
   { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
 };
 
@@ -31308,9 +31390,6 @@ struct arm_option_extension_value_table
    use the context sensitive approach using arm_ext_table's.  */
 static const struct arm_option_extension_value_table arm_extensions[] =
 {
-  ARM_EXT_OPT ("bf16",  ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
-			ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
-			ARM_ARCH_V8_2A),
   ARM_EXT_OPT ("crc",  ARCH_CRC_ARMV8, ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
 			 ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
   ARM_EXT_OPT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
author	Matthew Malcomson <matthew.malcomson@arm.com>	2019-11-07 17:20:08 +0000
committer	Matthew Malcomson <matthew.malcomson@arm.com>	2019-11-07 17:20:08 +0000
commit	616ce08e1cc98c28c42bc2afed6b92df449c7b00 (patch)
tree	cbc0e78a74f954d5a40177b1887de248fa0d400f /gas/config/tc-arm.c
parent	8382113fdb028386a335e8dee9ac04ebc8cf04a1 (diff)
download	gdb-616ce08e1cc98c28c42bc2afed6b92df449c7b00.zip gdb-616ce08e1cc98c28c42bc2afed6b92df449c7b00.tar.gz gdb-616ce08e1cc98c28c42bc2afed6b92df449c7b00.tar.bz2