aboutsummaryrefslogtreecommitdiff
path: root/gas
diff options
context:
space:
mode:
authorMatthew Malcomson <matthew.malcomson@arm.com>2019-11-07 17:10:01 +0000
committerMatthew Malcomson <matthew.malcomson@arm.com>2019-11-07 17:11:52 +0000
commit8382113fdb028386a335e8dee9ac04ebc8cf04a1 (patch)
treec2e565fa25ec35788e5b2dbc3212629a3218ee64 /gas
parenteb5bbc482128b08d2ee8a2470951a74d8351146f (diff)
downloadfsf-binutils-gdb-8382113fdb028386a335e8dee9ac04ebc8cf04a1.zip
fsf-binutils-gdb-8382113fdb028386a335e8dee9ac04ebc8cf04a1.tar.gz
fsf-binutils-gdb-8382113fdb028386a335e8dee9ac04ebc8cf04a1.tar.bz2
[binutils][aarch64] Matrix Multiply extension enablement [8/X]
Hi, This patch is part of a series that adds support for Armv8.6-A (Matrix Multiply and BFloat16 extensions) to binutils. This patch introduces the Matrix Multiply (Int8, F32, F64) extensions to the aarch64 backend. The following instructions are added: {s/u}mmla, usmmla, {us/su}dot, fmmla, ld1rob, ld1roh, ld1row, ld1rod, uzip{1/2}, trn{1/2}. Committed on behalf of Mihail Ionescu. gas/ChangeLog: 2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com> * config/tc-aarch64.c: Add new arch features to support the mm extension. (parse_operands): Add new operand. * testsuite/gas/aarch64/i8mm.s: New test. * testsuite/gas/aarch64/i8mm.d: New test. * testsuite/gas/aarch64/f32mm.s: New test. * testsuite/gas/aarch64/f32mm.d: New test. * testsuite/gas/aarch64/f64mm.s: New test. * testsuite/gas/aarch64/f64mm.d: New test. * testsuite/gas/aarch64/sve-movprfx-mm.s: New test. * testsuite/gas/aarch64/sve-movprfx-mm.d: New test. include/ChangeLog: 2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com> * opcode/aarch64.h (AARCH64_FEATURE_I8MM): New. (AARCH64_FEATURE_F32MM): New. (AARCH64_FEATURE_F64MM): New. (AARCH64_OPND_SVE_ADDR_RI_S4x32): New. (enum aarch64_insn_class): Add new instruction class "aarch64_misc" for instructions that do not require special handling. opcodes/ChangeLog: 2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com> * aarch64-tbl.h (aarch64_feature_i8mm_sve, aarch64_feature_f32mm_sve, aarch64_feature_f64mm_sve, aarch64_feature_i8mm, aarch64_feature_f32mm, aarch64_feature_f64mm): New feature sets. (INT8MATMUL_INSN, F64MATMUL_SVE_INSN, F64MATMUL_INSN, F32MATMUL_SVE_INSN, F32MATMUL_INSN): New macros to define matrix multiply instructions. (I8MM_SVE, F32MM_SVE, F64MM_SVE, I8MM, F32MM, F64MM): New feature set macros. (QL_MMLA64, OP_SVE_SBB): New qualifiers. (OP_SVE_QQQ): New qualifier. (INT8MATMUL_SVE_INSNC, F64MATMUL_SVE_INSNC, F32MATMUL_SVE_INSNC): New feature set for bfloat16 instructions to support the movprfx constraint. (aarch64_opcode_table): Support for SVE_ADDR_RI_S4x32. 
(aarch64_opcode_table): Define new instructions smmla, ummla, usmmla, usdot, sudot, fmmla, ld1rob, ld1roh, ld1row, ld1rod, uzip{1/2}, trn{1/2}. * aarch64-opc.c (operand_general_constraint_met_p): Handle AARCH64_OPND_SVE_ADDR_RI_S4x32. (aarch64_print_operand): Handle AARCH64_OPND_SVE_ADDR_RI_S4x32. * aarch64-dis-2.c (aarch64_opcode_lookup_1, aarch64_find_next_opcode): Account for new instructions. * opcodes/aarch64-asm-2.c (aarch64_insert_operand): Support the new S4x32 operand. * aarch64-opc-2.c (aarch64_operands): Support the new S4x32 operand. Regression tested on arm-none-eabi. Is it ok for trunk? Regards, Mihail
Diffstat (limited to 'gas')
-rw-r--r--gas/ChangeLog13
-rw-r--r--gas/config/tc-aarch64.c7
-rw-r--r--gas/doc/c-aarch64.texi6
-rw-r--r--gas/testsuite/gas/aarch64/f32mm.d11
-rw-r--r--gas/testsuite/gas/aarch64/f32mm.s12
-rw-r--r--gas/testsuite/gas/aarch64/f64mm.d62
-rw-r--r--gas/testsuite/gas/aarch64/f64mm.s71
-rw-r--r--gas/testsuite/gas/aarch64/i8mm.d43
-rw-r--r--gas/testsuite/gas/aarch64/i8mm.s56
-rw-r--r--gas/testsuite/gas/aarch64/sve-movprfx-mm.d24
-rw-r--r--gas/testsuite/gas/aarch64/sve-movprfx-mm.s25
11 files changed, 330 insertions, 0 deletions
diff --git a/gas/ChangeLog b/gas/ChangeLog
index 97054cc..1d835b6 100644
--- a/gas/ChangeLog
+++ b/gas/ChangeLog
@@ -1,4 +1,17 @@
2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com>
+
+ * config/tc-aarch64.c: Add new arch features to support the mm extension.
+ (parse_operands): Add new operand.
+ * testsuite/gas/aarch64/i8mm.s: New test.
+ * testsuite/gas/aarch64/i8mm.d: New test.
+ * testsuite/gas/aarch64/f32mm.s: New test.
+ * testsuite/gas/aarch64/f32mm.d: New test.
+ * testsuite/gas/aarch64/f64mm.s: New test.
+ * testsuite/gas/aarch64/f64mm.d: New test.
+ * testsuite/gas/aarch64/sve-movprfx-mm.s: New test.
+ * testsuite/gas/aarch64/sve-movprfx-mm.d: New test.
+
+2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com>
2019-11-07 Barnaby Wilks <barnaby.wilks@arm.com>
* config/tc-aarch64.c (md_atof): Add encoding for the bfloat16 format.
diff --git a/gas/config/tc-aarch64.c b/gas/config/tc-aarch64.c
index eea863b..77a9189 100644
--- a/gas/config/tc-aarch64.c
+++ b/gas/config/tc-aarch64.c
@@ -6433,6 +6433,7 @@ parse_operands (char *str, const aarch64_opcode *opcode)
break;
case AARCH64_OPND_SVE_ADDR_RI_S4x16:
+ case AARCH64_OPND_SVE_ADDR_RI_S4x32:
case AARCH64_OPND_SVE_ADDR_RI_S4xVL:
case AARCH64_OPND_SVE_ADDR_RI_S4x2xVL:
case AARCH64_OPND_SVE_ADDR_RI_S4x3xVL:
@@ -9058,6 +9059,12 @@ static const struct aarch64_option_cpu_value_table aarch64_features[] = {
AARCH64_FEATURE (AARCH64_FEATURE_SVE2, 0)},
{"bf16", AARCH64_FEATURE (AARCH64_FEATURE_BFLOAT16, 0),
AARCH64_ARCH_NONE},
+ {"i8mm", AARCH64_FEATURE (AARCH64_FEATURE_I8MM, 0),
+ AARCH64_ARCH_NONE},
+ {"f32mm", AARCH64_FEATURE (AARCH64_FEATURE_F32MM, 0),
+ AARCH64_ARCH_NONE},
+ {"f64mm", AARCH64_FEATURE (AARCH64_FEATURE_F64MM, 0),
+ AARCH64_ARCH_NONE},
{NULL, AARCH64_ARCH_NONE, AARCH64_ARCH_NONE},
};
diff --git a/gas/doc/c-aarch64.texi b/gas/doc/c-aarch64.texi
index 4cb9487..056f23d 100644
--- a/gas/doc/c-aarch64.texi
+++ b/gas/doc/c-aarch64.texi
@@ -144,6 +144,12 @@ automatically cause those extensions to be disabled.
@multitable @columnfractions .12 .17 .17 .54
@headitem Extension @tab Minimum Architecture @tab Enabled by default
@tab Description
+@item @code{i8mm} @tab ARMv8.2-A @tab ARMv8.6-A or later
+ @tab Enable Int8 Matrix Multiply extension.
+@item @code{f32mm} @tab ARMv8.2-A @tab No
+ @tab Enable F32 Matrix Multiply extension.
+@item @code{f64mm} @tab ARMv8.2-A @tab No
+ @tab Enable F64 Matrix Multiply extension.
@item @code{bf16} @tab ARMv8.2-A @tab ARMv8.6-A or later
@tab Enable BFloat16 extension.
@item @code{compnum} @tab ARMv8.2-A @tab ARMv8.3-A or later
diff --git a/gas/testsuite/gas/aarch64/f32mm.d b/gas/testsuite/gas/aarch64/f32mm.d
new file mode 100644
index 0000000..8f1cdff
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/f32mm.d
@@ -0,0 +1,11 @@
+#as: -march=armv8.6-a+sve+f32mm
+#objdump: -dr
+
+.*: file format .*
+
+
+Disassembly of section \.text:
+
+0000000000000000 <\.text>:
+ *[0-9a-f]+: 64bbe6b1 fmmla z17\.s, z21\.s, z27\.s
+ *[0-9a-f]+: 64a0e400 fmmla z0\.s, z0\.s, z0\.s
diff --git a/gas/testsuite/gas/aarch64/f32mm.s b/gas/testsuite/gas/aarch64/f32mm.s
new file mode 100644
index 0000000..6e6b32b
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/f32mm.s
@@ -0,0 +1,12 @@
+/* The instructions with non-zero register numbers are there to ensure we have
+ the correct argument positioning (i.e. check that the first argument is at
+ the end of the word etc).
+ The instructions with all-zero register numbers are to ensure the previous
+ encoding didn't just "happen" to fit -- so that if we change the registers
+ that changes the correct part of the word.
+ Each of the numbered patterns begins and ends with a 1, so we can replace
+ them with all-zeros and see the entire range has changed. */
+
+// SVE
+fmmla z17.s, z21.s, z27.s
+fmmla z0.s, z0.s, z0.s
diff --git a/gas/testsuite/gas/aarch64/f64mm.d b/gas/testsuite/gas/aarch64/f64mm.d
new file mode 100644
index 0000000..9540c13
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/f64mm.d
@@ -0,0 +1,62 @@
+#as: -march=armv8.6-a+sve+f64mm
+#objdump: -dr
+
+.*: file format .*
+
+Disassembly of section \.text:
+
+0000000000000000 <\.text>:
+ *[0-9a-f]+: 64dbe6b1 fmmla z17\.d, z21\.d, z27\.d
+ *[0-9a-f]+: 64c0e400 fmmla z0\.d, z0\.d, z0\.d
+ *[0-9a-f]+: a43b17f1 ld1rob {z17\.b}, p5/z, \[sp, x27\]
+ *[0-9a-f]+: a42003e0 ld1rob {z0\.b}, p0/z, \[sp, x0\]
+ *[0-9a-f]+: a4bb17f1 ld1roh {z17\.h}, p5/z, \[sp, x27\]
+ *[0-9a-f]+: a4a003e0 ld1roh {z0\.h}, p0/z, \[sp, x0\]
+ *[0-9a-f]+: a53b17f1 ld1row {z17\.s}, p5/z, \[sp, x27\]
+ *[0-9a-f]+: a52003e0 ld1row {z0\.s}, p0/z, \[sp, x0\]
+ *[0-9a-f]+: a5bb17f1 ld1rod {z17\.d}, p5/z, \[sp, x27\]
+ *[0-9a-f]+: a5a003e0 ld1rod {z0\.d}, p0/z, \[sp, x0\]
+ *[0-9a-f]+: a43b1411 ld1rob {z17\.b}, p5/z, \[x0, x27\]
+ *[0-9a-f]+: a4200000 ld1rob {z0\.b}, p0/z, \[x0, x0\]
+ *[0-9a-f]+: a4bb1411 ld1roh {z17\.h}, p5/z, \[x0, x27\]
+ *[0-9a-f]+: a4a00000 ld1roh {z0\.h}, p0/z, \[x0, x0\]
+ *[0-9a-f]+: a53b1411 ld1row {z17\.s}, p5/z, \[x0, x27\]
+ *[0-9a-f]+: a5200000 ld1row {z0\.s}, p0/z, \[x0, x0\]
+ *[0-9a-f]+: a5bb1411 ld1rod {z17\.d}, p5/z, \[x0, x27\]
+ *[0-9a-f]+: a5a00000 ld1rod {z0\.d}, p0/z, \[x0, x0\]
+ *[0-9a-f]+: a42037f1 ld1rob {z17\.b}, p5/z, \[sp\]
+ *[0-9a-f]+: a42723e0 ld1rob {z0\.b}, p0/z, \[sp, #224\]
+ *[0-9a-f]+: a42823e0 ld1rob {z0\.b}, p0/z, \[sp, #-256\]
+ *[0-9a-f]+: a4a037f1 ld1roh {z17\.h}, p5/z, \[sp\]
+ *[0-9a-f]+: a4a723e0 ld1roh {z0\.h}, p0/z, \[sp, #224\]
+ *[0-9a-f]+: a4a823e0 ld1roh {z0\.h}, p0/z, \[sp, #-256\]
+ *[0-9a-f]+: a52037f1 ld1row {z17\.s}, p5/z, \[sp\]
+ *[0-9a-f]+: a52723e0 ld1row {z0\.s}, p0/z, \[sp, #224\]
+ *[0-9a-f]+: a52823e0 ld1row {z0\.s}, p0/z, \[sp, #-256\]
+ *[0-9a-f]+: a5a037f1 ld1rod {z17\.d}, p5/z, \[sp\]
+ *[0-9a-f]+: a5a723e0 ld1rod {z0\.d}, p0/z, \[sp, #224\]
+ *[0-9a-f]+: a5a823e0 ld1rod {z0\.d}, p0/z, \[sp, #-256\]
+ *[0-9a-f]+: a4203411 ld1rob {z17\.b}, p5/z, \[x0\]
+ *[0-9a-f]+: a4272000 ld1rob {z0\.b}, p0/z, \[x0, #224\]
+ *[0-9a-f]+: a4282000 ld1rob {z0\.b}, p0/z, \[x0, #-256\]
+ *[0-9a-f]+: a4a03411 ld1roh {z17\.h}, p5/z, \[x0\]
+ *[0-9a-f]+: a4a72000 ld1roh {z0\.h}, p0/z, \[x0, #224\]
+ *[0-9a-f]+: a4a82000 ld1roh {z0\.h}, p0/z, \[x0, #-256\]
+ *[0-9a-f]+: a5203411 ld1row {z17\.s}, p5/z, \[x0\]
+ *[0-9a-f]+: a5272000 ld1row {z0\.s}, p0/z, \[x0, #224\]
+ *[0-9a-f]+: a5282000 ld1row {z0\.s}, p0/z, \[x0, #-256\]
+ *[0-9a-f]+: a5a03411 ld1rod {z17\.d}, p5/z, \[x0\]
+ *[0-9a-f]+: a5a72000 ld1rod {z0\.d}, p0/z, \[x0, #224\]
+ *[0-9a-f]+: a5a82000 ld1rod {z0\.d}, p0/z, \[x0, #-256\]
+ *[0-9a-f]+: 05a502b1 zip1 z17\.q, z21\.q, z5\.q
+ *[0-9a-f]+: 05a00000 zip1 z0\.q, z0\.q, z0\.q
+ *[0-9a-f]+: 05a506b1 zip2 z17\.q, z21\.q, z5\.q
+ *[0-9a-f]+: 05a00400 zip2 z0\.q, z0\.q, z0\.q
+ *[0-9a-f]+: 05a50ab1 uzip1 z17\.q, z21\.q, z5\.q
+ *[0-9a-f]+: 05a00800 uzip1 z0\.q, z0\.q, z0\.q
+ *[0-9a-f]+: 05a50eb1 uzip2 z17\.q, z21\.q, z5\.q
+ *[0-9a-f]+: 05a00c00 uzip2 z0\.q, z0\.q, z0\.q
+ *[0-9a-f]+: 05a51ab1 trn1 z17\.q, z21\.q, z5\.q
+ *[0-9a-f]+: 05a01800 trn1 z0\.q, z0\.q, z0\.q
+ *[0-9a-f]+: 05a51eb1 trn2 z17\.q, z21\.q, z5\.q
+ *[0-9a-f]+: 05a01c00 trn2 z0\.q, z0\.q, z0\.q
diff --git a/gas/testsuite/gas/aarch64/f64mm.s b/gas/testsuite/gas/aarch64/f64mm.s
new file mode 100644
index 0000000..fcf662b
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/f64mm.s
@@ -0,0 +1,71 @@
+/* The instructions with non-zero register numbers are there to ensure we have
+ the correct argument positioning (i.e. check that the first argument is at
+ the end of the word etc).
+ The instructions with all-zero register numbers are to ensure the previous
+ encoding didn't just "happen" to fit -- so that if we change the registers
+ that changes the correct part of the word.
+ Each of the numbered patterns begins and ends with a 1, so we can replace
+ them with all-zeros and see the entire range has changed. */
+
+// SVE
+fmmla z17.d, z21.d, z27.d
+fmmla z0.d, z0.d, z0.d
+
+ld1rob { z17.b }, p5/z, [sp, x27]
+ld1rob { z0.b }, p0/z, [sp, x0]
+ld1roh { z17.h }, p5/z, [sp, x27]
+ld1roh { z0.h }, p0/z, [sp, x0]
+ld1row { z17.s }, p5/z, [sp, x27]
+ld1row { z0.s }, p0/z, [sp, x0]
+ld1rod { z17.d }, p5/z, [sp, x27]
+ld1rod { z0.d }, p0/z, [sp, x0]
+
+ld1rob { z17.b }, p5/z, [x0, x27]
+ld1rob { z0.b }, p0/z, [x0, x0]
+ld1roh { z17.h }, p5/z, [x0, x27]
+ld1roh { z0.h }, p0/z, [x0, x0]
+ld1row { z17.s }, p5/z, [x0, x27]
+ld1row { z0.s }, p0/z, [x0, x0]
+ld1rod { z17.d }, p5/z, [x0, x27]
+ld1rod { z0.d }, p0/z, [x0, x0]
+
+ld1rob { z17.b }, p5/z, [sp, #0]
+ld1rob { z0.b }, p0/z, [sp, #224]
+ld1rob { z0.b }, p0/z, [sp, #-256]
+ld1roh { z17.h }, p5/z, [sp, #0]
+ld1roh { z0.h }, p0/z, [sp, #224]
+ld1roh { z0.h }, p0/z, [sp, #-256]
+ld1row { z17.s }, p5/z, [sp, #0]
+ld1row { z0.s }, p0/z, [sp, #224]
+ld1row { z0.s }, p0/z, [sp, #-256]
+ld1rod { z17.d }, p5/z, [sp, #0]
+ld1rod { z0.d }, p0/z, [sp, #224]
+ld1rod { z0.d }, p0/z, [sp, #-256]
+
+ld1rob { z17.b }, p5/z, [x0, #0]
+ld1rob { z0.b }, p0/z, [x0, #224]
+ld1rob { z0.b }, p0/z, [x0, #-256]
+ld1roh { z17.h }, p5/z, [x0, #0]
+ld1roh { z0.h }, p0/z, [x0, #224]
+ld1roh { z0.h }, p0/z, [x0, #-256]
+ld1row { z17.s }, p5/z, [x0, #0]
+ld1row { z0.s }, p0/z, [x0, #224]
+ld1row { z0.s }, p0/z, [x0, #-256]
+ld1rod { z17.d }, p5/z, [x0, #0]
+ld1rod { z0.d }, p0/z, [x0, #224]
+ld1rod { z0.d }, p0/z, [x0, #-256]
+
+zip1 z17.q, z21.q, z5.q
+zip1 z0.q, z0.q, z0.q
+zip2 z17.q, z21.q, z5.q
+zip2 z0.q, z0.q, z0.q
+
+uzip1 z17.q, z21.q, z5.q
+uzip1 z0.q, z0.q, z0.q
+uzip2 z17.q, z21.q, z5.q
+uzip2 z0.q, z0.q, z0.q
+
+trn1 z17.q, z21.q, z5.q
+trn1 z0.q, z0.q, z0.q
+trn2 z17.q, z21.q, z5.q
+trn2 z0.q, z0.q, z0.q
diff --git a/gas/testsuite/gas/aarch64/i8mm.d b/gas/testsuite/gas/aarch64/i8mm.d
new file mode 100644
index 0000000..14db65f
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/i8mm.d
@@ -0,0 +1,43 @@
+#as: -march=armv8.6-a+sve
+#objdump: -dr
+
+.*: file format .*
+
+
+Disassembly of section \.text:
+
+0000000000000000 <\.text>:
+ *[0-9a-f]+: 451b9ab1 smmla z17\.s, z21\.b, z27\.b
+ *[0-9a-f]+: 45009800 smmla z0\.s, z0\.b, z0\.b
+ *[0-9a-f]+: 45db9ab1 ummla z17\.s, z21\.b, z27\.b
+ *[0-9a-f]+: 45c09800 ummla z0\.s, z0\.b, z0\.b
+ *[0-9a-f]+: 459b9ab1 usmmla z17\.s, z21\.b, z27\.b
+ *[0-9a-f]+: 45809800 usmmla z0\.s, z0\.b, z0\.b
+ *[0-9a-f]+: 449b7ab1 usdot z17\.s, z21\.b, z27\.b
+ *[0-9a-f]+: 44807800 usdot z0\.s, z0\.b, z0\.b
+ *[0-9a-f]+: 44bf1ab1 usdot z17\.s, z21\.b, z7\.b\[3\]
+ *[0-9a-f]+: 44b81800 usdot z0\.s, z0\.b, z0\.b\[3\]
+ *[0-9a-f]+: 44a71ab1 usdot z17\.s, z21\.b, z7\.b\[0\]
+ *[0-9a-f]+: 44a01800 usdot z0\.s, z0\.b, z0\.b\[0\]
+ *[0-9a-f]+: 44bf1eb1 sudot z17\.s, z21\.b, z7\.b\[3\]
+ *[0-9a-f]+: 44b81c00 sudot z0\.s, z0\.b, z0\.b\[3\]
+ *[0-9a-f]+: 44a71eb1 sudot z17\.s, z21\.b, z7\.b\[0\]
+ *[0-9a-f]+: 44a01c00 sudot z0\.s, z0\.b, z0\.b\[0\]
+ *[0-9a-f]+: 4e9ba6b1 smmla v17\.4s, v21\.16b, v27\.16b
+ *[0-9a-f]+: 4e9ba6b1 smmla v17\.4s, v21\.16b, v27\.16b
+ *[0-9a-f]+: 6e9ba6b1 ummla v17\.4s, v21\.16b, v27\.16b
+ *[0-9a-f]+: 6e80a400 ummla v0\.4s, v0\.16b, v0\.16b
+ *[0-9a-f]+: 4e80ac00 usmmla v0\.4s, v0\.16b, v0\.16b
+ *[0-9a-f]+: 4e9baeb1 usmmla v17\.4s, v21\.16b, v27\.16b
+ *[0-9a-f]+: 4e9b9eb1 usdot v17\.2s, v21\.8b, v27\.8b
+ *[0-9a-f]+: 4e809c00 usdot v0\.2s, v0\.8b, v0\.8b
+ *[0-9a-f]+: 4e9b9eb1 usdot v17\.2s, v21\.8b, v27\.8b
+ *[0-9a-f]+: 4e809c00 usdot v0\.2s, v0\.8b, v0\.8b
+ *[0-9a-f]+: 4fbbfab1 usdot v17\.2s, v21\.8b, v27\.4b\[3\]
+ *[0-9a-f]+: 4fa0f800 usdot v0\.2s, v0\.8b, v0\.4b\[3\]
+ *[0-9a-f]+: 4f9bf2b1 usdot v17\.2s, v21\.8b, v27\.4b\[0\]
+ *[0-9a-f]+: 4f80f000 usdot v0\.2s, v0\.8b, v0\.4b\[0\]
+ *[0-9a-f]+: 4f3bfab1 sudot v17\.2s, v21\.8b, v27\.4b\[3\]
+ *[0-9a-f]+: 4f20f800 sudot v0\.2s, v0\.8b, v0\.4b\[3\]
+ *[0-9a-f]+: 4f1bf2b1 sudot v17\.2s, v21\.8b, v27\.4b\[0\]
+ *[0-9a-f]+: 4f00f000 sudot v0\.2s, v0\.8b, v0\.4b\[0\]
diff --git a/gas/testsuite/gas/aarch64/i8mm.s b/gas/testsuite/gas/aarch64/i8mm.s
new file mode 100644
index 0000000..38f871d
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/i8mm.s
@@ -0,0 +1,56 @@
+/* The instructions with non-zero register numbers are there to ensure we have
+ the correct argument positioning (i.e. check that the first argument is at
+ the end of the word etc).
+ The instructions with all-zero register numbers are to ensure the previous
+ encoding didn't just "happen" to fit -- so that if we change the registers
+ that changes the correct part of the word.
+ Each of the numbered patterns begins and ends with a 1, so we can replace
+ them with all-zeros and see the entire range has changed. */
+
+// SVE
+smmla z17.s, z21.b, z27.b
+smmla z0.s, z0.b, z0.b
+
+ummla z17.s, z21.b, z27.b
+ummla z0.s, z0.b, z0.b
+
+usmmla z17.s, z21.b, z27.b
+usmmla z0.s, z0.b, z0.b
+
+usdot z17.s, z21.b, z27.b
+usdot z0.s, z0.b, z0.b
+
+usdot z17.s, z21.b, z7.b[3]
+usdot z0.s, z0.b, z0.b[3]
+usdot z17.s, z21.b, z7.b[0]
+usdot z0.s, z0.b, z0.b[0]
+
+sudot z17.s, z21.b, z7.b[3]
+sudot z0.s, z0.b, z0.b[3]
+sudot z17.s, z21.b, z7.b[0]
+sudot z0.s, z0.b, z0.b[0]
+
+// SIMD
+smmla v17.4s, v21.16b, v27.16b
+smmla v17.4s, v21.16b, v27.16b
+
+ummla v17.4s, v21.16b, v27.16b
+ummla v0.4s, v0.16b, v0.16b
+
+usmmla v0.4s, v0.16b, v0.16b
+usmmla v17.4s, v21.16b, v27.16b
+
+usdot v17.2s, v21.8b, v27.8b
+usdot v0.2s, v0.8b, v0.8b
+usdot v17.4s, v21.16b, v27.16b
+usdot v0.4s, v0.16b, v0.16b
+
+usdot v17.2s, v21.8b, v27.4b[3]
+usdot v0.2s, v0.8b, v0.4b[3]
+usdot v17.2s, v21.8b, v27.4b[0]
+usdot v0.2s, v0.8b, v0.4b[0]
+
+sudot v17.4s, v21.16b, v27.4b[3]
+sudot v0.4s, v0.16b, v0.4b[3]
+sudot v17.4s, v21.16b, v27.4b[0]
+sudot v0.4s, v0.16b, v0.4b[0]
diff --git a/gas/testsuite/gas/aarch64/sve-movprfx-mm.d b/gas/testsuite/gas/aarch64/sve-movprfx-mm.d
new file mode 100644
index 0000000..197f69f
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/sve-movprfx-mm.d
@@ -0,0 +1,24 @@
+#as: -march=armv8.6-a+f32mm+f64mm+sve
+#objdump: -dr
+
+.* file format .*
+
+Disassembly of section \.text:
+
+0000000000000000 <\.text>:
+ *[0-9a-f]+: 0420bc11 movprfx z17, z0
+ *[0-9a-f]+: 451b9ab1 smmla z17\.s, z21\.b, z27\.b
+ *[0-9a-f]+: 0420bc11 movprfx z17, z0
+ *[0-9a-f]+: 45db9ab1 ummla z17\.s, z21\.b, z27\.b
+ *[0-9a-f]+: 0420bc11 movprfx z17, z0
+ *[0-9a-f]+: 459b9ab1 usmmla z17\.s, z21\.b, z27\.b
+ *[0-9a-f]+: 0420bc11 movprfx z17, z0
+ *[0-9a-f]+: 449b7ab1 usdot z17\.s, z21\.b, z27\.b
+ *[0-9a-f]+: 0420bc11 movprfx z17, z0
+ *[0-9a-f]+: 44bf1ab1 usdot z17\.s, z21\.b, z7\.b\[3\]
+ *[0-9a-f]+: 0420bc11 movprfx z17, z0
+ *[0-9a-f]+: 44bf1eb1 sudot z17\.s, z21\.b, z7\.b\[3\]
+ *[0-9a-f]+: 0420bc11 movprfx z17, z0
+ *[0-9a-f]+: 64bbe6b1 fmmla z17\.s, z21\.s, z27\.s
+ *[0-9a-f]+: 0420bc11 movprfx z17, z0
+ *[0-9a-f]+: 64dbe6b1 fmmla z17\.d, z21\.d, z27\.d
diff --git a/gas/testsuite/gas/aarch64/sve-movprfx-mm.s b/gas/testsuite/gas/aarch64/sve-movprfx-mm.s
new file mode 100644
index 0000000..6af5844
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/sve-movprfx-mm.s
@@ -0,0 +1,25 @@
+/* MOVPRFX tests for matrix multiply instructions */
+
+movprfx z17, z0
+smmla z17.s, z21.b, z27.b
+
+movprfx z17, z0
+ummla z17.s, z21.b, z27.b
+
+movprfx z17, z0
+usmmla z17.s, z21.b, z27.b
+
+movprfx z17, z0
+usdot z17.s, z21.b, z27.b
+
+movprfx z17, z0
+usdot z17.s, z21.b, z7.b[3]
+
+movprfx z17, z0
+sudot z17.s, z21.b, z7.b[3]
+
+movprfx z17, z0
+fmmla z17.s, z21.s, z27.s
+
+movprfx z17, z0
+fmmla z17.d, z21.d, z27.d