From 5437a02abc9fe106054965828787e8f232692935 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 3 Jan 2020 10:16:44 +0100 Subject: Arm64: correct address index operands for LD1RO{H,W,D} Just like their LD1RQ{H,W,D} counterparts, as per the specification the index registers get scaled by element size. --- gas/ChangeLog | 5 +++++ gas/testsuite/gas/aarch64/f64mm.d | 24 ++++++++++++------------ gas/testsuite/gas/aarch64/f64mm.s | 24 ++++++++++++------------ opcodes/ChangeLog | 11 ++++++++--- opcodes/aarch64-tbl.h | 8 ++++---- 5 files changed, 41 insertions(+), 31 deletions(-) diff --git a/gas/ChangeLog b/gas/ChangeLog index 4a8bb54..41959ae 100644 --- a/gas/ChangeLog +++ b/gas/ChangeLog @@ -1,5 +1,10 @@ 2020-01-03 Jan Beulich + * testsuite/gas/aarch64/f64mm.s: Scale index of LD1RO{H,W,D}. + * testsuite/gas/aarch64/f64mm.d: Adjust expectations. + +2020-01-03 Jan Beulich + * testsuite/gas/aarch64/i8mm.s: Add 128-bit form tests for by-element usdot. Add 64-bit form tests for by-element sudot. * testsuite/gas/aarch64/i8mm.d: Adjust expectations. diff --git a/gas/testsuite/gas/aarch64/f64mm.d b/gas/testsuite/gas/aarch64/f64mm.d index e9ec694..35c0853 100644 --- a/gas/testsuite/gas/aarch64/f64mm.d +++ b/gas/testsuite/gas/aarch64/f64mm.d @@ -10,20 +10,20 @@ Disassembly of section \.text: *[0-9a-f]+: 64e0e400 fmmla z0\.d, z0\.d, z0\.d *[0-9a-f]+: a43b17f1 ld1rob {z17\.b}, p5/z, \[sp, x27\] *[0-9a-f]+: a42003e0 ld1rob {z0\.b}, p0/z, \[sp, x0\] - *[0-9a-f]+: a4bb17f1 ld1roh {z17\.h}, p5/z, \[sp, x27\] - *[0-9a-f]+: a4a003e0 ld1roh {z0\.h}, p0/z, \[sp, x0\] - *[0-9a-f]+: a53b17f1 ld1row {z17\.s}, p5/z, \[sp, x27\] - *[0-9a-f]+: a52003e0 ld1row {z0\.s}, p0/z, \[sp, x0\] - *[0-9a-f]+: a5bb17f1 ld1rod {z17\.d}, p5/z, \[sp, x27\] - *[0-9a-f]+: a5a003e0 ld1rod {z0\.d}, p0/z, \[sp, x0\] + *[0-9a-f]+: a4bb17f1 ld1roh {z17\.h}, p5/z, \[sp, x27, lsl #1\] + *[0-9a-f]+: a4a003e0 ld1roh {z0\.h}, p0/z, \[sp, x0, lsl #1\] + *[0-9a-f]+: a53b17f1 ld1row {z17\.s}, p5/z, \[sp, x27, lsl #2\] + *[0-9a-f]+: a52003e0 ld1row {z0\.s}, p0/z, \[sp, x0, lsl #2\] + *[0-9a-f]+: a5bb17f1 ld1rod {z17\.d}, p5/z, \[sp, x27, lsl #3\] + *[0-9a-f]+: a5a003e0 ld1rod {z0\.d}, p0/z, \[sp, x0, lsl #3\] *[0-9a-f]+: a43b1411 ld1rob {z17\.b}, p5/z, \[x0, x27\] *[0-9a-f]+: a4200000 ld1rob {z0\.b}, p0/z, \[x0, x0\] - *[0-9a-f]+: a4bb1411 ld1roh {z17\.h}, p5/z, \[x0, x27\] - *[0-9a-f]+: a4a00000 ld1roh {z0\.h}, p0/z, \[x0, x0\] - *[0-9a-f]+: a53b1411 ld1row {z17\.s}, p5/z, \[x0, x27\] - *[0-9a-f]+: a5200000 ld1row {z0\.s}, p0/z, \[x0, x0\] - *[0-9a-f]+: a5bb1411 ld1rod {z17\.d}, p5/z, \[x0, x27\] - *[0-9a-f]+: a5a00000 ld1rod {z0\.d}, p0/z, \[x0, x0\] + *[0-9a-f]+: a4bb1411 ld1roh {z17\.h}, p5/z, \[x0, x27, lsl #1\] + *[0-9a-f]+: a4a00000 ld1roh {z0\.h}, p0/z, \[x0, x0, lsl #1\] + *[0-9a-f]+: a53b1411 ld1row {z17\.s}, p5/z, \[x0, x27, lsl #2\] + *[0-9a-f]+: a5200000 ld1row {z0\.s}, p0/z, \[x0, x0, lsl #2\] + *[0-9a-f]+: a5bb1411 ld1rod {z17\.d}, p5/z, \[x0, x27, lsl #3\] + *[0-9a-f]+: a5a00000 ld1rod {z0\.d}, p0/z, \[x0, x0, lsl #3\] *[0-9a-f]+: a42037f1 ld1rob {z17\.b}, p5/z, \[sp\] *[0-9a-f]+: a42723e0 ld1rob {z0\.b}, p0/z, \[sp, #224\] *[0-9a-f]+: a42823e0 ld1rob {z0\.b}, p0/z, \[sp, #-256\] diff --git a/gas/testsuite/gas/aarch64/f64mm.s b/gas/testsuite/gas/aarch64/f64mm.s index cfe6b17..a58b3e9 100644 --- a/gas/testsuite/gas/aarch64/f64mm.s +++ b/gas/testsuite/gas/aarch64/f64mm.s @@ -13,21 +13,21 @@ fmmla z0.d, z0.d, z0.d ld1rob { z17.b }, p5/z, [sp, x27] ld1rob { z0.b }, p0/z, [sp, x0] -ld1roh { z17.h }, p5/z, [sp, x27] -ld1roh { z0.h }, p0/z, [sp, x0] -ld1row { z17.s }, p5/z, [sp, x27] -ld1row { z0.s }, p0/z, [sp, x0] -ld1rod { z17.d }, p5/z, [sp, x27] -ld1rod { z0.d }, p0/z, [sp, x0] +ld1roh { z17.h }, p5/z, [sp, x27, lsl #1] +ld1roh { z0.h }, p0/z, [sp, x0, lsl #1] +ld1row { z17.s }, p5/z, [sp, x27, lsl #2] +ld1row { z0.s }, p0/z, [sp, x0, lsl #2] +ld1rod { z17.d }, p5/z, [sp, x27, lsl #3] +ld1rod { z0.d }, p0/z, [sp, x0, lsl #3] ld1rob { z17.b }, p5/z, [x0, x27] ld1rob { z0.b }, p0/z, [x0, x0] -ld1roh { z17.h }, p5/z, [x0, x27] -ld1roh { z0.h }, p0/z, [x0, x0] -ld1row { z17.s }, p5/z, [x0, x27] -ld1row { z0.s }, p0/z, [x0, x0] -ld1rod { z17.d }, p5/z, [x0, x27] -ld1rod { z0.d }, p0/z, [x0, x0] +ld1roh { z17.h }, p5/z, [x0, x27, lsl #1] +ld1roh { z0.h }, p0/z, [x0, x0, lsl #1] +ld1row { z17.s }, p5/z, [x0, x27, lsl #2] +ld1row { z0.s }, p0/z, [x0, x0, lsl #2] +ld1rod { z17.d }, p5/z, [x0, x27, lsl #3] +ld1rod { z0.d }, p0/z, [x0, x0, lsl #3] ld1rob { z17.b }, p5/z, [sp, #0] ld1rob { z0.b }, p0/z, [sp, #224] diff --git a/opcodes/ChangeLog b/opcodes/ChangeLog index 19a7b3f..fb9f9e0 100644 --- a/opcodes/ChangeLog +++ b/opcodes/ChangeLog @@ -1,17 +1,22 @@ 2020-01-03 Jan Beulich - * opcodes/aarch64-tbl.h (aarch64_opcode_table): Correct SIMD + * aarch64-tbl.h (aarch64_opcode_table): Use + SVE_ADDR_RX_LSL{1,2,3} for LD1RO{H,W,D}. + +2020-01-03 Jan Beulich + + * aarch64-tbl.h (aarch64_opcode_table): Correct SIMD forms of SUDOT and USDOT. 2020-01-03 Jan Beulich - * opcodes/aarch64-tbl.h (aarch64_opcode_table): Drop 'i' from + * aarch64-tbl.h (aarch64_opcode_table): Drop 'i' from uzip{1,2}. * opcodes/aarch64-dis-2.c: Re-generate. 2020-01-03 Jan Beulich - * opcodes/aarch64-tbl.h (aarch64_opcode_table): Correct 64-bit + * aarch64-tbl.h (aarch64_opcode_table): Correct 64-bit FMMLA encoding. * opcodes/aarch64-dis-2.c: Re-generate. diff --git a/opcodes/aarch64-tbl.h b/opcodes/aarch64-tbl.h index 2655ca5..48872e4 100644 --- a/opcodes/aarch64-tbl.h +++ b/opcodes/aarch64-tbl.h @@ -5074,10 +5074,10 @@ struct aarch64_opcode aarch64_opcode_table[] = INT8MATMUL_SVE_INSNC ("sudot", 0x44a01c00, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_INDEX), OP_SVE_SBB, 0, C_SCAN_MOVPRFX, 0), F32MATMUL_SVE_INSNC ("fmmla", 0x64a0e400, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_VVV_S, 0, C_SCAN_MOVPRFX, 0), F64MATMUL_SVE_INSNC ("fmmla", 0x64e0e400, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_VVV_D, 0, C_SCAN_MOVPRFX, 0), - F64MATMUL_SVE_INSN ("ld1rob", 0xa4200000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX), OP_SVE_BZU, F_OD(1), 0), - F64MATMUL_SVE_INSN ("ld1roh", 0xa4a00000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX), OP_SVE_HZU, F_OD(1), 0), - F64MATMUL_SVE_INSN ("ld1row", 0xa5200000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX), OP_SVE_SZU, F_OD(1), 0), - F64MATMUL_SVE_INSN ("ld1rod", 0xa5a00000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX), OP_SVE_DZU, F_OD(1), 0), + F64MATMUL_SVE_INSN ("ld1rob", 0xa4200000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX), OP_SVE_BZU, F_OD(1), 0), + F64MATMUL_SVE_INSN ("ld1roh", 0xa4a00000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX_LSL1), OP_SVE_HZU, F_OD(1), 0), + F64MATMUL_SVE_INSN ("ld1row", 0xa5200000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX_LSL2), OP_SVE_SZU, F_OD(1), 0), + F64MATMUL_SVE_INSN ("ld1rod", 0xa5a00000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX_LSL3), OP_SVE_DZU, F_OD(1), 0), F64MATMUL_SVE_INSN ("ld1rob", 0xa4202000, 0xfff0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RI_S4x32), OP_SVE_BZU, F_OD(1), 0), F64MATMUL_SVE_INSN ("ld1roh", 0xa4a02000, 0xfff0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RI_S4x32), OP_SVE_HZU, F_OD(1), 0), F64MATMUL_SVE_INSN ("ld1row", 0xa5202000, 0xfff0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RI_S4x32), OP_SVE_SZU, F_OD(1), 0), -- cgit v1.1