diff options
author | Jiong Wang <jiong.wang@arm.com> | 2016-03-16 16:11:59 +0000 |
---|---|---|
committer | Jiong Wang <jiong.wang@arm.com> | 2016-03-16 16:11:59 +0000 |
commit | cc9333013723880949a1e07a3e04bdbc1a3c3032 (patch) | |
tree | 94d4c8bbf3879eff4ba715ce6eba6ab80a3549a7 | |
parent | 6b94a855beef3a301509dfac6bf5446d1953524b (diff) | |
download | gdb-cc9333013723880949a1e07a3e04bdbc1a3c3032.zip gdb-cc9333013723880949a1e07a3e04bdbc1a3c3032.tar.gz gdb-cc9333013723880949a1e07a3e04bdbc1a3c3032.tar.bz2 |
[ARM] Support ARMv8.2 FP16 simd instructions
gas/
* config/tc-arm.c (N_S_32): New.
(N_F_16_32): Likewise.
(N_SUF_32): Support N_F16.
(N_IF_32): Likewise.
(neon_dyadic_misc): Likewise.
(do_neon_cmp): Likewise.
(do_neon_cmp_inv): Likewise.
(do_neon_mul): Likewise.
(do_neon_fcmp_absolute): Likewise.
(do_neon_step): Likewise.
(do_neon_abs_neg): Likewise.
(CVT_FLAVOR_VAR): Likewise.
(do_neon_cvt_1): Likewise.
(do_neon_recip_est): Likewise.
(do_vmaxnm): Likewise.
(do_vrint_1): Likewise.
(neon_check_type): Check architecture support for FP16 extension.
(insns): Update comments.
* testsuite/gas/arm/armv8-2-fp16-simd.s: New test source.
* testsuite/gas/arm/armv8-2-fp16-simd.d: New testcase for arm mode.
* testsuite/gas/arm/armv8-2-fp16-simd-thumb.d: Likewise for thumb mode.
* testsuite/gas/arm/armv8-2-fp16-simd-warning.d: New rejection test for
arm mode.
* testsuite/gas/arm/armv8-2-fp16-simd-warning-thumb.d: Likewise for
thumb mode.
* testsuite/gas/arm/armv8-2-fp16-simd-warning.l: New expected rejection
error file.
opcodes/
* arm-dis.c (neon_opcodes): Support new FP16 instructions.
-rw-r--r-- | gas/ChangeLog | 30 | ||||
-rw-r--r-- | gas/config/tc-arm.c | 96 | ||||
-rw-r--r-- | gas/testsuite/gas/arm/armv8-2-fp16-simd-thumb.d | 147 | ||||
-rw-r--r-- | gas/testsuite/gas/arm/armv8-2-fp16-simd-warning-thumb.d | 4 | ||||
-rw-r--r-- | gas/testsuite/gas/arm/armv8-2-fp16-simd-warning.d | 4 | ||||
-rw-r--r-- | gas/testsuite/gas/arm/armv8-2-fp16-simd-warning.l | 137 | ||||
-rw-r--r-- | gas/testsuite/gas/arm/armv8-2-fp16-simd.d | 147 | ||||
-rw-r--r-- | gas/testsuite/gas/arm/armv8-2-fp16-simd.s | 224 | ||||
-rw-r--r-- | opcodes/ChangeLog | 4 | ||||
-rw-r--r-- | opcodes/arm-dis.c | 104 |
10 files changed, 851 insertions, 46 deletions
diff --git a/gas/ChangeLog b/gas/ChangeLog index 0f1c13b..42dad37 100644 --- a/gas/ChangeLog +++ b/gas/ChangeLog @@ -1,3 +1,33 @@ +2016-03-16 Jiong Wang <jiong.wang@arm.com> + + * config/tc-arm.c (N_S_32): New. + (N_F_16_32): Likewise. + (N_SUF_32): Support N_F16. + (N_IF_32): Likewise. + (neon_dyadic_misc): Likewise. + (do_neon_cmp): Likewise. + (do_neon_cmp_inv): Likewise. + (do_neon_mul): Likewise. + (do_neon_fcmp_absolute): Likewise. + (do_neon_step): Likewise. + (do_neon_abs_neg): Likewise. + (CVT_FLAVOR_VAR): Likewise. + (do_neon_cvt_1): Likewise. + (do_neon_recip_est): Likewise. + (do_vmaxnm): Likewise. + (do_vrint_1): Likewise. + (neon_check_type): Check architecture support for FP16 extension. + (insns): Update comments. + * testsuite/gas/arm/armv8-2-fp16-simd.s: New test source. + * testsuite/gas/arm/armv8-2-fp16-simd.d: New testcase for arm mode. + * testsuite/gas/arm/armv8-2-fp16-simd-thumb.d: Likewise for thumb mode. + * testsuite/gas/arm/armv8-2-fp16-simd-warning.d: New rejection test for + arm mode. + * testsuite/gas/arm/armv8-2-fp16-simd-warning-thumb.d: Likewise for + thumb mode. + * testsuite/gas/arm/armv8-2-fp16-simd-warning.l: New expected rejection + error file. 
+ 2016-03-16 Nick Clifton <nickc@redhat.com> * read.c (emit_expr_with_reloc): Add code check a bignum with diff --git a/gas/config/tc-arm.c b/gas/config/tc-arm.c index 16426b7..5b3cf59 100644 --- a/gas/config/tc-arm.c +++ b/gas/config/tc-arm.c @@ -13443,9 +13443,11 @@ enum neon_type_mask #define N_SU_ALL (N_S8 | N_S16 | N_S32 | N_S64 | N_U8 | N_U16 | N_U32 | N_U64) #define N_SU_32 (N_S8 | N_S16 | N_S32 | N_U8 | N_U16 | N_U32) #define N_SU_16_64 (N_S16 | N_S32 | N_S64 | N_U16 | N_U32 | N_U64) -#define N_SUF_32 (N_SU_32 | N_F32) +#define N_S_32 (N_S8 | N_S16 | N_S32) +#define N_F_16_32 (N_F16 | N_F32) +#define N_SUF_32 (N_SU_32 | N_F_16_32) #define N_I_ALL (N_I8 | N_I16 | N_I32 | N_I64) -#define N_IF_32 (N_I8 | N_I16 | N_I32 | N_F32) +#define N_IF_32 (N_I8 | N_I16 | N_I32 | N_F16 | N_F32) #define N_F_ALL (N_F16 | N_F32 | N_F64) /* Pass this as the first type argument to neon_check_type to ignore types @@ -13915,6 +13917,15 @@ neon_check_type (unsigned els, enum neon_shape ns, ...) k_type = g_type; k_size = g_size; key_allowed = thisarg & ~N_KEY; + + /* Check architecture constraint on FP16 extension. */ + if (k_size == 16 + && k_type == NT_float + && ! ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16)) + { + inst.error = _(BAD_FP16); + return badtype; + } } } else @@ -14726,7 +14737,7 @@ neon_dyadic_misc (enum neon_el_type ubit_meaning, unsigned types, if (et.type == NT_float) { NEON_ENCODE (FLOAT, inst); - neon_three_same (neon_quad (rs), 0, -1); + neon_three_same (neon_quad (rs), 0, et.size == 16 ? 
(int) et.size : -1); } else { @@ -14887,13 +14898,13 @@ neon_compare (unsigned regtypes, unsigned immtypes, int invert) static void do_neon_cmp (void) { - neon_compare (N_SUF_32, N_S8 | N_S16 | N_S32 | N_F32, FALSE); + neon_compare (N_SUF_32, N_S_32 | N_F_16_32, FALSE); } static void do_neon_cmp_inv (void) { - neon_compare (N_SUF_32, N_S8 | N_S16 | N_S32 | N_F32, TRUE); + neon_compare (N_SUF_32, N_S_32 | N_F_16_32, TRUE); } static void @@ -15021,7 +15032,7 @@ do_neon_mul (void) if (inst.operands[2].isscalar) do_neon_mac_maybe_scalar (); else - neon_dyadic_misc (NT_poly, N_I8 | N_I16 | N_I32 | N_F32 | N_P8, 0); + neon_dyadic_misc (NT_poly, N_I8 | N_I16 | N_I32 | N_F16 | N_F32 | N_P8, 0); } static void @@ -15082,9 +15093,10 @@ static void do_neon_fcmp_absolute (void) { enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL); - neon_check_type (3, rs, N_EQK, N_EQK, N_F32 | N_KEY); + struct neon_type_el et = neon_check_type (3, rs, N_EQK, N_EQK, + N_F_16_32 | N_KEY); /* Size field comes from bit mask. */ - neon_three_same (neon_quad (rs), 1, -1); + neon_three_same (neon_quad (rs), 1, et.size == 16 ? (int) et.size : -1); } static void @@ -15098,8 +15110,9 @@ static void do_neon_step (void) { enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL); - neon_check_type (3, rs, N_EQK, N_EQK, N_F32 | N_KEY); - neon_three_same (neon_quad (rs), 0, -1); + struct neon_type_el et = neon_check_type (3, rs, N_EQK, N_EQK, + N_F_16_32 | N_KEY); + neon_three_same (neon_quad (rs), 0, et.size == 16 ? 
(int) et.size : -1); } static void @@ -15115,7 +15128,7 @@ do_neon_abs_neg (void) return; rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL); - et = neon_check_type (2, rs, N_EQK, N_S8 | N_S16 | N_S32 | N_F32 | N_KEY); + et = neon_check_type (2, rs, N_EQK, N_S_32 | N_F_16_32 | N_KEY); inst.instruction |= LOW4 (inst.operands[0].reg) << 12; inst.instruction |= HI1 (inst.operands[0].reg) << 22; @@ -15324,6 +15337,10 @@ do_neon_shll (void) CVT_VAR (f32_s32, N_F32, N_S32, whole_reg, "fsltos", "fsitos", NULL) \ CVT_VAR (f32_u32, N_F32, N_U32, whole_reg, "fultos", "fuitos", NULL) \ /* Half-precision conversions. */ \ + CVT_VAR (s16_f16, N_S16, N_F16 | N_KEY, whole_reg, NULL, NULL, NULL) \ + CVT_VAR (u16_f16, N_U16, N_F16 | N_KEY, whole_reg, NULL, NULL, NULL) \ + CVT_VAR (f16_s16, N_F16 | N_KEY, N_S16, whole_reg, NULL, NULL, NULL) \ + CVT_VAR (f16_u16, N_F16 | N_KEY, N_U16, whole_reg, NULL, NULL, NULL) \ CVT_VAR (f32_f16, N_F32, N_F16, whole_reg, NULL, NULL, NULL) \ CVT_VAR (f16_f32, N_F16, N_F32, whole_reg, NULL, NULL, NULL) \ /* New VCVT instructions introduced by ARMv8.2 fp16 extension. \ @@ -15556,10 +15573,15 @@ do_neon_cvt_1 (enum neon_cvt_mode mode) NS_NULL); enum neon_cvt_flavour flavour = get_neon_cvt_flavour (rs); + if (flavour == neon_cvt_flavour_invalid) + return; + /* PR11109: Handle round-to-zero for VCVT conversions. 
*/ if (mode == neon_cvt_mode_z && ARM_CPU_HAS_FEATURE (cpu_variant, fpu_arch_vfp_v2) - && (flavour == neon_cvt_flavour_s32_f32 + && (flavour == neon_cvt_flavour_s16_f16 + || flavour == neon_cvt_flavour_u16_f16 + || flavour == neon_cvt_flavour_s32_f32 || flavour == neon_cvt_flavour_u32_f32 || flavour == neon_cvt_flavour_s32_f64 || flavour == neon_cvt_flavour_u32_f64) @@ -15598,7 +15620,8 @@ do_neon_cvt_1 (enum neon_cvt_mode mode) case NS_QQI: { unsigned immbits; - unsigned enctab[] = { 0x0000100, 0x1000100, 0x0, 0x1000000 }; + unsigned enctab[] = {0x0000100, 0x1000100, 0x0, 0x1000000, + 0x0000100, 0x1000100, 0x0, 0x1000000}; if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL) return; @@ -15607,7 +15630,6 @@ do_neon_cvt_1 (enum neon_cvt_mode mode) integer conversion. */ if (inst.operands[2].present && inst.operands[2].imm == 0) goto int_encode; - immbits = 32 - inst.operands[2].imm; NEON_ENCODE (IMMED, inst); if (flavour != neon_cvt_flavour_invalid) inst.instruction |= enctab[flavour]; @@ -15617,7 +15639,19 @@ do_neon_cvt_1 (enum neon_cvt_mode mode) inst.instruction |= HI1 (inst.operands[1].reg) << 5; inst.instruction |= neon_quad (rs) << 6; inst.instruction |= 1 << 21; - inst.instruction |= immbits << 16; + if (flavour < neon_cvt_flavour_s16_f16) + { + inst.instruction |= 1 << 21; + immbits = 32 - inst.operands[2].imm; + inst.instruction |= immbits << 16; + } + else + { + inst.instruction |= 3 << 20; + immbits = 16 - inst.operands[2].imm; + inst.instruction |= immbits << 16; + inst.instruction &= ~(1 << 9); + } neon_dp_fixup (&inst); } @@ -15638,8 +15672,14 @@ do_neon_cvt_1 (enum neon_cvt_mode mode) inst.instruction |= LOW4 (inst.operands[1].reg); inst.instruction |= HI1 (inst.operands[1].reg) << 5; inst.instruction |= neon_quad (rs) << 6; - inst.instruction |= (flavour == neon_cvt_flavour_u32_f32) << 7; + inst.instruction |= (flavour == neon_cvt_flavour_u16_f16 + || flavour == neon_cvt_flavour_u32_f32) << 7; inst.instruction |= mode << 8; + if 
(flavour == neon_cvt_flavour_u16_f16 + || flavour == neon_cvt_flavour_s16_f16) + /* Mask off the original size bits and reencode them. */ + inst.instruction = ((inst.instruction & 0xfff3ffff) | (1 << 18)); + if (thumb_mode) inst.instruction |= 0xfc000000; else @@ -15649,7 +15689,8 @@ do_neon_cvt_1 (enum neon_cvt_mode mode) { int_encode: { - unsigned enctab[] = { 0x100, 0x180, 0x0, 0x080 }; + unsigned enctab[] = { 0x100, 0x180, 0x0, 0x080, + 0x100, 0x180, 0x0, 0x080}; NEON_ENCODE (INTEGER, inst); @@ -15664,7 +15705,12 @@ do_neon_cvt_1 (enum neon_cvt_mode mode) inst.instruction |= LOW4 (inst.operands[1].reg); inst.instruction |= HI1 (inst.operands[1].reg) << 5; inst.instruction |= neon_quad (rs) << 6; - inst.instruction |= 2 << 18; + if (flavour >= neon_cvt_flavour_s16_f16 + && flavour <= neon_cvt_flavour_f16_u16) + /* Half precision. */ + inst.instruction |= 1 << 18; + else + inst.instruction |= 2 << 18; neon_dp_fixup (&inst); } @@ -16487,7 +16533,7 @@ do_neon_recip_est (void) { enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL); struct neon_type_el et = neon_check_type (2, rs, - N_EQK | N_FLT, N_F32 | N_U32 | N_KEY); + N_EQK | N_FLT, N_F_16_32 | N_U32 | N_KEY); inst.instruction |= (et.type == NT_float) << 8; neon_two_same (neon_quad (rs), 1, et.size); } @@ -17002,7 +17048,7 @@ do_vmaxnm (void) if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH8) == FAIL) return; - neon_dyadic_misc (NT_untyped, N_F32, 0); + neon_dyadic_misc (NT_untyped, N_F_16_32, 0); } static void @@ -17058,7 +17104,7 @@ do_vrint_1 (enum neon_cvt_mode mode) { /* Neon encodings (or something broken...). 
*/ inst.error = NULL; - et = neon_check_type (2, rs, N_EQK, N_F32 | N_KEY); + et = neon_check_type (2, rs, N_EQK, N_F_16_32 | N_KEY); if (et.type == NT_invtype) return; @@ -17074,6 +17120,10 @@ do_vrint_1 (enum neon_cvt_mode mode) inst.instruction |= LOW4 (inst.operands[1].reg); inst.instruction |= HI1 (inst.operands[1].reg) << 5; inst.instruction |= neon_quad (rs) << 6; + /* Mask off the original size bits and reencode them. */ + inst.instruction = ((inst.instruction & 0xfff3ffff) + | neon_logbits (et.size) << 18); + switch (mode) { case neon_cvt_mode_z: inst.instruction |= 3 << 7; break; @@ -20315,7 +20365,7 @@ static const struct asm_opcode insns[] = NUF(vbitq, 1200110, 3, (RNQ, RNQ, RNQ), neon_bitfield), NUF(vbif, 1300110, 3, (RNDQ, RNDQ, RNDQ), neon_bitfield), NUF(vbifq, 1300110, 3, (RNQ, RNQ, RNQ), neon_bitfield), - /* Int and float variants, types S8 S16 S32 U8 U16 U32 F32. */ + /* Int and float variants, types S8 S16 S32 U8 U16 U32 F16 F32. */ nUF(vabd, _vabd, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_if_su), nUF(vabdq, _vabd, 3, (RNQ, oRNQ, RNQ), neon_dyadic_if_su), nUF(vmax, _vmax, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_if_su), @@ -20478,7 +20528,7 @@ static const struct asm_opcode insns[] = NUF(vpadalq, 1b00600, 2, (RNQ, RNQ), neon_pair_long), NUF(vpaddl, 1b00200, 2, (RNDQ, RNDQ), neon_pair_long), NUF(vpaddlq, 1b00200, 2, (RNQ, RNQ), neon_pair_long), - /* Reciprocal estimates. Types U32 F32. */ + /* Reciprocal estimates. Types U32 F16 F32. 
*/ NUF(vrecpe, 1b30400, 2, (RNDQ, RNDQ), neon_recip_est), NUF(vrecpeq, 1b30400, 2, (RNQ, RNQ), neon_recip_est), NUF(vrsqrte, 1b30480, 2, (RNDQ, RNDQ), neon_recip_est), diff --git a/gas/testsuite/gas/arm/armv8-2-fp16-simd-thumb.d b/gas/testsuite/gas/arm/armv8-2-fp16-simd-thumb.d new file mode 100644 index 0000000..5578b9b --- /dev/null +++ b/gas/testsuite/gas/arm/armv8-2-fp16-simd-thumb.d @@ -0,0 +1,147 @@ +#name: ARM v8.2 FP16 support on SIMD (Thumb) +#source: armv8-2-fp16-simd.s +#objdump: -d +#as: -march=armv8.2-a+fp16 -mfpu=neon-fp-armv8 -mthumb +#skip: *-*-*coff *-*-pe *-*-wince *-*-*aout* *-*-netbsd + +.*: +file format .*arm.* + +Disassembly of section .text: + +00000000 <func>: + 0: ff34 2d0e vabd.f16 d2, d4, d14 + 4: ff38 4d6c vabd.f16 q2, q4, q14 + 8: ef14 2f0e vmax.f16 d2, d4, d14 + c: ef18 4f6c vmax.f16 q2, q4, q14 + 10: ef34 2f0e vmin.f16 d2, d4, d14 + 14: ef38 4f6c vmin.f16 q2, q4, q14 + 18: ff30 0dec vabd.f16 q0, q8, q14 + 1c: ef10 0fec vmax.f16 q0, q8, q14 + 20: ef30 0fec vmin.f16 q0, q8, q14 + 24: ff33 1d0f vabd.f16 d1, d3, d15 + 28: ff31 0d08 vabd.f16 d0, d1, d8 + 2c: ffb5 0708 vabs.f16 d0, d8 + 30: ffb5 0760 vabs.f16 q0, q8 + 34: ffb5 0788 vneg.f16 d0, d8 + 38: ffb5 07e0 vneg.f16 q0, q8 + 3c: ffb5 474c vabs.f16 q2, q6 + 40: ffb5 47cc vneg.f16 q2, q6 + 44: ffb5 7703 vabs.f16 d7, d3 + 48: ffb5 9781 vneg.f16 d9, d1 + 4c: ff14 2e1e vacge.f16 d2, d4, d14 + 50: ff18 4e7c vacge.f16 q2, q4, q14 + 54: ff34 2e1e vacgt.f16 d2, d4, d14 + 58: ff38 4e7c vacgt.f16 q2, q4, q14 + 5c: ff3e 2e14 vacgt.f16 d2, d14, d4 + 60: ff3c 4ed8 vacgt.f16 q2, q14, q4 + 64: ff1e 2e14 vacge.f16 d2, d14, d4 + 68: ff1c 4ed8 vacge.f16 q2, q14, q4 + 6c: ef14 2e0e vceq.f16 d2, d4, d14 + 70: ef18 4e6c vceq.f16 q2, q4, q14 + 74: ff14 2e0e vcge.f16 d2, d4, d14 + 78: ff18 4e6c vcge.f16 q2, q4, q14 + 7c: ff34 2e0e vcgt.f16 d2, d4, d14 + 80: ff38 4e6c vcgt.f16 q2, q4, q14 + 84: ff1e 2e04 vcge.f16 d2, d14, d4 + 88: ff1c 4ec8 vcge.f16 q2, q14, q4 + 8c: ff3e 2e04 vcgt.f16 d2, d14, d4 + 90: ff3c 
4ec8 vcgt.f16 q2, q14, q4 + 94: ff10 0efc vacge.f16 q0, q8, q14 + 98: ff30 0efc vacgt.f16 q0, q8, q14 + 9c: ff3c 0ef0 vacgt.f16 q0, q14, q8 + a0: ff1c 0ef0 vacge.f16 q0, q14, q8 + a4: ef10 0eec vceq.f16 q0, q8, q14 + a8: ff10 0eec vcge.f16 q0, q8, q14 + ac: ff30 0eec vcgt.f16 q0, q8, q14 + b0: ff1c 0ee0 vcge.f16 q0, q14, q8 + b4: ff3c 0ee0 vcgt.f16 q0, q14, q8 + b8: ef14 2d0e vadd.f16 d2, d4, d14 + bc: ef18 4d6c vadd.f16 q2, q4, q14 + c0: ef34 2d0e vsub.f16 d2, d4, d14 + c4: ef38 4d6c vsub.f16 q2, q4, q14 + c8: ef10 0dec vadd.f16 q0, q8, q14 + cc: ef30 0dec vsub.f16 q0, q8, q14 + d0: ff14 2f1e vmaxnm.f16 d2, d4, d14 + d4: ff18 4f7c vmaxnm.f16 q2, q4, q14 + d8: ff34 2f1e vminnm.f16 d2, d4, d14 + dc: ff38 4f7c vminnm.f16 q2, q4, q14 + e0: ef14 2c1e vfma.f16 d2, d4, d14 + e4: ef18 4c7c vfma.f16 q2, q4, q14 + e8: ef34 2c1e vfms.f16 d2, d4, d14 + ec: ef38 4c7c vfms.f16 q2, q4, q14 + f0: ef14 2d1e vmla.f16 d2, d4, d14 + f4: ef18 4d7c vmla.f16 q2, q4, q14 + f8: ef34 2d1e vmls.f16 d2, d4, d14 + fc: ef38 4d7c vmls.f16 q2, q4, q14 + 100: ffb6 458e vrintz.f16 d4, d14 + 104: ffb6 85ec vrintz.f16 q4, q14 + 108: ffb6 448e vrintx.f16 d4, d14 + 10c: ffb6 84ec vrintx.f16 q4, q14 + 110: ffb6 450e vrinta.f16 d4, d14 + 114: ffb6 856c vrinta.f16 q4, q14 + 118: ffb6 440e vrintn.f16 d4, d14 + 11c: ffb6 846c vrintn.f16 q4, q14 + 120: ffb6 478e vrintp.f16 d4, d14 + 124: ffb6 87ec vrintp.f16 q4, q14 + 128: ffb6 468e vrintm.f16 d4, d14 + 12c: ffb6 86ec vrintm.f16 q4, q14 + 130: ff18 4d0e vpadd.f16 d4, d8, d14 + 134: ffb7 4508 vrecpe.f16 d4, d8 + 138: ffb7 8560 vrecpe.f16 q4, q8 + 13c: ffb7 4588 vrsqrte.f16 d4, d8 + 140: ffb7 85e0 vrsqrte.f16 q4, q8 + 144: ffb7 0564 vrecpe.f16 q0, q10 + 148: ffb7 05e4 vrsqrte.f16 q0, q10 + 14c: ef1a 8f1c vrecps.f16 d8, d10, d12 + 150: ef54 0ff8 vrecps.f16 q8, q10, q12 + 154: ef3a 8f1c vrsqrts.f16 d8, d10, d12 + 158: ef74 0ff8 vrsqrts.f16 q8, q10, q12 + 15c: ef10 4f58 vrecps.f16 q2, q0, q4 + 160: ef30 4f58 vrsqrts.f16 q2, q0, q4 + 164: ff18 4f0e vpmax.f16 d4, 
d8, d14 + 168: ff38 af02 vpmin.f16 d10, d8, d2 + 16c: ff18 4d1e vmul.f16 d4, d8, d14 + 170: ff10 7d11 vmul.f16 d7, d0, d1 + 174: ff10 4dd0 vmul.f16 q2, q8, q0 + 178: ffb7 600c vcvta.s16.f16 d6, d12 + 17c: ffb7 c068 vcvta.s16.f16 q6, q12 + 180: ffb7 630c vcvtm.s16.f16 d6, d12 + 184: ffb7 c368 vcvtm.s16.f16 q6, q12 + 188: ffb7 610c vcvtn.s16.f16 d6, d12 + 18c: ffb7 c168 vcvtn.s16.f16 q6, q12 + 190: ffb7 620c vcvtp.s16.f16 d6, d12 + 194: ffb7 c268 vcvtp.s16.f16 q6, q12 + 198: ffb7 608c vcvta.u16.f16 d6, d12 + 19c: ffb7 c0e8 vcvta.u16.f16 q6, q12 + 1a0: ffb7 638c vcvtm.u16.f16 d6, d12 + 1a4: ffb7 c3e8 vcvtm.u16.f16 q6, q12 + 1a8: ffb7 618c vcvtn.u16.f16 d6, d12 + 1ac: ffb7 c1e8 vcvtn.u16.f16 q6, q12 + 1b0: ffb7 628c vcvtp.u16.f16 d6, d12 + 1b4: ffb7 c2e8 vcvtp.u16.f16 q6, q12 + 1b8: ffb7 e700 vcvt.s16.f16 d14, d0 + 1bc: fff7 c740 vcvt.s16.f16 q14, q0 + 1c0: ffb7 e780 vcvt.u16.f16 d14, d0 + 1c4: fff7 c7c0 vcvt.u16.f16 q14, q0 + 1c8: ffb7 e600 vcvt.f16.s16 d14, d0 + 1cc: fff7 c640 vcvt.f16.s16 q14, q0 + 1d0: ffb7 e680 vcvt.f16.u16 d14, d0 + 1d4: fff7 c6c0 vcvt.f16.u16 q14, q0 + 1d8: efbd ed10 vcvt.s16.f16 d14, d0, #3 + 1dc: effd cd50 vcvt.s16.f16 q14, q0, #3 + 1e0: ffbd ed10 vcvt.u16.f16 d14, d0, #3 + 1e4: fffd cd50 vcvt.u16.f16 q14, q0, #3 + 1e8: efbd ec10 vcvt.f16.s16 d14, d0, #3 + 1ec: effd cc50 vcvt.f16.s16 q14, q0, #3 + 1f0: ffbd ec10 vcvt.f16.u16 d14, d0, #3 + 1f4: fffd cc50 vcvt.f16.u16 q14, q0, #3 + 1f8: ffb5 e502 vceq.f16 d14, d2, #0 + 1fc: fff5 c544 vceq.f16 q14, q2, #0 + 200: ffb5 e482 vcge.f16 d14, d2, #0 + 204: fff5 c4c4 vcge.f16 q14, q2, #0 + 208: ffb5 e402 vcgt.f16 d14, d2, #0 + 20c: fff5 c444 vcgt.f16 q14, q2, #0 + 210: ffb5 e582 vcle.f16 d14, d2, #0 + 214: fff5 c5c4 vcle.f16 q14, q2, #0 + 218: ffb5 e602 vclt.f16 d14, d2, #0 + 21c: fff5 c644 vclt.f16 q14, q2, #0 diff --git a/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning-thumb.d b/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning-thumb.d new file mode 100644 index 0000000..e78f080 --- /dev/null +++ 
b/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning-thumb.d @@ -0,0 +1,4 @@ +#name: Reject ARM v8.2 FP16 SIMD instruction for early arch (Thumb) +#source: armv8-2-fp16-simd.s +#as: -march=armv8.2-a -mfpu=neon-fp-armv8 -mthumb +#error-output: armv8-2-fp16-simd-warning.l diff --git a/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning.d b/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning.d new file mode 100644 index 0000000..d39c36d --- /dev/null +++ b/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning.d @@ -0,0 +1,4 @@ +#name: Reject ARM v8.2 FP16 SIMD instruction for early arch +#source: armv8-2-fp16-simd.s +#as: -march=armv8.2-a -mfpu=neon-fp-armv8 +#error-output: armv8-2-fp16-simd-warning.l diff --git a/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning.l b/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning.l new file mode 100644 index 0000000..ba27f7c --- /dev/null +++ b/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning.l @@ -0,0 +1,137 @@ +[^:]*: Assembler messages: +[^:]*:163: Error: selected processor does not support fp16 instruction -- `vabd.f16 d2,d4,d14' +[^:]*:163: Error: selected processor does not support fp16 instruction -- `vabd.f16 q2,q4,q14' +[^:]*:163: Error: selected processor does not support fp16 instruction -- `vmax.f16 d2,d4,d14' +[^:]*:163: Error: selected processor does not support fp16 instruction -- `vmax.f16 q2,q4,q14' +[^:]*:163: Error: selected processor does not support fp16 instruction -- `vmin.f16 d2,d4,d14' +[^:]*:163: Error: selected processor does not support fp16 instruction -- `vmin.f16 q2,q4,q14' +[^:]*:164: Error: selected processor does not support fp16 instruction -- `vabdq.f16 q0,q8,q14' +[^:]*:164: Error: selected processor does not support fp16 instruction -- `vmaxq.f16 q0,q8,q14' +[^:]*:164: Error: selected processor does not support fp16 instruction -- `vminq.f16 q0,q8,q14' +[^:]*:165: Error: selected processor does not support fp16 instruction -- `vabd.f16 d1,d3,d15' +[^:]*:166: Error: selected processor does not support fp16 
instruction -- `vabd.f16 d0,d1,d8' +[^:]*:169: Error: selected processor does not support fp16 instruction -- `vabs.f16 d0,d8' +[^:]*:169: Error: selected processor does not support fp16 instruction -- `vabs.f16 q0,q8' +[^:]*:169: Error: selected processor does not support fp16 instruction -- `vneg.f16 d0,d8' +[^:]*:169: Error: selected processor does not support fp16 instruction -- `vneg.f16 q0,q8' +[^:]*:170: Error: selected processor does not support fp16 instruction -- `vabsq.f16 q2,q6' +[^:]*:170: Error: selected processor does not support fp16 instruction -- `vnegq.f16 q2,q6' +[^:]*:171: Error: selected processor does not support fp16 instruction -- `vabs.f16 d7,d3' +[^:]*:172: Error: selected processor does not support fp16 instruction -- `vneg.f16 d9,d1' +[^:]*:175: Error: selected processor does not support fp16 instruction -- `vacge.f16 d2,d4,d14' +[^:]*:175: Error: selected processor does not support fp16 instruction -- `vacge.f16 q2,q4,q14' +[^:]*:175: Error: selected processor does not support fp16 instruction -- `vacgt.f16 d2,d4,d14' +[^:]*:175: Error: selected processor does not support fp16 instruction -- `vacgt.f16 q2,q4,q14' +[^:]*:175: Error: selected processor does not support fp16 instruction -- `vaclt.f16 d2,d4,d14' +[^:]*:175: Error: selected processor does not support fp16 instruction -- `vaclt.f16 q2,q4,q14' +[^:]*:175: Error: selected processor does not support fp16 instruction -- `vacle.f16 d2,d4,d14' +[^:]*:175: Error: selected processor does not support fp16 instruction -- `vacle.f16 q2,q4,q14' +[^:]*:175: Error: selected processor does not support fp16 instruction -- `vceq.f16 d2,d4,d14' +[^:]*:175: Error: selected processor does not support fp16 instruction -- `vceq.f16 q2,q4,q14' +[^:]*:175: Error: selected processor does not support fp16 instruction -- `vcge.f16 d2,d4,d14' +[^:]*:175: Error: selected processor does not support fp16 instruction -- `vcge.f16 q2,q4,q14' +[^:]*:175: Error: selected processor does not support fp16 
instruction -- `vcgt.f16 d2,d4,d14' +[^:]*:175: Error: selected processor does not support fp16 instruction -- `vcgt.f16 q2,q4,q14' +[^:]*:175: Error: selected processor does not support fp16 instruction -- `vcle.f16 d2,d4,d14' +[^:]*:175: Error: selected processor does not support fp16 instruction -- `vcle.f16 q2,q4,q14' +[^:]*:175: Error: selected processor does not support fp16 instruction -- `vclt.f16 d2,d4,d14' +[^:]*:175: Error: selected processor does not support fp16 instruction -- `vclt.f16 q2,q4,q14' +[^:]*:176: Error: selected processor does not support fp16 instruction -- `vacgeq.f16 q0,q8,q14' +[^:]*:176: Error: selected processor does not support fp16 instruction -- `vacgtq.f16 q0,q8,q14' +[^:]*:176: Error: selected processor does not support fp16 instruction -- `vacltq.f16 q0,q8,q14' +[^:]*:176: Error: selected processor does not support fp16 instruction -- `vacleq.f16 q0,q8,q14' +[^:]*:176: Error: selected processor does not support fp16 instruction -- `vceqq.f16 q0,q8,q14' +[^:]*:176: Error: selected processor does not support fp16 instruction -- `vcgeq.f16 q0,q8,q14' +[^:]*:176: Error: selected processor does not support fp16 instruction -- `vcgtq.f16 q0,q8,q14' +[^:]*:176: Error: selected processor does not support fp16 instruction -- `vcleq.f16 q0,q8,q14' +[^:]*:176: Error: selected processor does not support fp16 instruction -- `vcltq.f16 q0,q8,q14' +[^:]*:179: Error: selected processor does not support fp16 instruction -- `vadd.f16 d2,d4,d14' +[^:]*:179: Error: selected processor does not support fp16 instruction -- `vadd.f16 q2,q4,q14' +[^:]*:179: Error: selected processor does not support fp16 instruction -- `vsub.f16 d2,d4,d14' +[^:]*:179: Error: selected processor does not support fp16 instruction -- `vsub.f16 q2,q4,q14' +[^:]*:180: Error: selected processor does not support fp16 instruction -- `vaddq.f16 q0,q8,q14' +[^:]*:180: Error: selected processor does not support fp16 instruction -- `vsubq.f16 q0,q8,q14' +[^:]*:183: Error: selected 
processor does not support fp16 instruction -- `vmaxnm.f16 d2,d4,d14' +[^:]*:183: Error: selected processor does not support fp16 instruction -- `vmaxnm.f16 q2,q4,q14' +[^:]*:183: Error: selected processor does not support fp16 instruction -- `vminnm.f16 d2,d4,d14' +[^:]*:183: Error: selected processor does not support fp16 instruction -- `vminnm.f16 q2,q4,q14' +[^:]*:186: Error: selected processor does not support fp16 instruction -- `vfma.f16 d2,d4,d14' +[^:]*:186: Error: selected processor does not support fp16 instruction -- `vfma.f16 q2,q4,q14' +[^:]*:186: Error: selected processor does not support fp16 instruction -- `vfms.f16 d2,d4,d14' +[^:]*:186: Error: selected processor does not support fp16 instruction -- `vfms.f16 q2,q4,q14' +[^:]*:189: Error: selected processor does not support fp16 instruction -- `vmla.f16 d2,d4,d14' +[^:]*:189: Error: selected processor does not support fp16 instruction -- `vmla.f16 q2,q4,q14' +[^:]*:189: Error: selected processor does not support fp16 instruction -- `vmls.f16 d2,d4,d14' +[^:]*:189: Error: selected processor does not support fp16 instruction -- `vmls.f16 q2,q4,q14' +[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintz.f16 d4,d14' +[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintz.f16 q4,q14' +[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintx.f16 d4,d14' +[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintx.f16 q4,q14' +[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrinta.f16 d4,d14' +[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrinta.f16 q4,q14' +[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintn.f16 d4,d14' +[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintn.f16 q4,q14' +[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintp.f16 d4,d14' +[^:]*:192: 
Error: selected processor does not support fp16 instruction -- `vrintp.f16 q4,q14' +[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintm.f16 d4,d14' +[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintm.f16 q4,q14' +[^:]*:195: Error: selected processor does not support fp16 instruction -- `vpadd.f16 d4,d8,d14' +[^:]*:198: Error: selected processor does not support fp16 instruction -- `vrecpe.f16 d4,d8' +[^:]*:198: Error: selected processor does not support fp16 instruction -- `vrecpe.f16 q4,q8' +[^:]*:198: Error: selected processor does not support fp16 instruction -- `vrsqrte.f16 d4,d8' +[^:]*:198: Error: selected processor does not support fp16 instruction -- `vrsqrte.f16 q4,q8' +[^:]*:199: Error: selected processor does not support fp16 instruction -- `vrecpeq.f16 q0,q10' +[^:]*:199: Error: selected processor does not support fp16 instruction -- `vrsqrteq.f16 q0,q10' +[^:]*:202: Error: selected processor does not support fp16 instruction -- `vrecps.f16 d8,d10,d12' +[^:]*:202: Error: selected processor does not support fp16 instruction -- `vrecps.f16 q8,q10,q12' +[^:]*:202: Error: selected processor does not support fp16 instruction -- `vrsqrts.f16 d8,d10,d12' +[^:]*:202: Error: selected processor does not support fp16 instruction -- `vrsqrts.f16 q8,q10,q12' +[^:]*:203: Error: selected processor does not support fp16 instruction -- `vrecpsq.f16 q2,q0,q4' +[^:]*:203: Error: selected processor does not support fp16 instruction -- `vrsqrtsq.f16 q2,q0,q4' +[^:]*:206: Error: selected processor does not support fp16 instruction -- `vpmax.f16 d4,d8,d14' +[^:]*:207: Error: selected processor does not support fp16 instruction -- `vpmin.f16 d10,d8,d2' +[^:]*:210: Error: selected processor does not support fp16 instruction -- `vmul.f16 d4,d8,d14' +[^:]*:211: Error: selected processor does not support fp16 instruction -- `vmul.f16 d7,d0,d1' +[^:]*:212: Error: selected processor does not support fp16 instruction -- 
`vmul.f16 q2,q8,q0' +[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvta.s16.f16 d6,d12' +[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvta.s16.f16 q6,q12' +[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtm.s16.f16 d6,d12' +[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtm.s16.f16 q6,q12' +[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtn.s16.f16 d6,d12' +[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtn.s16.f16 q6,q12' +[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtp.s16.f16 d6,d12' +[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtp.s16.f16 q6,q12' +[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvta.u16.f16 d6,d12' +[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvta.u16.f16 q6,q12' +[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtm.u16.f16 d6,d12' +[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtm.u16.f16 q6,q12' +[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtn.u16.f16 d6,d12' +[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtn.u16.f16 q6,q12' +[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtp.u16.f16 d6,d12' +[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtp.u16.f16 q6,q12' +[^:]*:218: Error: selected processor does not support fp16 instruction -- `vcvt.s16.f16 d14,d0' +[^:]*:218: Error: selected processor does not support fp16 instruction -- `vcvt.s16.f16 q14,q0' +[^:]*:218: Error: selected processor does not support fp16 instruction -- `vcvt.u16.f16 d14,d0' +[^:]*:218: Error: selected processor does not support fp16 instruction -- `vcvt.u16.f16 q14,q0' +[^:]*:218: Error: 
selected processor does not support fp16 instruction -- `vcvt.f16.s16 d14,d0' +[^:]*:218: Error: selected processor does not support fp16 instruction -- `vcvt.f16.s16 q14,q0' +[^:]*:218: Error: selected processor does not support fp16 instruction -- `vcvt.f16.u16 d14,d0' +[^:]*:218: Error: selected processor does not support fp16 instruction -- `vcvt.f16.u16 q14,q0' +[^:]*:221: Error: selected processor does not support fp16 instruction -- `vcvt.s16.f16 d14,d0,#3' +[^:]*:221: Error: selected processor does not support fp16 instruction -- `vcvt.s16.f16 q14,q0,#3' +[^:]*:221: Error: selected processor does not support fp16 instruction -- `vcvt.u16.f16 d14,d0,#3' +[^:]*:221: Error: selected processor does not support fp16 instruction -- `vcvt.u16.f16 q14,q0,#3' +[^:]*:221: Error: selected processor does not support fp16 instruction -- `vcvt.f16.s16 d14,d0,#3' +[^:]*:221: Error: selected processor does not support fp16 instruction -- `vcvt.f16.s16 q14,q0,#3' +[^:]*:221: Error: selected processor does not support fp16 instruction -- `vcvt.f16.u16 d14,d0,#3' +[^:]*:221: Error: selected processor does not support fp16 instruction -- `vcvt.f16.u16 q14,q0,#3' +[^:]*:224: Error: selected processor does not support fp16 instruction -- `vceq.f16 d14,d2,#0' +[^:]*:224: Error: selected processor does not support fp16 instruction -- `vceq.f16 q14,q2,#0' +[^:]*:224: Error: selected processor does not support fp16 instruction -- `vcge.f16 d14,d2,#0' +[^:]*:224: Error: selected processor does not support fp16 instruction -- `vcge.f16 q14,q2,#0' +[^:]*:224: Error: selected processor does not support fp16 instruction -- `vcgt.f16 d14,d2,#0' +[^:]*:224: Error: selected processor does not support fp16 instruction -- `vcgt.f16 q14,q2,#0' +[^:]*:224: Error: selected processor does not support fp16 instruction -- `vcle.f16 d14,d2,#0' +[^:]*:224: Error: selected processor does not support fp16 instruction -- `vcle.f16 q14,q2,#0' +[^:]*:224: Error: selected processor does not support fp16 
instruction -- `vclt.f16 d14,d2,#0' +[^:]*:224: Error: selected processor does not support fp16 instruction -- `vclt.f16 q14,q2,#0' diff --git a/gas/testsuite/gas/arm/armv8-2-fp16-simd.d b/gas/testsuite/gas/arm/armv8-2-fp16-simd.d new file mode 100644 index 0000000..1a97f39 --- /dev/null +++ b/gas/testsuite/gas/arm/armv8-2-fp16-simd.d @@ -0,0 +1,147 @@ +#name: ARM v8.2 FP16 support on SIMD +#source: armv8-2-fp16-simd.s +#objdump: -d +#as: -march=armv8.2-a+fp16 -mfpu=neon-fp-armv8 +#skip: *-*-*coff *-*-pe *-*-wince *-*-*aout* *-*-netbsd + +.*: +file format .*arm.* + +Disassembly of section .text: + +00000000 <func>: + 0: f3342d0e vabd.f16 d2, d4, d14 + 4: f3384d6c vabd.f16 q2, q4, q14 + 8: f2142f0e vmax.f16 d2, d4, d14 + c: f2184f6c vmax.f16 q2, q4, q14 + 10: f2342f0e vmin.f16 d2, d4, d14 + 14: f2384f6c vmin.f16 q2, q4, q14 + 18: f3300dec vabd.f16 q0, q8, q14 + 1c: f2100fec vmax.f16 q0, q8, q14 + 20: f2300fec vmin.f16 q0, q8, q14 + 24: f3331d0f vabd.f16 d1, d3, d15 + 28: f3310d08 vabd.f16 d0, d1, d8 + 2c: f3b50708 vabs.f16 d0, d8 + 30: f3b50760 vabs.f16 q0, q8 + 34: f3b50788 vneg.f16 d0, d8 + 38: f3b507e0 vneg.f16 q0, q8 + 3c: f3b5474c vabs.f16 q2, q6 + 40: f3b547cc vneg.f16 q2, q6 + 44: f3b57703 vabs.f16 d7, d3 + 48: f3b59781 vneg.f16 d9, d1 + 4c: f3142e1e vacge.f16 d2, d4, d14 + 50: f3184e7c vacge.f16 q2, q4, q14 + 54: f3342e1e vacgt.f16 d2, d4, d14 + 58: f3384e7c vacgt.f16 q2, q4, q14 + 5c: f33e2e14 vacgt.f16 d2, d14, d4 + 60: f33c4ed8 vacgt.f16 q2, q14, q4 + 64: f31e2e14 vacge.f16 d2, d14, d4 + 68: f31c4ed8 vacge.f16 q2, q14, q4 + 6c: f2142e0e vceq.f16 d2, d4, d14 + 70: f2184e6c vceq.f16 q2, q4, q14 + 74: f3142e0e vcge.f16 d2, d4, d14 + 78: f3184e6c vcge.f16 q2, q4, q14 + 7c: f3342e0e vcgt.f16 d2, d4, d14 + 80: f3384e6c vcgt.f16 q2, q4, q14 + 84: f31e2e04 vcge.f16 d2, d14, d4 + 88: f31c4ec8 vcge.f16 q2, q14, q4 + 8c: f33e2e04 vcgt.f16 d2, d14, d4 + 90: f33c4ec8 vcgt.f16 q2, q14, q4 + 94: f3100efc vacge.f16 q0, q8, q14 + 98: f3300efc vacgt.f16 q0, q8, q14 + 9c: 
f33c0ef0 vacgt.f16 q0, q14, q8 + a0: f31c0ef0 vacge.f16 q0, q14, q8 + a4: f2100eec vceq.f16 q0, q8, q14 + a8: f3100eec vcge.f16 q0, q8, q14 + ac: f3300eec vcgt.f16 q0, q8, q14 + b0: f31c0ee0 vcge.f16 q0, q14, q8 + b4: f33c0ee0 vcgt.f16 q0, q14, q8 + b8: f2142d0e vadd.f16 d2, d4, d14 + bc: f2184d6c vadd.f16 q2, q4, q14 + c0: f2342d0e vsub.f16 d2, d4, d14 + c4: f2384d6c vsub.f16 q2, q4, q14 + c8: f2100dec vadd.f16 q0, q8, q14 + cc: f2300dec vsub.f16 q0, q8, q14 + d0: f3142f1e vmaxnm.f16 d2, d4, d14 + d4: f3184f7c vmaxnm.f16 q2, q4, q14 + d8: f3342f1e vminnm.f16 d2, d4, d14 + dc: f3384f7c vminnm.f16 q2, q4, q14 + e0: f2142c1e vfma.f16 d2, d4, d14 + e4: f2184c7c vfma.f16 q2, q4, q14 + e8: f2342c1e vfms.f16 d2, d4, d14 + ec: f2384c7c vfms.f16 q2, q4, q14 + f0: f2142d1e vmla.f16 d2, d4, d14 + f4: f2184d7c vmla.f16 q2, q4, q14 + f8: f2342d1e vmls.f16 d2, d4, d14 + fc: f2384d7c vmls.f16 q2, q4, q14 + 100: f3b6458e vrintz.f16 d4, d14 + 104: f3b685ec vrintz.f16 q4, q14 + 108: f3b6448e vrintx.f16 d4, d14 + 10c: f3b684ec vrintx.f16 q4, q14 + 110: f3b6450e vrinta.f16 d4, d14 + 114: f3b6856c vrinta.f16 q4, q14 + 118: f3b6440e vrintn.f16 d4, d14 + 11c: f3b6846c vrintn.f16 q4, q14 + 120: f3b6478e vrintp.f16 d4, d14 + 124: f3b687ec vrintp.f16 q4, q14 + 128: f3b6468e vrintm.f16 d4, d14 + 12c: f3b686ec vrintm.f16 q4, q14 + 130: f3184d0e vpadd.f16 d4, d8, d14 + 134: f3b74508 vrecpe.f16 d4, d8 + 138: f3b78560 vrecpe.f16 q4, q8 + 13c: f3b74588 vrsqrte.f16 d4, d8 + 140: f3b785e0 vrsqrte.f16 q4, q8 + 144: f3b70564 vrecpe.f16 q0, q10 + 148: f3b705e4 vrsqrte.f16 q0, q10 + 14c: f21a8f1c vrecps.f16 d8, d10, d12 + 150: f2540ff8 vrecps.f16 q8, q10, q12 + 154: f23a8f1c vrsqrts.f16 d8, d10, d12 + 158: f2740ff8 vrsqrts.f16 q8, q10, q12 + 15c: f2104f58 vrecps.f16 q2, q0, q4 + 160: f2304f58 vrsqrts.f16 q2, q0, q4 + 164: f3184f0e vpmax.f16 d4, d8, d14 + 168: f338af02 vpmin.f16 d10, d8, d2 + 16c: f3184d1e vmul.f16 d4, d8, d14 + 170: f3107d11 vmul.f16 d7, d0, d1 + 174: f3104dd0 vmul.f16 q2, q8, q0 + 
178: f3b7600c vcvta.s16.f16 d6, d12 + 17c: f3b7c068 vcvta.s16.f16 q6, q12 + 180: f3b7630c vcvtm.s16.f16 d6, d12 + 184: f3b7c368 vcvtm.s16.f16 q6, q12 + 188: f3b7610c vcvtn.s16.f16 d6, d12 + 18c: f3b7c168 vcvtn.s16.f16 q6, q12 + 190: f3b7620c vcvtp.s16.f16 d6, d12 + 194: f3b7c268 vcvtp.s16.f16 q6, q12 + 198: f3b7608c vcvta.u16.f16 d6, d12 + 19c: f3b7c0e8 vcvta.u16.f16 q6, q12 + 1a0: f3b7638c vcvtm.u16.f16 d6, d12 + 1a4: f3b7c3e8 vcvtm.u16.f16 q6, q12 + 1a8: f3b7618c vcvtn.u16.f16 d6, d12 + 1ac: f3b7c1e8 vcvtn.u16.f16 q6, q12 + 1b0: f3b7628c vcvtp.u16.f16 d6, d12 + 1b4: f3b7c2e8 vcvtp.u16.f16 q6, q12 + 1b8: f3b7e700 vcvt.s16.f16 d14, d0 + 1bc: f3f7c740 vcvt.s16.f16 q14, q0 + 1c0: f3b7e780 vcvt.u16.f16 d14, d0 + 1c4: f3f7c7c0 vcvt.u16.f16 q14, q0 + 1c8: f3b7e600 vcvt.f16.s16 d14, d0 + 1cc: f3f7c640 vcvt.f16.s16 q14, q0 + 1d0: f3b7e680 vcvt.f16.u16 d14, d0 + 1d4: f3f7c6c0 vcvt.f16.u16 q14, q0 + 1d8: f2bded10 vcvt.s16.f16 d14, d0, #3 + 1dc: f2fdcd50 vcvt.s16.f16 q14, q0, #3 + 1e0: f3bded10 vcvt.u16.f16 d14, d0, #3 + 1e4: f3fdcd50 vcvt.u16.f16 q14, q0, #3 + 1e8: f2bdec10 vcvt.f16.s16 d14, d0, #3 + 1ec: f2fdcc50 vcvt.f16.s16 q14, q0, #3 + 1f0: f3bdec10 vcvt.f16.u16 d14, d0, #3 + 1f4: f3fdcc50 vcvt.f16.u16 q14, q0, #3 + 1f8: f3b5e502 vceq.f16 d14, d2, #0 + 1fc: f3f5c544 vceq.f16 q14, q2, #0 + 200: f3b5e482 vcge.f16 d14, d2, #0 + 204: f3f5c4c4 vcge.f16 q14, q2, #0 + 208: f3b5e402 vcgt.f16 d14, d2, #0 + 20c: f3f5c444 vcgt.f16 q14, q2, #0 + 210: f3b5e582 vcle.f16 d14, d2, #0 + 214: f3f5c5c4 vcle.f16 q14, q2, #0 + 218: f3b5e602 vclt.f16 d14, d2, #0 + 21c: f3f5c644 vclt.f16 q14, q2, #0 diff --git a/gas/testsuite/gas/arm/armv8-2-fp16-simd.s b/gas/testsuite/gas/arm/armv8-2-fp16-simd.s new file mode 100644 index 0000000..7758f24 --- /dev/null +++ b/gas/testsuite/gas/arm/armv8-2-fp16-simd.s @@ -0,0 +1,224 @@ + .macro f16_dq_ifsu reg0 reg1 reg2 + .irp op, vabd.f16, vmax.f16, vmin.f16 + \op d\reg0, d\reg1, d\reg2 + \op q\reg0, q\reg1, q\reg2 + .endr + .endm + + .macro f16_q_ifsu reg0 
reg1 reg2 + .irp op, vabdq.f16, vmaxq.f16, vminq.f16 + \op q\reg0, q\reg1, q\reg2 + .endr + .endm + + .macro f16_dq_abs_neg reg0 reg1 + .irp op, vabs.f16, vneg.f16 + \op d\reg0, d\reg1 + \op q\reg0, q\reg1 + .endr + .endm + + .macro f16_q_abs_neg reg0 reg1 + .irp op, vabsq.f16, vnegq.f16 + \op q\reg0, q\reg1 + .endr + .endm + + .macro f16_dq_fcmp reg0 reg1 reg2 + .irp op, vacge.f16, vacgt.f16, vaclt.f16, vacle.f16, vceq.f16, vcge.f16, vcgt.f16, vcle.f16, vclt.f16 + \op d\reg0, d\reg1, d\reg2 + \op q\reg0, q\reg1, q\reg2 + .endr + .endm + + .macro f16_dq_fcmp_imm0 reg0 reg1 + .irp op, vceq.f16, vcge.f16, vcgt.f16, vcle.f16, vclt.f16 + \op d\reg0, d\reg1, #0 + \op q\reg0, q\reg1, #0 + .endr + .endm + + .macro f16_q_fcmp reg0 reg1 reg2 + .irp op, vacgeq.f16, vacgtq.f16, vacltq.f16, vacleq.f16, vceqq.f16, vcgeq.f16, vcgtq.f16, vcleq.f16, vcltq.f16 + \op q\reg0, q\reg1, q\reg2 + .endr + .endm + + .macro f16_dq_addsub reg0 reg1 reg2 + .irp op, vadd.f16, vsub.f16 + \op d\reg0, d\reg1, d\reg2 + \op q\reg0, q\reg1, q\reg2 + .endr + .endm + + .macro f16_q_addsub reg0 reg1 reg2 + .irp op, vaddq.f16, vsubq.f16 + \op q\reg0, q\reg1, q\reg2 + .endr + .endm + + .macro f16_dq_vmaxnm reg0 reg1 reg2 + .irp op, vmaxnm.f16, vminnm.f16 + \op d\reg0, d\reg1, d\reg2 + \op q\reg0, q\reg1, q\reg2 + .endr + .endm + + .macro f16_dq_fmac reg0 reg1 reg2 + .irp op, vfma.f16, vfms.f16 + \op d\reg0, d\reg1, d\reg2 + \op q\reg0, q\reg1, q\reg2 + .endr + .endm + + .macro f16_dq_fmacmaybe reg0 reg1 reg2 + .irp op, vmla.f16, vmls.f16 + \op d\reg0, d\reg1, d\reg2 + \op q\reg0, q\reg1, q\reg2 + .endr + .endm + + .macro f16_dq_vrint reg0 reg1 + .irp op, vrintz.f16, vrintx.f16, vrinta.f16, vrintn.f16, vrintp.f16, vrintm.f16 + \op d\reg0, d\reg1 + \op q\reg0, q\reg1 + .endr + .endm + + .macro f16_dq_recip reg0 reg1 + .irp op, vrecpe.f16, vrsqrte.f16 + \op d\reg0, d\reg1 + \op q\reg0, q\reg1 + .endr + .endm + + .macro f16_q_recip reg0 reg1 + .irp op, vrecpeq.f16, vrsqrteq.f16 + \op q\reg0, q\reg1 + .endr + 
.endm + + .macro f16_dq_step reg0 reg1 reg2 + .irp op, vrecps.f16, vrsqrts.f16 + \op d\reg0, d\reg1, d\reg2 + \op q\reg0, q\reg1, q\reg2 + .endr + .endm + + .macro f16_q_step reg0 reg1 reg2 + .irp op, vrecpsq.f16, vrsqrtsq.f16 + \op q\reg0, q\reg1, q\reg2 + .endr + .endm + + .macro f16_dq_cvt reg0 reg1 + .irp op, vcvta.s16.f16, vcvtm.s16.f16, vcvtn.s16.f16, vcvtp.s16.f16, vcvta.u16.f16, vcvtm.u16.f16, vcvtn.u16.f16, vcvtp.u16.f16, + \op d\reg0, d\reg1 + \op q\reg0, q\reg1 + .endr + .endm + + .macro f16_dq_cvtz reg0 reg1 + .irp op, vcvt.s16.f16, vcvt.u16.f16, vcvt.f16.s16, vcvt.f16.u16, + \op d\reg0, d\reg1 + \op q\reg0, q\reg1 + .endr + .endm + + .macro f16_dq_cvtz_fixed reg0 reg1 imm + .irp op, vcvt.s16.f16, vcvt.u16.f16, vcvt.f16.s16, vcvt.f16.u16, + \op d\reg0, d\reg1, #\imm + \op q\reg0, q\reg1, #\imm + .endr + .endm + + .macro f16_dq op reg0 reg1 reg2 + \op d\reg0, d\reg1, d\reg2 + \op q\reg0, q\reg1, q\reg2 + .endm + + .macro f16_d op reg0 reg1 reg2 + \op d\reg0, d\reg1, d\reg2 + .endm + + .macro f16_q op reg0 reg1 reg2 + \op q\reg0, q\reg1, q\reg2 + .endm + + .macro f16_dq_2 op reg0 reg1 + \op d\reg0, d\reg1 + \op q\reg0, q\reg1 + .endm + + .macro f16_d_2 op reg0 reg1 + \op d\reg0, d\reg1 + .endm + + .macro f16_q_2 op reg0 reg1 + \op q\reg0, q\reg1 + .endm + +func: + # neon_dyadic_if_su + f16_dq_ifsu 2 4 14 + f16_q_ifsu 0 8 14 + f16_d vabd.f16 1 3 15 + f16_d vabd.f16 0 1 8 + + # neon_abs_neg + f16_dq_abs_neg 0 8 + f16_q_abs_neg 2 6 + f16_d_2 vabs.f16 7 3 + f16_d_2 vneg.f16 9 1 + + # neon_fcmp + f16_dq_fcmp 2 4 14 + f16_q_fcmp 0 8 14 + + # neon_addsub_if_i + f16_dq_addsub 2 4 14 + f16_q_addsub 0 8 14 + + # neon_vmaxnm + f16_dq_vmaxnm 2 4 14 + + # neon_fmac + f16_dq_fmac 2 4 14 + + # neon_mac_maybe_scalar + f16_dq_fmacmaybe 2 4 14 + + # vrint + f16_dq_vrint 4 14 + + # neon_dyadic_if_i_d + f16_d vpadd.f16 4 8 14 + + # neon_recip_est + f16_dq_recip 4 8 + f16_q_recip 0 10 + + # neon_step + f16_dq_step 8 10 12 + f16_q_step 2 0 4 + + # neon_dyadic_if_su_d + f16_d 
vpmax.f16 4 8 14 + f16_d vpmin.f16 10 8 2 + + # neon_mul + f16_d vmul.f16 4 8 14 + f16_d vmul.f16 7 0 1 + f16_q vmul.f16 2 8 0 + + # neon_cvt + f16_dq_cvt 6 12 + + # neon_cvtz + f16_dq_cvtz 14, 0 + + # neon_cvtz_fixed + f16_dq_cvtz_fixed 14, 0, 3 + + # neon_fcmp_imm0 + f16_dq_fcmp_imm0 14, 2 diff --git a/opcodes/ChangeLog b/opcodes/ChangeLog index 09fd66f..0474fce 100644 --- a/opcodes/ChangeLog +++ b/opcodes/ChangeLog @@ -1,3 +1,7 @@ +2016-03-16 Jiong Wang <jiong.wang@arm.com> + + * arm-dis.c (neon_opcodes): Support new FP16 instructions. + 2016-03-07 Trevor Saunders <tbsaunde+binutils@tbsaunde.org> * mcore-opc.h: Add const qualifiers. diff --git a/opcodes/arm-dis.c b/opcodes/arm-dis.c index 324304d..322e801 100644 --- a/opcodes/arm-dis.c +++ b/opcodes/arm-dis.c @@ -1032,15 +1032,23 @@ static const struct opcode32 neon_opcodes[] = /* NEON fused multiply add instructions. */ {ARM_FEATURE_COPROC (FPU_NEON_EXT_FMA), - 0xf2000c10, 0xffa00f10, "vfma%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"}, + 0xf2000c10, 0xffb00f10, "vfma%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"}, + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST), + 0xf2100c10, 0xffb00f10, "vfma%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"}, {ARM_FEATURE_COPROC (FPU_NEON_EXT_FMA), - 0xf2200c10, 0xffa00f10, "vfms%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"}, + 0xf2200c10, 0xffb00f10, "vfms%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"}, + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST), + 0xf2300c10, 0xffb00f10, "vfms%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"}, /* Two registers, miscellaneous. 
*/ {ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8), 0xf3ba0400, 0xffbf0c10, "vrint%7-9?p?m?zaxn%u.f32\t%12-15,22R, %0-3,5R"}, + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST), + 0xf3b60400, 0xffbf0c10, "vrint%7-9?p?m?zaxn%u.f16\t%12-15,22R, %0-3,5R"}, {ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8), 0xf3bb0000, 0xffbf0c10, "vcvt%8-9?mpna%u.%7?us32.f32\t%12-15,22R, %0-3,5R"}, + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST), + 0xf3b70000, 0xffbf0c10, "vcvt%8-9?mpna%u.%7?us16.f16\t%12-15,22R, %0-3,5R"}, {ARM_FEATURE_COPROC (FPU_CRYPTO_EXT_ARMV8), 0xf3b00300, 0xffbf0fd0, "aese%u.8\t%12-15,22Q, %0-3,5Q"}, {ARM_FEATURE_COPROC (FPU_CRYPTO_EXT_ARMV8), @@ -1080,8 +1088,12 @@ static const struct opcode32 neon_opcodes[] = "vshll%c.i%18-19S2\t%12-15,22Q, %0-3,5D, #%18-19S2"}, {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1), 0xf3bb0400, 0xffbf0e90, "vrecpe%c.%8?fu%18-19S2\t%12-15,22R, %0-3,5R"}, + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST), + 0xf3b70400, 0xffbf0e90, "vrecpe%c.%8?fu16\t%12-15,22R, %0-3,5R"}, {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1), 0xf3bb0480, 0xffbf0e90, "vrsqrte%c.%8?fu%18-19S2\t%12-15,22R, %0-3,5R"}, + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST), + 0xf3b70480, 0xffbf0e90, "vrsqrte%c.%8?fu16\t%12-15,22R, %0-3,5R"}, {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1), 0xf3b00000, 0xffb30f90, "vrev64%c.%18-19S2\t%12-15,22R, %0-3,5R"}, {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1), @@ -1121,8 +1133,11 @@ static const struct opcode32 neon_opcodes[] = {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1), 0xf3b00600, 0xffb30f10, "vpadal%c.%7?us%18-19S2\t%12-15,22R, %0-3,5R"}, {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1), - 0xf3b30600, 0xffb30e10, + 0xf3bb0600, 0xffbf0e10, "vcvt%c.%7-8?usff%18-19Sa.%7-8?ffus%18-19Sa\t%12-15,22R, %0-3,5R"}, + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST), + 0xf3b70600, 0xffbf0e10, + "vcvt%c.%7-8?usff16.%7-8?ffus16\t%12-15,22R, %0-3,5R"}, /* Three registers of the same length. 
*/ {ARM_FEATURE_COPROC (FPU_CRYPTO_EXT_ARMV8), @@ -1140,9 +1155,13 @@ static const struct opcode32 neon_opcodes[] = {ARM_FEATURE_COPROC (FPU_CRYPTO_EXT_ARMV8), 0xf3200c40, 0xffb00f50, "sha256su1%u.32\t%12-15,22Q, %16-19,7Q, %0-3,5Q"}, {ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8), - 0xf3000f10, 0xffa00f10, "vmaxnm%u.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"}, + 0xf3000f10, 0xffb00f10, "vmaxnm%u.f32\t%12-15,22R, %16-19,7R, %0-3,5R"}, + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST), + 0xf3100f10, 0xffb00f10, "vmaxnm%u.f16\t%12-15,22R, %16-19,7R, %0-3,5R"}, {ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8), - 0xf3200f10, 0xffa00f10, "vminnm%u.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"}, + 0xf3200f10, 0xffb00f10, "vminnm%u.f32\t%12-15,22R, %16-19,7R, %0-3,5R"}, + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST), + 0xf3300f10, 0xffb00f10, "vminnm%u.f16\t%12-15,22R, %16-19,7R, %0-3,5R"}, {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1), 0xf2000110, 0xffb00f10, "vand%c\t%12-15,22R, %16-19,7R, %0-3,5R"}, {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1), @@ -1160,41 +1179,77 @@ static const struct opcode32 neon_opcodes[] = {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1), 0xf3300110, 0xffb00f10, "vbif%c\t%12-15,22R, %16-19,7R, %0-3,5R"}, {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1), - 0xf2000d00, 0xffa00f10, "vadd%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"}, + 0xf2000d00, 0xffb00f10, "vadd%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"}, + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST), + 0xf2100d00, 0xffb00f10, "vadd%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"}, {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1), - 0xf2000d10, 0xffa00f10, "vmla%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"}, + 0xf2000d10, 0xffb00f10, "vmla%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"}, + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST), + 0xf2100d10, 0xffb00f10, "vmla%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"}, {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1), - 0xf2000e00, 0xffa00f10, "vceq%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"}, + 0xf2000e00, 0xffb00f10, "vceq%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"}, + 
{ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST), + 0xf2100e00, 0xffb00f10, "vceq%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"}, {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1), - 0xf2000f00, 0xffa00f10, "vmax%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"}, + 0xf2000f00, 0xffb00f10, "vmax%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"}, + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST), + 0xf2100f00, 0xffb00f10, "vmax%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"}, {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1), - 0xf2000f10, 0xffa00f10, "vrecps%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"}, + 0xf2000f10, 0xffb00f10, "vrecps%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"}, + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST), + 0xf2100f10, 0xffb00f10, "vrecps%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"}, {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1), - 0xf2200d00, 0xffa00f10, "vsub%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"}, + 0xf2200d00, 0xffb00f10, "vsub%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"}, + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST), + 0xf2300d00, 0xffb00f10, "vsub%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"}, {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1), - 0xf2200d10, 0xffa00f10, "vmls%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"}, + 0xf2200d10, 0xffb00f10, "vmls%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"}, + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST), + 0xf2300d10, 0xffb00f10, "vmls%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"}, {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1), - 0xf2200f00, 0xffa00f10, "vmin%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"}, + 0xf2200f00, 0xffb00f10, "vmin%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"}, + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST), + 0xf2300f00, 0xffb00f10, "vmin%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"}, {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1), - 0xf2200f10, 0xffa00f10, "vrsqrts%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"}, + 0xf2200f10, 0xffb00f10, "vrsqrts%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"}, + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST), + 0xf2300f10, 0xffb00f10, "vrsqrts%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"}, 
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1), - 0xf3000d00, 0xffa00f10, "vpadd%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"}, + 0xf3000d00, 0xffb00f10, "vpadd%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"}, + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST), + 0xf3100d00, 0xffb00f10, "vpadd%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"}, {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1), - 0xf3000d10, 0xffa00f10, "vmul%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"}, + 0xf3000d10, 0xffb00f10, "vmul%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"}, + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST), + 0xf3100d10, 0xffb00f10, "vmul%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"}, {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1), - 0xf3000e00, 0xffa00f10, "vcge%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"}, + 0xf3000e00, 0xffb00f10, "vcge%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"}, + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST), + 0xf3100e00, 0xffb00f10, "vcge%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"}, {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1), - 0xf3000e10, 0xffa00f10, "vacge%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"}, + 0xf3000e10, 0xffb00f10, "vacge%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"}, + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST), + 0xf3100e10, 0xffb00f10, "vacge%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"}, {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1), - 0xf3000f00, 0xffa00f10, "vpmax%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"}, + 0xf3000f00, 0xffb00f10, "vpmax%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"}, + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST), + 0xf3100f00, 0xffb00f10, "vpmax%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"}, {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1), - 0xf3200d00, 0xffa00f10, "vabd%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"}, + 0xf3200d00, 0xffb00f10, "vabd%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"}, + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST), + 0xf3300d00, 0xffb00f10, "vabd%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"}, {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1), - 0xf3200e00, 0xffa00f10, "vcgt%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"}, + 0xf3200e00, 0xffb00f10, 
"vcgt%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"}, + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST), + 0xf3300e00, 0xffb00f10, "vcgt%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"}, {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1), - 0xf3200e10, 0xffa00f10, "vacgt%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"}, + 0xf3200e10, 0xffb00f10, "vacgt%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"}, + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST), + 0xf3300e10, 0xffb00f10, "vacgt%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"}, {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1), - 0xf3200f00, 0xffa00f10, "vpmin%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"}, + 0xf3200f00, 0xffb00f10, "vpmin%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"}, + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST), + 0xf3300f00, 0xffb00f10, "vpmin%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"}, {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1), 0xf2000800, 0xff800f10, "vadd%c.i%20-21S3\t%12-15,22R, %16-19,7R, %0-3,5R"}, {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1), @@ -1426,6 +1481,9 @@ static const struct opcode32 neon_opcodes[] = {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1), 0xf2a00e10, 0xfea00e90, "vcvt%c.%24,8?usff32.%24,8?ffus32\t%12-15,22R, %0-3,5R, #%16-20e"}, + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST), + 0xf2a00c10, 0xfea00e90, + "vcvt%c.%24,8?usff16.%24,8?ffus16\t%12-15,22R, %0-3,5R, #%16-20e"}, /* Three registers of different lengths. */ {ARM_FEATURE_COPROC (FPU_CRYPTO_EXT_ARMV8), |