From aab2c27d9f01d667f1b3356fbca2d931bfa6c599 Mon Sep 17 00:00:00 2001
From: Matthew Malcomson
Date: Thu, 7 Nov 2019 16:56:12 +0000
Subject: [binutils][arm] BFloat16 enablement [4/X]

Hi,

This patch is part of a series that adds support for Armv8.6-A
(Matrix Multiply and BFloat16 extensions) to binutils.

This patch introduces BFloat16 instructions to the arm backend.
The following BFloat16 instructions are added: vdot, vfma{l/t}, vmmla,
vfmal{t/b}, vcvt, vcvt{t/b}.

gas/ChangeLog:

2019-11-07 Mihail Ionescu
2019-11-07 Matthew Malcomson

        * config/tc-arm.c (arm_archs): Add armv8.6-a option.
        (cpu_arch_ver): Add TAG_CPU_ARCH_V8 tag for Armv8.6-a.
        * doc/c-arm.texi (-march): New armv8.6-a arch.
        * config/tc-arm.c (arm_ext_bf16): New feature set.
        (enum neon_el_type): Add NT_bfloat value.
        (B_MNEM_vfmat, B_MNEM_vfmab): New bfloat16 encoder helpers.
        (BAD_BF16): New message.
        (parse_neon_type): Add bf16 type specifier.
        (enum neon_type_mask): Add N_BF16 type.
        (type_chk_of_el_type): Account for NT_bfloat.
        (el_type_of_type_chk): Account for N_BF16.
        (neon_three_args): Split out from neon_three_same.
        (neon_three_same): Part split out into neon_three_args.
        (CVT_FLAVOUR_VAR): Add bf16_f32 cvt flavour.
        (do_neon_cvt_1): Account for vcvt.bf16.f32.
        (do_bfloat_vmla): New.
        (do_mve_vfma): New function to deal with the mnemonic clash between
        the BF16 vfmat and the MVE vfma in a VPT block with a 't'rue
        condition.
        (do_neon_cvttb_1): Account for vcvt{t,b}.bf16.f32.
        (do_vdot): New.
        (do_vmmla): New.
        (insns): Add vdot and vmmla mnemonics.
        (arm_extensions): Add "bf16" extension.
        * doc/c-arm.texi: Document "bf16" extension.
        * testsuite/gas/arm/attr-march-armv8_6-a.d: New test.
        * testsuite/gas/arm/bfloat16-bad.d: New test.
        * testsuite/gas/arm/bfloat16-bad.l: New test.
        * testsuite/gas/arm/bfloat16-bad.s: New test.
        * testsuite/gas/arm/bfloat16-cmdline-bad-2.d: New test.
        * testsuite/gas/arm/bfloat16-cmdline-bad-3.d: New test.
        * testsuite/gas/arm/bfloat16-cmdline-bad.d: New test.
        * testsuite/gas/arm/bfloat16-neon.s: New test.
        * testsuite/gas/arm/bfloat16-non-neon.s: New test.
        * testsuite/gas/arm/bfloat16-thumb-bad.d: New test.
        * testsuite/gas/arm/bfloat16-thumb-bad.l: New test.
        * testsuite/gas/arm/bfloat16-thumb.d: New test.
        * testsuite/gas/arm/bfloat16-vfp.d: New test.
        * testsuite/gas/arm/bfloat16.d: New test.
        * testsuite/gas/arm/bfloat16.s: New test.

include/ChangeLog:

2019-11-07 Mihail Ionescu
2019-11-07 Matthew Malcomson

        * opcode/arm.h (ARM_EXT2_V8_6A, ARM_AEXT2_V8_6A, ARM_ARCH_V8_6A): New.
        * opcode/arm.h (ARM_EXT2_BF16): New feature macro.
        (ARM_AEXT2_V8_6A): Include above macro in definition.

opcodes/ChangeLog:

2019-11-07 Mihail Ionescu
2019-11-07 Matthew Malcomson

        * arm-dis.c (select_arm_features): Update bfd_march_arm_8 with
        Armv8.6-A.
        (coprocessor_opcodes): Add bfloat16 vcvt{t,b}.
        (neon_opcodes): Add bfloat SIMD instructions.
        (print_insn_coprocessor): Add new control character %b to print
        condition code without checking cp_num.
        (print_insn_neon): Account for BFloat16 instructions that have no
        special top-byte handling.

Regression tested on arm-none-eabi.

Is it ok for trunk?

Regards,
Mihail
---
 include/opcode/arm.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/opcode/arm.h')

diff --git a/include/opcode/arm.h b/include/opcode/arm.h
index a870905..7aea4d6 100644
--- a/include/opcode/arm.h
+++ b/include/opcode/arm.h
@@ -73,6 +73,8 @@
 #define ARM_EXT2_SB 0x00002000 /* Speculation Barrier instruction. */
 #define ARM_EXT2_PREDRES 0x00004000 /* Prediction Restriction insns. */
 #define ARM_EXT2_V8_1M_MAIN 0x00008000 /* ARMv8.1-M Mainline. */
+#define ARM_EXT2_V8_6A 0x00010000 /* ARM V8.6A. */
+#define ARM_EXT2_BF16 0x00020000 /* ARMv8 bfloat16. */
 
 /* Co-processor space extensions. */
 #define ARM_CEXT_XSCALE 0x00000001 /* Allow MIA etc.
*/
@@ -169,6 +171,7 @@
                          | ARM_EXT2_V8_4A)
 #define ARM_AEXT2_V8_5A (ARM_AEXT2_V8_4A | ARM_EXT2_V8_5A | ARM_EXT2_SB \
                          | ARM_EXT2_PREDRES)
+#define ARM_AEXT2_V8_6A (ARM_AEXT2_V8_5A | ARM_EXT2_V8_6A | ARM_EXT2_BF16)
 #define ARM_AEXT_V8M_BASE (ARM_AEXT_V6SM | ARM_EXT_DIV)
 #define ARM_AEXT_V8M_MAIN ARM_AEXT_V7M
 #define ARM_AEXT_V8M_MAIN_DSP ARM_AEXT_V7EM
@@ -352,6 +355,9 @@
 #define ARM_ARCH_V8_5A ARM_FEATURE (ARM_AEXT_V8A, ARM_AEXT2_V8_5A, \
                                     CRC_EXT_ARMV8 | FPU_NEON_EXT_RDMA \
                                     | FPU_NEON_EXT_DOTPROD)
+#define ARM_ARCH_V8_6A ARM_FEATURE (ARM_AEXT_V8A, ARM_AEXT2_V8_6A, \
+                                    CRC_EXT_ARMV8 | FPU_NEON_EXT_RDMA \
+                                    | FPU_NEON_EXT_DOTPROD)
 #define ARM_ARCH_V8M_BASE ARM_FEATURE_CORE (ARM_AEXT_V8M_BASE, \
                                             ARM_AEXT2_V8M_BASE)
 #define ARM_ARCH_V8M_MAIN ARM_FEATURE_CORE (ARM_AEXT_V8M_MAIN, \
--
cgit v1.1