diff options
author | Cui,Lili <lili.cui@intel.com> | 2021-06-14 11:05:05 +0800 |
---|---|---|
committer | Cui,Lili <lili.cui@intel.com> | 2021-08-05 21:03:41 +0800 |
commit | 0cc7872125efa71879e34403cc644cd19434eae3 (patch) | |
tree | a342743f974e2be23c3d25d9f79c7f52560cb0d8 /gas | |
parent | ddbe6976d51240c806488beb53b708858d8a3a67 (diff) | |
download | binutils-0cc7872125efa71879e34403cc644cd19434eae3.zip binutils-0cc7872125efa71879e34403cc644cd19434eae3.tar.gz binutils-0cc7872125efa71879e34403cc644cd19434eae3.tar.bz2 |
[PATCH 1/2] Enable Intel AVX512_FP16 instructions
Intel AVX512 FP16 instructions use maps 3, 5 and 6. Maps 5 and 6 use 3 bits
in the EVEX.mmm field (0b101, 0b110). Map 5 is for instructions that were FP32
in map 1 (0Fxx). Map 6 is for instructions that were FP32 in map 2 (0F38xx).
There are some exceptions to this rule. Some things in map 1 (0Fxx) with imm8
operands predated our current conventions; those instructions moved to map 3.
FP32 things in map 3 (0F3Axx) found new opcodes in map 3 for FP16 because map 3
is very sparsely populated. Most of the FP16 instructions share opcodes and
prefix (EVEX.pp) bits with the related FP32 operations.
Intel AVX512 FP16 instructions have new displacement scaling rules; please refer
to the public software developer manual for detailed information.
gas/
2021-08-05 Igor Tsimbalist <igor.v.tsimbalist@intel.com>
H.J. Lu <hongjiu.lu@intel.com>
Wei Xiao <wei3.xiao@intel.com>
Lili Cui <lili.cui@intel.com>
* config/tc-i386.c (struct Broadcast_Operation): Adjust comment.
(cpu_arch): Add .avx512_fp16.
(cpu_noarch): Add noavx512_fp16.
(pte): Add evexmap5 and evexmap6.
(build_evex_prefix): Handle EVEXMAP5 and EVEXMAP6.
(check_VecOperations): Handle {1to32}.
(check_VecOperands): Handle DistinctDest.
(check_word_reg): Handle Toqword.
(i386_error): Add invalid_dest_and_src_register_set.
(match_template): Handle invalid_dest_and_src_register_set.
* doc/c-i386.texi: Document avx512_fp16, noavx512_fp16.
opcodes/
2021-08-05 Igor Tsimbalist <igor.v.tsimbalist@intel.com>
H.J. Lu <hongjiu.lu@intel.com>
Wei Xiao <wei3.xiao@intel.com>
Lili Cui <lili.cui@intel.com>
* i386-dis.c (EXwScalarS): New.
(EXxh): Ditto.
(EXxhc): Ditto.
(EXxmmqh): Ditto.
(EXxmmqdh): Ditto.
(EXEvexXwb): Ditto.
(DistinctDest_Fixup): Ditto.
(enum): Add xh_mode, evex_half_bcst_xmmqh_mode, evex_half_bcst_xmmqdh_mode
and w_swap_mode.
(enum): Add PREFIX_EVEX_0F3A08_W_0, PREFIX_EVEX_0F3A0A_W_0,
PREFIX_EVEX_0F3A26, PREFIX_EVEX_0F3A27, PREFIX_EVEX_0F3A56,
PREFIX_EVEX_0F3A57, PREFIX_EVEX_0F3A66, PREFIX_EVEX_0F3A67,
PREFIX_EVEX_0F3AC2, PREFIX_EVEX_MAP5_10, PREFIX_EVEX_MAP5_11,
PREFIX_EVEX_MAP5_1D, PREFIX_EVEX_MAP5_2A, PREFIX_EVEX_MAP5_2C,
PREFIX_EVEX_MAP5_2D, PREFIX_EVEX_MAP5_2E, PREFIX_EVEX_MAP5_2F,
PREFIX_EVEX_MAP5_51, PREFIX_EVEX_MAP5_58, PREFIX_EVEX_MAP5_59,
PREFIX_EVEX_MAP5_5A_W_0, PREFIX_EVEX_MAP5_5A_W_1,
PREFIX_EVEX_MAP5_5B_W_0, PREFIX_EVEX_MAP5_5B_W_1,
PREFIX_EVEX_MAP5_5C, PREFIX_EVEX_MAP5_5D, PREFIX_EVEX_MAP5_5E,
PREFIX_EVEX_MAP5_5F, PREFIX_EVEX_MAP5_78, PREFIX_EVEX_MAP5_79,
PREFIX_EVEX_MAP5_7A, PREFIX_EVEX_MAP5_7B, PREFIX_EVEX_MAP5_7C,
PREFIX_EVEX_MAP5_7D_W_0, PREFIX_EVEX_MAP6_13, PREFIX_EVEX_MAP6_56,
PREFIX_EVEX_MAP6_57, PREFIX_EVEX_MAP6_D6, PREFIX_EVEX_MAP6_D7
(enum): Add EVEX_MAP5 and EVEX_MAP6.
(enum): Add EVEX_W_MAP5_5A, EVEX_W_MAP5_5B,
EVEX_W_MAP5_78_P_0, EVEX_W_MAP5_78_P_2, EVEX_W_MAP5_79_P_0,
EVEX_W_MAP5_79_P_2, EVEX_W_MAP5_7A_P_2, EVEX_W_MAP5_7A_P_3,
EVEX_W_MAP5_7B_P_2, EVEX_W_MAP5_7C_P_0, EVEX_W_MAP5_7C_P_2,
EVEX_W_MAP5_7D, EVEX_W_MAP6_13_P_0, EVEX_W_MAP6_13_P_2.
(get_valid_dis386): Properly handle new instructions.
(intel_operand_size): Handle new modes.
(OP_E_memory): Ditto.
(OP_EX): Ditto.
* i386-dis-evex.h: Updated for AVX512_FP16.
* i386-dis-evex-mod.h: Updated for AVX512_FP16.
* i386-dis-evex-prefix.h: Updated for AVX512_FP16.
* i386-dis-evex-reg.h: Updated for AVX512_FP16.
* i386-dis-evex-w.h: Updated for AVX512_FP16.
* i386-gen.c (cpu_flag_init): Add CPU_AVX512_FP16_FLAGS,
and CPU_ANY_AVX512_FP16_FLAGS. Update CPU_ANY_AVX512F_FLAGS
and CPU_ANY_AVX512BW_FLAGS.
(cpu_flags): Add CpuAVX512_FP16.
(opcode_modifiers): Add DistinctDest.
* i386-opc.h (enum): (AVX512_FP16): New.
(i386_opcode_modifier): Add distinctdest.
(i386_cpu_flags): Add cpuavx512_fp16.
(EVEXMAP5): Defined as a macro.
(EVEXMAP6): Ditto.
* i386-opc.tbl: Add Intel AVX512_FP16 instructions.
* i386-init.h: Regenerated.
* i386-tbl.h: Ditto.
Diffstat (limited to 'gas')
-rw-r--r-- | gas/NEWS | 2 | ||||
-rw-r--r-- | gas/config/tc-i386.c | 45 | ||||
-rw-r--r-- | gas/doc/c-i386.texi | 4 |
3 files changed, 47 insertions, 4 deletions
@@ -1,5 +1,7 @@ -*- text -*- +* Add support for Intel AVX512_FP16 instructions. + Changes in 2.37: * arm-symbianelf support removed. diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index 1235c3e..cdc660f 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -246,6 +246,7 @@ enum i386_error invalid_vsib_address, invalid_vector_register_set, invalid_tmm_register_set, + invalid_dest_and_src_register_set, unsupported_vector_index_register, unsupported_broadcast, broadcast_needed, @@ -380,7 +381,7 @@ struct _i386_insn expresses the broadcast factor. */ struct Broadcast_Operation { - /* Type of broadcast: {1to2}, {1to4}, {1to8}, or {1to16}. */ + /* Type of broadcast: {1to2}, {1to4}, {1to8}, {1to16} or {1to32}. */ unsigned int type; /* Index of broadcasted operand. */ @@ -1237,6 +1238,8 @@ static const arch_entry cpu_arch[] = CPU_UINTR_FLAGS, 0 }, { STRING_COMMA_LEN (".hreset"), PROCESSOR_UNKNOWN, CPU_HRESET_FLAGS, 0 }, + { STRING_COMMA_LEN (".avx512_fp16"), PROCESSOR_UNKNOWN, + CPU_AVX512_FP16_FLAGS, 0 }, }; static const noarch_entry cpu_noarch[] = @@ -1292,6 +1295,7 @@ static const noarch_entry cpu_noarch[] = { STRING_COMMA_LEN ("nowidekl"), CPU_ANY_WIDEKL_FLAGS }, { STRING_COMMA_LEN ("nouintr"), CPU_ANY_UINTR_FLAGS }, { STRING_COMMA_LEN ("nohreset"), CPU_ANY_HRESET_FLAGS }, + { STRING_COMMA_LEN ("noavx512_fp16"), CPU_ANY_AVX512_FP16_FLAGS }, }; #ifdef I386COFF @@ -3270,7 +3274,7 @@ pte (insn_template *t) { static const unsigned char opc_pfx[] = { 0, 0x66, 0xf3, 0xf2 }; static const char *const opc_spc[] = { - NULL, "0f", "0f38", "0f3a", NULL, NULL, NULL, NULL, + NULL, "0f", "0f38", "0f3a", NULL, "evexmap5", "evexmap6", NULL, "XOP08", "XOP09", "XOP0A", }; unsigned int j; @@ -3865,7 +3869,7 @@ build_evex_prefix (void) /* The high 3 bits of the second EVEX byte are 1's compliment of RXB bits from REX. 
*/ gas_assert (i.tm.opcode_modifier.opcodespace >= SPACE_0F); - gas_assert (i.tm.opcode_modifier.opcodespace <= SPACE_0F3A); + gas_assert (i.tm.opcode_modifier.opcodespace <= SPACE_EVEXMAP6); i.vex.bytes[1] = (~i.rex & 0x7) << 5 | i.tm.opcode_modifier.opcodespace; /* The fifth bit of the second EVEX byte is 1's compliment of the @@ -6088,6 +6092,24 @@ check_VecOperands (const insn_template *t) return 1; } + /* For some special instructions require that destination must be distinct + from source registers. */ + if (t->opcode_modifier.distinctdest) + { + unsigned int dest_reg = i.operands - 1; + + know (i.operands >= 3); + + /* #UD if dest_reg == src1_reg or dest_reg == src2_reg. */ + if (i.op[dest_reg - 1].regs == i.op[dest_reg].regs + || (i.reg_operands > 2 + && i.op[dest_reg - 2].regs == i.op[dest_reg].regs)) + { + i.error = invalid_dest_and_src_register_set; + return 1; + } + } + /* Check if broadcast is supported by the instruction and is applied to the memory operand. */ if (i.broadcast.type) @@ -6848,6 +6870,9 @@ match_template (char mnem_suffix) case invalid_tmm_register_set: err_msg = _("all tmm registers must be distinct"); break; + case invalid_dest_and_src_register_set: + err_msg = _("destination and source registers must be distinct"); + break; case unsupported_vector_index_register: err_msg = _("unsupported vector index register"); break; @@ -7628,6 +7653,14 @@ check_word_reg (void) i.suffix); return 0; } + /* For some instructions need encode as EVEX.W=1 without explicit VexW1. */ + else if (i.types[op].bitfield.qword + && intel_syntax + && i.tm.opcode_modifier.toqword) + { + /* Convert to QWORD. We want EVEX.W byte. 
*/ + i.suffix = QWORD_MNEM_SUFFIX; + } return 1; } @@ -10520,6 +10553,12 @@ check_VecOperations (char *op_string) bcst_type = 16; op_string++; } + else if (*op_string == '3' + && *(op_string+1) == '2') + { + bcst_type = 32; + op_string++; + } else { as_bad (_("Unsupported broadcast: `%s'"), saved); diff --git a/gas/doc/c-i386.texi b/gas/doc/c-i386.texi index c987dc0..9058ad4 100644 --- a/gas/doc/c-i386.texi +++ b/gas/doc/c-i386.texi @@ -214,6 +214,7 @@ accept various extension mnemonics. For example, @code{tdx}, @code{avx512_bf16}, @code{avx_vnni}, +@code{avx512_fp16}, @code{noavx512f}, @code{noavx512cd}, @code{noavx512er}, @@ -233,6 +234,7 @@ accept various extension mnemonics. For example, @code{notdx}, @code{noavx512_bf16}, @code{noavx_vnni}, +@code{noavx512_fp16}, @code{noenqcmd}, @code{noserialize}, @code{notsxldtrk}, @@ -1519,7 +1521,7 @@ supported on the CPU specified. The choices for @var{cpu_type} are: @item @samp{.avx512vbmi} @tab @samp{.avx512_4fmaps} @tab @samp{.avx512_4vnniw} @item @samp{.avx512_vpopcntdq} @tab @samp{.avx512_vbmi2} @tab @samp{.avx512_vnni} @item @samp{.avx512_bitalg} @tab @samp{.avx512_bf16} @tab @samp{.avx512_vp2intersect} -@item @samp{.tdx} @tab @samp{.avx_vnni} +@item @samp{.tdx} @tab @samp{.avx_vnni} @tab @samp{.avx512_fp16} @item @samp{.clwb} @tab @samp{.rdpid} @tab @samp{.ptwrite} @tab @item @samp{.ibt} @item @samp{.wbnoinvd} @tab @samp{.pconfig} @tab @samp{.waitpkg} @tab @samp{.cldemote} @item @samp{.shstk} @tab @samp{.gfni} @tab @samp{.vaes} @tab @samp{.vpclmulqdq} |