aboutsummaryrefslogtreecommitdiff
path: root/gas
diff options
context:
space:
mode:
authorCui,Lili <lili.cui@intel.com>2021-06-14 11:05:05 +0800
committerCui,Lili <lili.cui@intel.com>2021-08-05 21:03:41 +0800
commit0cc7872125efa71879e34403cc644cd19434eae3 (patch)
treea342743f974e2be23c3d25d9f79c7f52560cb0d8 /gas
parentddbe6976d51240c806488beb53b708858d8a3a67 (diff)
downloadbinutils-0cc7872125efa71879e34403cc644cd19434eae3.zip
binutils-0cc7872125efa71879e34403cc644cd19434eae3.tar.gz
binutils-0cc7872125efa71879e34403cc644cd19434eae3.tar.bz2
[PATCH 1/2] Enable Intel AVX512_FP16 instructions
Intel AVX512 FP16 instructions use maps 3, 5 and 6. Maps 5 and 6 use 3 bits in the EVEX.mmm field (0b101, 0b110). Map 5 is for instructions that were FP32 in map 1 (0Fxx). Map 6 is for instructions that were FP32 in map 2 (0F38xx). There are some exceptions to this rule. Some things in map 1 (0Fxx) with imm8 operands predated our current conventions; those instructions moved to map 3. FP32 things in map 3 (0F3Axx) found new opcodes in map 3 for FP16 because map 3 is very sparsely populated. Most of the FP16 instructions share opcodes and prefix (EVEX.pp) bits with the related FP32 operations. Intel AVX512 FP16 instructions have new displacement scaling rules; please refer to the public software developer manual for detailed information. gas/ 2021-08-05 Igor Tsimbalist <igor.v.tsimbalist@intel.com> H.J. Lu <hongjiu.lu@intel.com> Wei Xiao <wei3.xiao@intel.com> Lili Cui <lili.cui@intel.com> * config/tc-i386.c (struct Broadcast_Operation): Adjust comment. (cpu_arch): Add .avx512_fp16. (cpu_noarch): Add noavx512_fp16. (pte): Add evexmap5 and evexmap6. (build_evex_prefix): Handle EVEXMAP5 and EVEXMAP6. (check_VecOperations): Handle {1to32}. (check_VecOperands): Handle CheckRegNumb. (check_word_reg): Handle Toqword. (i386_error): Add invalid_dest_and_src_register_set. (match_template): Handle invalid_dest_and_src_register_set. * doc/c-i386.texi: Document avx512_fp16, noavx512_fp16. opcodes/ 2021-08-05 Igor Tsimbalist <igor.v.tsimbalist@intel.com> H.J. Lu <hongjiu.lu@intel.com> Wei Xiao <wei3.xiao@intel.com> Lili Cui <lili.cui@intel.com> * i386-dis.c (EXwScalarS): New. (EXxh): Ditto. (EXxhc): Ditto. (EXxmmqh): Ditto. (EXxmmqdh): Ditto. (EXEvexXwb): Ditto. (DistinctDest_Fixup): Ditto. (enum): Add xh_mode, evex_half_bcst_xmmqh_mode, evex_half_bcst_xmmqdh_mode and w_swap_mode. 
(enum): Add PREFIX_EVEX_0F3A08_W_0, PREFIX_EVEX_0F3A0A_W_0, PREFIX_EVEX_0F3A26, PREFIX_EVEX_0F3A27, PREFIX_EVEX_0F3A56, PREFIX_EVEX_0F3A57, PREFIX_EVEX_0F3A66, PREFIX_EVEX_0F3A67, PREFIX_EVEX_0F3AC2, PREFIX_EVEX_MAP5_10, PREFIX_EVEX_MAP5_11, PREFIX_EVEX_MAP5_1D, PREFIX_EVEX_MAP5_2A, PREFIX_EVEX_MAP5_2C, PREFIX_EVEX_MAP5_2D, PREFIX_EVEX_MAP5_2E, PREFIX_EVEX_MAP5_2F, PREFIX_EVEX_MAP5_51, PREFIX_EVEX_MAP5_58, PREFIX_EVEX_MAP5_59, PREFIX_EVEX_MAP5_5A_W_0, PREFIX_EVEX_MAP5_5A_W_1, PREFIX_EVEX_MAP5_5B_W_0, PREFIX_EVEX_MAP5_5B_W_1, PREFIX_EVEX_MAP5_5C, PREFIX_EVEX_MAP5_5D, PREFIX_EVEX_MAP5_5E, PREFIX_EVEX_MAP5_5F, PREFIX_EVEX_MAP5_78, PREFIX_EVEX_MAP5_79, PREFIX_EVEX_MAP5_7A, PREFIX_EVEX_MAP5_7B, PREFIX_EVEX_MAP5_7C, PREFIX_EVEX_MAP5_7D_W_0, PREFIX_EVEX_MAP6_13, PREFIX_EVEX_MAP6_56, PREFIX_EVEX_MAP6_57, PREFIX_EVEX_MAP6_D6, PREFIX_EVEX_MAP6_D7 (enum): Add EVEX_MAP5 and EVEX_MAP6. (enum): Add EVEX_W_MAP5_5A, EVEX_W_MAP5_5B, EVEX_W_MAP5_78_P_0, EVEX_W_MAP5_78_P_2, EVEX_W_MAP5_79_P_0, EVEX_W_MAP5_79_P_2, EVEX_W_MAP5_7A_P_2, EVEX_W_MAP5_7A_P_3, EVEX_W_MAP5_7B_P_2, EVEX_W_MAP5_7C_P_0, EVEX_W_MAP5_7C_P_2, EVEX_W_MAP5_7D, EVEX_W_MAP6_13_P_0, EVEX_W_MAP6_13_P_2, (get_valid_dis386): Properly handle new instructions. (intel_operand_size): Handle new modes. (OP_E_memory): Ditto. (OP_EX): Ditto. * i386-dis-evex.h: Updated for AVX512_FP16. * i386-dis-evex-mod.h: Updated for AVX512_FP16. * i386-dis-evex-prefix.h: Updated for AVX512_FP16. * i386-dis-evex-reg.h : Updated for AVX512_FP16. * i386-dis-evex-w.h : Updated for AVX512_FP16. * i386-gen.c (cpu_flag_init): Add CPU_AVX512_FP16_FLAGS, and CPU_ANY_AVX512_FP16_FLAGS. Update CPU_ANY_AVX512F_FLAGS and CPU_ANY_AVX512BW_FLAGS. (cpu_flags): Add CpuAVX512_FP16. (opcode_modifiers): Add DistinctDest. * i386-opc.h (enum): (AVX512_FP16): New. (i386_opcode_modifier): Add reqdistinctreg. (i386_cpu_flags): Add cpuavx512_fp16. (EVEXMAP5): Defined as a macro. (EVEXMAP6): Ditto. * i386-opc.tbl: Add Intel AVX512_FP16 instructions. 
* i386-init.h: Regenerated. * i386-tbl.h: Ditto.
Diffstat (limited to 'gas')
-rw-r--r--gas/NEWS2
-rw-r--r--gas/config/tc-i386.c45
-rw-r--r--gas/doc/c-i386.texi4
3 files changed, 47 insertions, 4 deletions
diff --git a/gas/NEWS b/gas/NEWS
index 3803632..9e24e4d 100644
--- a/gas/NEWS
+++ b/gas/NEWS
@@ -1,5 +1,7 @@
-*- text -*-
+* Add support for Intel AVX512_FP16 instructions.
+
Changes in 2.37:
* arm-symbianelf support removed.
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index 1235c3e..cdc660f 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -246,6 +246,7 @@ enum i386_error
invalid_vsib_address,
invalid_vector_register_set,
invalid_tmm_register_set,
+ invalid_dest_and_src_register_set,
unsupported_vector_index_register,
unsupported_broadcast,
broadcast_needed,
@@ -380,7 +381,7 @@ struct _i386_insn
expresses the broadcast factor. */
struct Broadcast_Operation
{
- /* Type of broadcast: {1to2}, {1to4}, {1to8}, or {1to16}. */
+ /* Type of broadcast: {1to2}, {1to4}, {1to8}, {1to16} or {1to32}. */
unsigned int type;
/* Index of broadcasted operand. */
@@ -1237,6 +1238,8 @@ static const arch_entry cpu_arch[] =
CPU_UINTR_FLAGS, 0 },
{ STRING_COMMA_LEN (".hreset"), PROCESSOR_UNKNOWN,
CPU_HRESET_FLAGS, 0 },
+ { STRING_COMMA_LEN (".avx512_fp16"), PROCESSOR_UNKNOWN,
+ CPU_AVX512_FP16_FLAGS, 0 },
};
static const noarch_entry cpu_noarch[] =
@@ -1292,6 +1295,7 @@ static const noarch_entry cpu_noarch[] =
{ STRING_COMMA_LEN ("nowidekl"), CPU_ANY_WIDEKL_FLAGS },
{ STRING_COMMA_LEN ("nouintr"), CPU_ANY_UINTR_FLAGS },
{ STRING_COMMA_LEN ("nohreset"), CPU_ANY_HRESET_FLAGS },
+ { STRING_COMMA_LEN ("noavx512_fp16"), CPU_ANY_AVX512_FP16_FLAGS },
};
#ifdef I386COFF
@@ -3270,7 +3274,7 @@ pte (insn_template *t)
{
static const unsigned char opc_pfx[] = { 0, 0x66, 0xf3, 0xf2 };
static const char *const opc_spc[] = {
- NULL, "0f", "0f38", "0f3a", NULL, NULL, NULL, NULL,
+ NULL, "0f", "0f38", "0f3a", NULL, "evexmap5", "evexmap6", NULL,
"XOP08", "XOP09", "XOP0A",
};
unsigned int j;
@@ -3865,7 +3869,7 @@ build_evex_prefix (void)
/* The high 3 bits of the second EVEX byte are 1's compliment of RXB
bits from REX. */
gas_assert (i.tm.opcode_modifier.opcodespace >= SPACE_0F);
- gas_assert (i.tm.opcode_modifier.opcodespace <= SPACE_0F3A);
+ gas_assert (i.tm.opcode_modifier.opcodespace <= SPACE_EVEXMAP6);
i.vex.bytes[1] = (~i.rex & 0x7) << 5 | i.tm.opcode_modifier.opcodespace;
/* The fifth bit of the second EVEX byte is 1's compliment of the
@@ -6088,6 +6092,24 @@ check_VecOperands (const insn_template *t)
return 1;
}
+ /* Some special instructions require that the destination register be
+ distinct from the source registers. */
+ if (t->opcode_modifier.distinctdest)
+ {
+ unsigned int dest_reg = i.operands - 1;
+
+ know (i.operands >= 3);
+
+ /* #UD if dest_reg == src1_reg or dest_reg == src2_reg. */
+ if (i.op[dest_reg - 1].regs == i.op[dest_reg].regs
+ || (i.reg_operands > 2
+ && i.op[dest_reg - 2].regs == i.op[dest_reg].regs))
+ {
+ i.error = invalid_dest_and_src_register_set;
+ return 1;
+ }
+ }
+
/* Check if broadcast is supported by the instruction and is applied
to the memory operand. */
if (i.broadcast.type)
@@ -6848,6 +6870,9 @@ match_template (char mnem_suffix)
case invalid_tmm_register_set:
err_msg = _("all tmm registers must be distinct");
break;
+ case invalid_dest_and_src_register_set:
+ err_msg = _("destination and source registers must be distinct");
+ break;
case unsupported_vector_index_register:
err_msg = _("unsupported vector index register");
break;
@@ -7628,6 +7653,14 @@ check_word_reg (void)
i.suffix);
return 0;
}
+ /* Some instructions need to be encoded with EVEX.W=1 without an explicit VexW1. */
+ else if (i.types[op].bitfield.qword
+ && intel_syntax
+ && i.tm.opcode_modifier.toqword)
+ {
+ /* Convert to QWORD. We want the EVEX.W bit. */
+ i.suffix = QWORD_MNEM_SUFFIX;
+ }
return 1;
}
@@ -10520,6 +10553,12 @@ check_VecOperations (char *op_string)
bcst_type = 16;
op_string++;
}
+ else if (*op_string == '3'
+ && *(op_string+1) == '2')
+ {
+ bcst_type = 32;
+ op_string++;
+ }
else
{
as_bad (_("Unsupported broadcast: `%s'"), saved);
diff --git a/gas/doc/c-i386.texi b/gas/doc/c-i386.texi
index c987dc0..9058ad4 100644
--- a/gas/doc/c-i386.texi
+++ b/gas/doc/c-i386.texi
@@ -214,6 +214,7 @@ accept various extension mnemonics. For example,
@code{tdx},
@code{avx512_bf16},
@code{avx_vnni},
+@code{avx512_fp16},
@code{noavx512f},
@code{noavx512cd},
@code{noavx512er},
@@ -233,6 +234,7 @@ accept various extension mnemonics. For example,
@code{notdx},
@code{noavx512_bf16},
@code{noavx_vnni},
+@code{noavx512_fp16},
@code{noenqcmd},
@code{noserialize},
@code{notsxldtrk},
@@ -1519,7 +1521,7 @@ supported on the CPU specified. The choices for @var{cpu_type} are:
@item @samp{.avx512vbmi} @tab @samp{.avx512_4fmaps} @tab @samp{.avx512_4vnniw}
@item @samp{.avx512_vpopcntdq} @tab @samp{.avx512_vbmi2} @tab @samp{.avx512_vnni}
@item @samp{.avx512_bitalg} @tab @samp{.avx512_bf16} @tab @samp{.avx512_vp2intersect}
-@item @samp{.tdx} @tab @samp{.avx_vnni}
+@item @samp{.tdx} @tab @samp{.avx_vnni} @tab @samp{.avx512_fp16}
@item @samp{.clwb} @tab @samp{.rdpid} @tab @samp{.ptwrite} @tab @item @samp{.ibt}
@item @samp{.wbnoinvd} @tab @samp{.pconfig} @tab @samp{.waitpkg} @tab @samp{.cldemote}
@item @samp{.shstk} @tab @samp{.gfni} @tab @samp{.vaes} @tab @samp{.vpclmulqdq}