aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jan Beulich <jbeulich@suse.com>, 2023-11-24 09:55:29 +0100
committer: Jan Beulich <jbeulich@suse.com>, 2023-11-24 09:55:29 +0100
commit: 39bb3ade816faf42ec34cc7ca962ad350cb4d16a (patch)
tree: 20ebf46bd6a4f68bb3d7c6e8fcd3c4ae26f889c2
parent: eb5e952f95423bc6ae18457ccc359c8b6c0fa387 (diff)
download: fsf-binutils-gdb-39bb3ade816faf42ec34cc7ca962ad350cb4d16a.zip
fsf-binutils-gdb-39bb3ade816faf42ec34cc7ca962ad350cb4d16a.tar.gz
fsf-binutils-gdb-39bb3ade816faf42ec34cc7ca962ad350cb4d16a.tar.bz2
x86: also prefer VEX encoding over EVEX one for VCVTNEPS2BF16 when possible
Deal with what 58bceb182740 ("x86: prefer VEX encodings over EVEX ones when possible") left out, for being slightly less straightforward.
-rw-r--r-- gas/config/tc-i386.c | 20
-rw-r--r-- gas/testsuite/gas/i386/avx-vex.l | 48
-rw-r--r-- gas/testsuite/gas/i386/avx-vex.s | 7
-rw-r--r-- opcodes/i386-opc.tbl | 6
4 files changed, 68 insertions, 13 deletions
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index 2651cd2..71e0c4d 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -7475,20 +7475,27 @@ match_template (char mnem_suffix)
}
/* Check whether to use the shorter VEX encoding for certain insns where
- the EVEX enconding comes first in the table. This requires the respective
- AVX-* feature to be explicitly enabled. */
- if (t == current_templates->start
+ the EVEX encoding comes first in the table. This requires the respective
+ AVX-* feature to be explicitly enabled.
+
+ Most of the respective insns have just a single EVEX and a single VEX
+ template. The one that's presently different is generated using the
+ Vxy / Exy constructs: There are 3 suffix-less EVEX forms, the latter
+ two of which may fall back to their two corresponding VEX forms. */
+ j = t->mnem_off != MN_vcvtneps2bf16 ? 1 : 2;
+ if ((t == current_templates->start || j > 1)
&& t->opcode_modifier.disp8memshift
&& !t->opcode_modifier.vex
&& !need_evex_encoding ()
- && t + 1 < current_templates->end
- && t[1].opcode_modifier.vex)
+ && t + j < current_templates->end
+ && t[j].opcode_modifier.vex)
{
i386_cpu_flags cpu;
unsigned int memshift = i.memshift;
i.memshift = 0;
- cpu = cpu_flags_and (cpu_flags_from_attr (t[1].cpu), cpu_arch_isa_flags);
+ cpu = cpu_flags_and (cpu_flags_from_attr (t[j].cpu),
+ cpu_arch_isa_flags);
if (!cpu_flags_all_zero (&cpu)
&& (!i.types[0].bitfield.disp8
|| !operand_type_check (i.types[0], disp)
@@ -7496,6 +7503,7 @@ match_template (char mnem_suffix)
|| fits_in_disp8 (i.op[0].disps->X_add_number)))
{
specific_error = progress (internal_error);
+ t += j - 1;
continue;
}
i.memshift = memshift;
diff --git a/gas/testsuite/gas/i386/avx-vex.l b/gas/testsuite/gas/i386/avx-vex.l
index e409be3..d93768e 100644
--- a/gas/testsuite/gas/i386/avx-vex.l
+++ b/gas/testsuite/gas/i386/avx-vex.l
@@ -27,7 +27,8 @@
[ ]*[0-9]+[ ]+\?\?\?\? 62F2F538 > vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2
[ ]*[0-9]+[ ]+B410
[ ]*[0-9]+[ ]+> *
-[ ]*[0-9]+[ ]+>.*
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16 %ymm0,%xmm1
+[ ]*[0-9]+[ ]+72C8
[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y %ymm0,%xmm1
[ ]*[0-9]+[ ]+72C8
[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y 0x20\(%eax\),%xmm1
@@ -36,6 +37,12 @@
[ ]*[0-9]+[ ]+724808
[ ]*[0-9]+[ ]+\?\?\?\? 62F27E38 > vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1
[ ]*[0-9]+[ ]+7208
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+> \.intel_syntax noprefix
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27E08 > vcvtneps2bf16 xmm0,xmmword ptr \[ecx\]
+[ ]*[0-9]+[ ]+7201
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16 xmm0,ymmword ptr \[ecx\]
+[ ]*[0-9]+[ ]+7201
#...
[ ]*[0-9]+[ ]+> \.arch \.noavx512vl
[ ]*[0-9]+[ ]+> *
@@ -56,7 +63,8 @@
[ ]*[0-9]+[ ]+00
[ ]*[0-9]+[ ]+> vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2
[ ]*[0-9]+[ ]+> *
-[ ]*[0-9]+[ ]+>.*
+[ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16 %ymm0,%xmm1
+[ ]*[0-9]+[ ]+C8
[ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16y %ymm0,%xmm1
[ ]*[0-9]+[ ]+C8
[ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16y 0x20\(%eax\),%xmm1
@@ -65,6 +73,12 @@
[ ]*[0-9]+[ ]+88000100 *
[ ]*[0-9]+[ ]+00
[ ]*[0-9]+[ ]+> vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+> \.intel_syntax noprefix
+[ ]*[0-9]+[ ]+\?\?\?\? C4E27A72 > vcvtneps2bf16 xmm0,xmmword ptr \[ecx\]
+[ ]*[0-9]+[ ]+01
+[ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16 xmm0,ymmword ptr \[ecx\]
+[ ]*[0-9]+[ ]+01
#...
[ ]*[0-9]+[ ]+> \.arch \.noavx512f
[ ]*[0-9]+[ ]+> *
@@ -85,7 +99,8 @@
[ ]*[0-9]+[ ]+00
[ ]*[0-9]+[ ]+> vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2
[ ]*[0-9]+[ ]+> *
-[ ]*[0-9]+[ ]+>.*
+[ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16 %ymm0,%xmm1
+[ ]*[0-9]+[ ]+C8
[ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16y %ymm0,%xmm1
[ ]*[0-9]+[ ]+C8
[ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16y 0x20\(%eax\),%xmm1
@@ -113,7 +128,8 @@
[ ]*[0-9]+[ ]+\?\?\?\? 62F2F538 > vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2
[ ]*[0-9]+[ ]+B410
[ ]*[0-9]+[ ]+> *
-[ ]*[0-9]+[ ]+>.*
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16 %ymm0,%xmm1
+[ ]*[0-9]+[ ]+72C8
[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y %ymm0,%xmm1
[ ]*[0-9]+[ ]+72C8
[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y 0x20\(%eax\),%xmm1
@@ -122,6 +138,12 @@
[ ]*[0-9]+[ ]+724808
[ ]*[0-9]+[ ]+\?\?\?\? 62F27E38 > vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1
[ ]*[0-9]+[ ]+7208
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+> \.intel_syntax noprefix
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27E08 > vcvtneps2bf16 xmm0,xmmword ptr \[ecx\]
+[ ]*[0-9]+[ ]+7201
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16 xmm0,ymmword ptr \[ecx\]
+[ ]*[0-9]+[ ]+7201
#...
[ ]*[0-9]+[ ]+> \.arch \.avx_ifma
[ ]*[0-9]+[ ]+> *
@@ -141,7 +163,8 @@
[ ]*[0-9]+[ ]+\?\?\?\? 62F2F538 > vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2
[ ]*[0-9]+[ ]+B410
[ ]*[0-9]+[ ]+> *
-[ ]*[0-9]+[ ]+>.*
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16 %ymm0,%xmm1
+[ ]*[0-9]+[ ]+72C8
[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y %ymm0,%xmm1
[ ]*[0-9]+[ ]+72C8
[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y 0x20\(%eax\),%xmm1
@@ -150,6 +173,12 @@
[ ]*[0-9]+[ ]+724808
[ ]*[0-9]+[ ]+\?\?\?\? 62F27E38 > vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1
[ ]*[0-9]+[ ]+7208
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+> \.intel_syntax noprefix
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27E08 > vcvtneps2bf16 xmm0,xmmword ptr \[ecx\]
+[ ]*[0-9]+[ ]+7201
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16 xmm0,ymmword ptr \[ecx\]
+[ ]*[0-9]+[ ]+7201
#...
[ ]*[0-9]+[ ]+> \.arch \.avx_ne_convert
[ ]*[0-9]+[ ]+> *
@@ -169,7 +198,8 @@
[ ]*[0-9]+[ ]+\?\?\?\? 62F2F538 > vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2
[ ]*[0-9]+[ ]+B410
[ ]*[0-9]+[ ]+> *
-[ ]*[0-9]+[ ]+>.*
+[ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16 %ymm0,%xmm1
+[ ]*[0-9]+[ ]+C8
[ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16y %ymm0,%xmm1
[ ]*[0-9]+[ ]+C8
[ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16y 0x20\(%eax\),%xmm1
@@ -178,4 +208,10 @@
[ ]*[0-9]+[ ]+724808
[ ]*[0-9]+[ ]+\?\?\?\? 62F27E38 > vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1
[ ]*[0-9]+[ ]+7208
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+> \.intel_syntax noprefix
+[ ]*[0-9]+[ ]+\?\?\?\? C4E27A72 > vcvtneps2bf16 xmm0,xmmword ptr \[ecx\]
+[ ]*[0-9]+[ ]+01
+[ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16 xmm0,ymmword ptr \[ecx\]
+[ ]*[0-9]+[ ]+01
#pass
diff --git a/gas/testsuite/gas/i386/avx-vex.s b/gas/testsuite/gas/i386/avx-vex.s
index 1d87738..0490853 100644
--- a/gas/testsuite/gas/i386/avx-vex.s
+++ b/gas/testsuite/gas/i386/avx-vex.s
@@ -14,10 +14,15 @@
vpmadd52luq 0x100(%eax), %ymm1, %ymm2
vpmadd52luq (%eax){1to4}, %ymm1, %ymm2
-# vcvtneps2bf16 %ymm0, %xmm1
+ vcvtneps2bf16 %ymm0, %xmm1
vcvtneps2bf16y %ymm0, %xmm1
vcvtneps2bf16y 0x20(%eax), %xmm1
vcvtneps2bf16y 0x100(%eax), %xmm1
vcvtneps2bf16y (%eax){1to8}, %xmm1
+ .intel_syntax noprefix
+ vcvtneps2bf16 xmm0, xmmword ptr [ecx]
+ vcvtneps2bf16 xmm0, ymmword ptr [ecx]
+ .att_syntax prefix
+
.endr
diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl
index 167c0a0..b170d70 100644
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -1481,6 +1481,9 @@ gf2p8mulb<gfni>, 0x660f38cf, <gfni:cpu>GFNI, Modrm|<gfni:w0>|NoSuf, { RegXMM|Uns
true_us:1f:C>
// <Vxy> is used for VEX instructions with x/y suffixes.
+// NOTE: The order of the "unnamed" ($-prefixed) entries here needs to remain
+// in sync with <Exy>, for match_template()'s EVEX-to-VEX lowering to
+// continue to work.
<Vxy:vex:syntax:src, +
$i:Vex:IntelSyntax:RegXMM|RegYMM|Unspecified|BaseIndex, +
$a:Vex:ATTSyntax:RegXMM|RegYMM, +
@@ -2097,6 +2100,9 @@ vpclmulhqhqdq, 0x6644/0x11, VPCLMULQDQ&(AVX|AVX512F), Modrm|Space0F3A|Vex|EVexDY
// AVX512F instructions.
// <Exy> is used for EVEX instructions with x/y suffixes.
+// NOTE: The order of the "unnamed" ($-prefixed) entries here needs to remain
+// in sync with <Vxy>, for match_template()'s EVEX-to-VEX lowering to
+// continue to work.
<Exy:vl:attr:sr:sae:src:dst, +
$z::EVex512|Disp8MemShift=6:StaticRounding|SAE:SAE:RegZMM|Unspecified|BaseIndex:RegYMM, +
$i:AVX512VL:Disp8ShiftVL|IntelSyntax:::RegXMM|RegYMM|Unspecified|BaseIndex:RegXMM, +