diff options
author | Jan Beulich <jbeulich@suse.com> | 2023-11-24 09:55:29 +0100 |
---|---|---|
committer | Jan Beulich <jbeulich@suse.com> | 2023-11-24 09:55:29 +0100 |
commit | 39bb3ade816faf42ec34cc7ca962ad350cb4d16a (patch) | |
tree | 20ebf46bd6a4f68bb3d7c6e8fcd3c4ae26f889c2 | |
parent | eb5e952f95423bc6ae18457ccc359c8b6c0fa387 (diff) | |
download | fsf-binutils-gdb-39bb3ade816faf42ec34cc7ca962ad350cb4d16a.zip fsf-binutils-gdb-39bb3ade816faf42ec34cc7ca962ad350cb4d16a.tar.gz fsf-binutils-gdb-39bb3ade816faf42ec34cc7ca962ad350cb4d16a.tar.bz2 |
x86: also prefer VEX encoding over EVEX one for VCVTNEPS2BF16 when possible
Deal with what 58bceb182740 ("x86: prefer VEX encodings over EVEX ones
when possible") left out: VCVTNEPS2BF16 was not covered there because
handling it is slightly less straightforward.
-rw-r--r-- | gas/config/tc-i386.c | 20 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/avx-vex.l | 48 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/avx-vex.s | 7 | ||||
-rw-r--r-- | opcodes/i386-opc.tbl | 6 |
4 files changed, 68 insertions, 13 deletions
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index 2651cd2..71e0c4d 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -7475,20 +7475,27 @@ match_template (char mnem_suffix) } /* Check whether to use the shorter VEX encoding for certain insns where - the EVEX enconding comes first in the table. This requires the respective - AVX-* feature to be explicitly enabled. */ - if (t == current_templates->start + the EVEX encoding comes first in the table. This requires the respective + AVX-* feature to be explicitly enabled. + + Most of the respective insns have just a single EVEX and a single VEX + template. The one that's presently different is generated using the + Vxy / Exy constructs: There are 3 suffix-less EVEX forms, the latter + two of which may fall back to their two corresponding VEX forms. */ + j = t->mnem_off != MN_vcvtneps2bf16 ? 1 : 2; + if ((t == current_templates->start || j > 1) && t->opcode_modifier.disp8memshift && !t->opcode_modifier.vex && !need_evex_encoding () - && t + 1 < current_templates->end - && t[1].opcode_modifier.vex) + && t + j < current_templates->end + && t[j].opcode_modifier.vex) { i386_cpu_flags cpu; unsigned int memshift = i.memshift; i.memshift = 0; - cpu = cpu_flags_and (cpu_flags_from_attr (t[1].cpu), cpu_arch_isa_flags); + cpu = cpu_flags_and (cpu_flags_from_attr (t[j].cpu), + cpu_arch_isa_flags); if (!cpu_flags_all_zero (&cpu) && (!i.types[0].bitfield.disp8 || !operand_type_check (i.types[0], disp) @@ -7496,6 +7503,7 @@ match_template (char mnem_suffix) || fits_in_disp8 (i.op[0].disps->X_add_number))) { specific_error = progress (internal_error); + t += j - 1; continue; } i.memshift = memshift; diff --git a/gas/testsuite/gas/i386/avx-vex.l b/gas/testsuite/gas/i386/avx-vex.l index e409be3..d93768e 100644 --- a/gas/testsuite/gas/i386/avx-vex.l +++ b/gas/testsuite/gas/i386/avx-vex.l @@ -27,7 +27,8 @@ [ ]*[0-9]+[ ]+\?\?\?\? 
62F2F538 > vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2 [ ]*[0-9]+[ ]+B410 [ ]*[0-9]+[ ]+> * -[ ]*[0-9]+[ ]+>.* +[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16 %ymm0,%xmm1 +[ ]*[0-9]+[ ]+72C8 [ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y %ymm0,%xmm1 [ ]*[0-9]+[ ]+72C8 [ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y 0x20\(%eax\),%xmm1 @@ -36,6 +37,12 @@ [ ]*[0-9]+[ ]+724808 [ ]*[0-9]+[ ]+\?\?\?\? 62F27E38 > vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1 [ ]*[0-9]+[ ]+7208 +[ ]*[0-9]+[ ]+> * +[ ]*[0-9]+[ ]+> \.intel_syntax noprefix +[ ]*[0-9]+[ ]+\?\?\?\? 62F27E08 > vcvtneps2bf16 xmm0,xmmword ptr \[ecx\] +[ ]*[0-9]+[ ]+7201 +[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16 xmm0,ymmword ptr \[ecx\] +[ ]*[0-9]+[ ]+7201 #... [ ]*[0-9]+[ ]+> \.arch \.noavx512vl [ ]*[0-9]+[ ]+> * @@ -56,7 +63,8 @@ [ ]*[0-9]+[ ]+00 [ ]*[0-9]+[ ]+> vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2 [ ]*[0-9]+[ ]+> * -[ ]*[0-9]+[ ]+>.* +[ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16 %ymm0,%xmm1 +[ ]*[0-9]+[ ]+C8 [ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16y %ymm0,%xmm1 [ ]*[0-9]+[ ]+C8 [ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16y 0x20\(%eax\),%xmm1 @@ -65,6 +73,12 @@ [ ]*[0-9]+[ ]+88000100 * [ ]*[0-9]+[ ]+00 [ ]*[0-9]+[ ]+> vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1 +[ ]*[0-9]+[ ]+> * +[ ]*[0-9]+[ ]+> \.intel_syntax noprefix +[ ]*[0-9]+[ ]+\?\?\?\? C4E27A72 > vcvtneps2bf16 xmm0,xmmword ptr \[ecx\] +[ ]*[0-9]+[ ]+01 +[ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16 xmm0,ymmword ptr \[ecx\] +[ ]*[0-9]+[ ]+01 #... [ ]*[0-9]+[ ]+> \.arch \.noavx512f [ ]*[0-9]+[ ]+> * @@ -85,7 +99,8 @@ [ ]*[0-9]+[ ]+00 [ ]*[0-9]+[ ]+> vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2 [ ]*[0-9]+[ ]+> * -[ ]*[0-9]+[ ]+>.* +[ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16 %ymm0,%xmm1 +[ ]*[0-9]+[ ]+C8 [ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16y %ymm0,%xmm1 [ ]*[0-9]+[ ]+C8 [ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16y 0x20\(%eax\),%xmm1 @@ -113,7 +128,8 @@ [ ]*[0-9]+[ ]+\?\?\?\? 
62F2F538 > vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2 [ ]*[0-9]+[ ]+B410 [ ]*[0-9]+[ ]+> * -[ ]*[0-9]+[ ]+>.* +[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16 %ymm0,%xmm1 +[ ]*[0-9]+[ ]+72C8 [ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y %ymm0,%xmm1 [ ]*[0-9]+[ ]+72C8 [ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y 0x20\(%eax\),%xmm1 @@ -122,6 +138,12 @@ [ ]*[0-9]+[ ]+724808 [ ]*[0-9]+[ ]+\?\?\?\? 62F27E38 > vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1 [ ]*[0-9]+[ ]+7208 +[ ]*[0-9]+[ ]+> * +[ ]*[0-9]+[ ]+> \.intel_syntax noprefix +[ ]*[0-9]+[ ]+\?\?\?\? 62F27E08 > vcvtneps2bf16 xmm0,xmmword ptr \[ecx\] +[ ]*[0-9]+[ ]+7201 +[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16 xmm0,ymmword ptr \[ecx\] +[ ]*[0-9]+[ ]+7201 #... [ ]*[0-9]+[ ]+> \.arch \.avx_ifma [ ]*[0-9]+[ ]+> * @@ -141,7 +163,8 @@ [ ]*[0-9]+[ ]+\?\?\?\? 62F2F538 > vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2 [ ]*[0-9]+[ ]+B410 [ ]*[0-9]+[ ]+> * -[ ]*[0-9]+[ ]+>.* +[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16 %ymm0,%xmm1 +[ ]*[0-9]+[ ]+72C8 [ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y %ymm0,%xmm1 [ ]*[0-9]+[ ]+72C8 [ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y 0x20\(%eax\),%xmm1 @@ -150,6 +173,12 @@ [ ]*[0-9]+[ ]+724808 [ ]*[0-9]+[ ]+\?\?\?\? 62F27E38 > vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1 [ ]*[0-9]+[ ]+7208 +[ ]*[0-9]+[ ]+> * +[ ]*[0-9]+[ ]+> \.intel_syntax noprefix +[ ]*[0-9]+[ ]+\?\?\?\? 62F27E08 > vcvtneps2bf16 xmm0,xmmword ptr \[ecx\] +[ ]*[0-9]+[ ]+7201 +[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16 xmm0,ymmword ptr \[ecx\] +[ ]*[0-9]+[ ]+7201 #... [ ]*[0-9]+[ ]+> \.arch \.avx_ne_convert [ ]*[0-9]+[ ]+> * @@ -169,7 +198,8 @@ [ ]*[0-9]+[ ]+\?\?\?\? 62F2F538 > vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2 [ ]*[0-9]+[ ]+B410 [ ]*[0-9]+[ ]+> * -[ ]*[0-9]+[ ]+>.* +[ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16 %ymm0,%xmm1 +[ ]*[0-9]+[ ]+C8 [ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16y %ymm0,%xmm1 [ ]*[0-9]+[ ]+C8 [ ]*[0-9]+[ ]+\?\?\?\? 
C4E27E72 > vcvtneps2bf16y 0x20\(%eax\),%xmm1 @@ -178,4 +208,10 @@ [ ]*[0-9]+[ ]+724808 [ ]*[0-9]+[ ]+\?\?\?\? 62F27E38 > vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1 [ ]*[0-9]+[ ]+7208 +[ ]*[0-9]+[ ]+> * +[ ]*[0-9]+[ ]+> \.intel_syntax noprefix +[ ]*[0-9]+[ ]+\?\?\?\? C4E27A72 > vcvtneps2bf16 xmm0,xmmword ptr \[ecx\] +[ ]*[0-9]+[ ]+01 +[ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16 xmm0,ymmword ptr \[ecx\] +[ ]*[0-9]+[ ]+01 #pass diff --git a/gas/testsuite/gas/i386/avx-vex.s b/gas/testsuite/gas/i386/avx-vex.s index 1d87738..0490853 100644 --- a/gas/testsuite/gas/i386/avx-vex.s +++ b/gas/testsuite/gas/i386/avx-vex.s @@ -14,10 +14,15 @@ vpmadd52luq 0x100(%eax), %ymm1, %ymm2 vpmadd52luq (%eax){1to4}, %ymm1, %ymm2 -# vcvtneps2bf16 %ymm0, %xmm1 + vcvtneps2bf16 %ymm0, %xmm1 vcvtneps2bf16y %ymm0, %xmm1 vcvtneps2bf16y 0x20(%eax), %xmm1 vcvtneps2bf16y 0x100(%eax), %xmm1 vcvtneps2bf16y (%eax){1to8}, %xmm1 + .intel_syntax noprefix + vcvtneps2bf16 xmm0, xmmword ptr [ecx] + vcvtneps2bf16 xmm0, ymmword ptr [ecx] + .att_syntax prefix + .endr diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl index 167c0a0..b170d70 100644 --- a/opcodes/i386-opc.tbl +++ b/opcodes/i386-opc.tbl @@ -1481,6 +1481,9 @@ gf2p8mulb<gfni>, 0x660f38cf, <gfni:cpu>GFNI, Modrm|<gfni:w0>|NoSuf, { RegXMM|Uns true_us:1f:C> // <Vxy> is used for VEX instructions with x/y suffixes. +// NOTE: The order of the "unnamed" ($-prefixed) entries here needs to remain +// in sync with <Exy>, for match_template()'s EVEX-to-VEX lowering to +// continue to work. <Vxy:vex:syntax:src, + $i:Vex:IntelSyntax:RegXMM|RegYMM|Unspecified|BaseIndex, + $a:Vex:ATTSyntax:RegXMM|RegYMM, + @@ -2097,6 +2100,9 @@ vpclmulhqhqdq, 0x6644/0x11, VPCLMULQDQ&(AVX|AVX512F), Modrm|Space0F3A|Vex|EVexDY // AVX512F instructions. // <Exy> is used for EVEX instructions with x/y suffixes. +// NOTE: The order of the "unnamed" ($-prefixed) entries here needs to remain +// in sync with <Vxy>, for match_template()'s EVEX-to-VEX lowering to +// continue to work. 
<Exy:vl:attr:sr:sae:src:dst, + $z::EVex512|Disp8MemShift=6:StaticRounding|SAE:SAE:RegZMM|Unspecified|BaseIndex:RegYMM, + $i:AVX512VL:Disp8ShiftVL|IntelSyntax:::RegXMM|RegYMM|Unspecified|BaseIndex:RegXMM, + |