diff options
author | Jan Beulich <jbeulich@suse.com> | 2023-11-24 09:55:29 +0100 |
---|---|---|
committer | Jan Beulich <jbeulich@suse.com> | 2023-11-24 09:55:29 +0100 |
commit | 39bb3ade816faf42ec34cc7ca962ad350cb4d16a (patch) | |
tree | 20ebf46bd6a4f68bb3d7c6e8fcd3c4ae26f889c2 /gas | |
parent | eb5e952f95423bc6ae18457ccc359c8b6c0fa387 (diff) | |
download | binutils-39bb3ade816faf42ec34cc7ca962ad350cb4d16a.zip binutils-39bb3ade816faf42ec34cc7ca962ad350cb4d16a.tar.gz binutils-39bb3ade816faf42ec34cc7ca962ad350cb4d16a.tar.bz2 |
x86: also prefer VEX encoding over EVEX one for VCVTNEPS2BF16 when possible
Handle the case that commit 58bceb182740 ("x86: prefer VEX encodings over
EVEX ones when possible") left out because it was slightly less straightforward.
Diffstat (limited to 'gas')
-rw-r--r-- | gas/config/tc-i386.c | 20 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/avx-vex.l | 48 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/avx-vex.s | 7 |
3 files changed, 62 insertions, 13 deletions
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index 2651cd2..71e0c4d 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -7475,20 +7475,27 @@ match_template (char mnem_suffix) } /* Check whether to use the shorter VEX encoding for certain insns where - the EVEX enconding comes first in the table. This requires the respective - AVX-* feature to be explicitly enabled. */ - if (t == current_templates->start + the EVEX encoding comes first in the table. This requires the respective + AVX-* feature to be explicitly enabled. + + Most of the respective insns have just a single EVEX and a single VEX + template. The one that's presently different is generated using the + Vxy / Exy constructs: There are 3 suffix-less EVEX forms, the latter + two of which may fall back to their two corresponding VEX forms. */ + j = t->mnem_off != MN_vcvtneps2bf16 ? 1 : 2; + if ((t == current_templates->start || j > 1) && t->opcode_modifier.disp8memshift && !t->opcode_modifier.vex && !need_evex_encoding () - && t + 1 < current_templates->end - && t[1].opcode_modifier.vex) + && t + j < current_templates->end + && t[j].opcode_modifier.vex) { i386_cpu_flags cpu; unsigned int memshift = i.memshift; i.memshift = 0; - cpu = cpu_flags_and (cpu_flags_from_attr (t[1].cpu), cpu_arch_isa_flags); + cpu = cpu_flags_and (cpu_flags_from_attr (t[j].cpu), + cpu_arch_isa_flags); if (!cpu_flags_all_zero (&cpu) && (!i.types[0].bitfield.disp8 || !operand_type_check (i.types[0], disp) @@ -7496,6 +7503,7 @@ match_template (char mnem_suffix) || fits_in_disp8 (i.op[0].disps->X_add_number))) { specific_error = progress (internal_error); + t += j - 1; continue; } i.memshift = memshift; diff --git a/gas/testsuite/gas/i386/avx-vex.l b/gas/testsuite/gas/i386/avx-vex.l index e409be3..d93768e 100644 --- a/gas/testsuite/gas/i386/avx-vex.l +++ b/gas/testsuite/gas/i386/avx-vex.l @@ -27,7 +27,8 @@ [ ]*[0-9]+[ ]+\?\?\?\? 
62F2F538 > vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2 [ ]*[0-9]+[ ]+B410 [ ]*[0-9]+[ ]+> * -[ ]*[0-9]+[ ]+>.* +[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16 %ymm0,%xmm1 +[ ]*[0-9]+[ ]+72C8 [ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y %ymm0,%xmm1 [ ]*[0-9]+[ ]+72C8 [ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y 0x20\(%eax\),%xmm1 @@ -36,6 +37,12 @@ [ ]*[0-9]+[ ]+724808 [ ]*[0-9]+[ ]+\?\?\?\? 62F27E38 > vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1 [ ]*[0-9]+[ ]+7208 +[ ]*[0-9]+[ ]+> * +[ ]*[0-9]+[ ]+> \.intel_syntax noprefix +[ ]*[0-9]+[ ]+\?\?\?\? 62F27E08 > vcvtneps2bf16 xmm0,xmmword ptr \[ecx\] +[ ]*[0-9]+[ ]+7201 +[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16 xmm0,ymmword ptr \[ecx\] +[ ]*[0-9]+[ ]+7201 #... [ ]*[0-9]+[ ]+> \.arch \.noavx512vl [ ]*[0-9]+[ ]+> * @@ -56,7 +63,8 @@ [ ]*[0-9]+[ ]+00 [ ]*[0-9]+[ ]+> vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2 [ ]*[0-9]+[ ]+> * -[ ]*[0-9]+[ ]+>.* +[ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16 %ymm0,%xmm1 +[ ]*[0-9]+[ ]+C8 [ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16y %ymm0,%xmm1 [ ]*[0-9]+[ ]+C8 [ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16y 0x20\(%eax\),%xmm1 @@ -65,6 +73,12 @@ [ ]*[0-9]+[ ]+88000100 * [ ]*[0-9]+[ ]+00 [ ]*[0-9]+[ ]+> vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1 +[ ]*[0-9]+[ ]+> * +[ ]*[0-9]+[ ]+> \.intel_syntax noprefix +[ ]*[0-9]+[ ]+\?\?\?\? C4E27A72 > vcvtneps2bf16 xmm0,xmmword ptr \[ecx\] +[ ]*[0-9]+[ ]+01 +[ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16 xmm0,ymmword ptr \[ecx\] +[ ]*[0-9]+[ ]+01 #... [ ]*[0-9]+[ ]+> \.arch \.noavx512f [ ]*[0-9]+[ ]+> * @@ -85,7 +99,8 @@ [ ]*[0-9]+[ ]+00 [ ]*[0-9]+[ ]+> vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2 [ ]*[0-9]+[ ]+> * -[ ]*[0-9]+[ ]+>.* +[ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16 %ymm0,%xmm1 +[ ]*[0-9]+[ ]+C8 [ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16y %ymm0,%xmm1 [ ]*[0-9]+[ ]+C8 [ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16y 0x20\(%eax\),%xmm1 @@ -113,7 +128,8 @@ [ ]*[0-9]+[ ]+\?\?\?\? 
62F2F538 > vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2 [ ]*[0-9]+[ ]+B410 [ ]*[0-9]+[ ]+> * -[ ]*[0-9]+[ ]+>.* +[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16 %ymm0,%xmm1 +[ ]*[0-9]+[ ]+72C8 [ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y %ymm0,%xmm1 [ ]*[0-9]+[ ]+72C8 [ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y 0x20\(%eax\),%xmm1 @@ -122,6 +138,12 @@ [ ]*[0-9]+[ ]+724808 [ ]*[0-9]+[ ]+\?\?\?\? 62F27E38 > vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1 [ ]*[0-9]+[ ]+7208 +[ ]*[0-9]+[ ]+> * +[ ]*[0-9]+[ ]+> \.intel_syntax noprefix +[ ]*[0-9]+[ ]+\?\?\?\? 62F27E08 > vcvtneps2bf16 xmm0,xmmword ptr \[ecx\] +[ ]*[0-9]+[ ]+7201 +[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16 xmm0,ymmword ptr \[ecx\] +[ ]*[0-9]+[ ]+7201 #... [ ]*[0-9]+[ ]+> \.arch \.avx_ifma [ ]*[0-9]+[ ]+> * @@ -141,7 +163,8 @@ [ ]*[0-9]+[ ]+\?\?\?\? 62F2F538 > vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2 [ ]*[0-9]+[ ]+B410 [ ]*[0-9]+[ ]+> * -[ ]*[0-9]+[ ]+>.* +[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16 %ymm0,%xmm1 +[ ]*[0-9]+[ ]+72C8 [ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y %ymm0,%xmm1 [ ]*[0-9]+[ ]+72C8 [ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y 0x20\(%eax\),%xmm1 @@ -150,6 +173,12 @@ [ ]*[0-9]+[ ]+724808 [ ]*[0-9]+[ ]+\?\?\?\? 62F27E38 > vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1 [ ]*[0-9]+[ ]+7208 +[ ]*[0-9]+[ ]+> * +[ ]*[0-9]+[ ]+> \.intel_syntax noprefix +[ ]*[0-9]+[ ]+\?\?\?\? 62F27E08 > vcvtneps2bf16 xmm0,xmmword ptr \[ecx\] +[ ]*[0-9]+[ ]+7201 +[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16 xmm0,ymmword ptr \[ecx\] +[ ]*[0-9]+[ ]+7201 #... [ ]*[0-9]+[ ]+> \.arch \.avx_ne_convert [ ]*[0-9]+[ ]+> * @@ -169,7 +198,8 @@ [ ]*[0-9]+[ ]+\?\?\?\? 62F2F538 > vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2 [ ]*[0-9]+[ ]+B410 [ ]*[0-9]+[ ]+> * -[ ]*[0-9]+[ ]+>.* +[ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16 %ymm0,%xmm1 +[ ]*[0-9]+[ ]+C8 [ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16y %ymm0,%xmm1 [ ]*[0-9]+[ ]+C8 [ ]*[0-9]+[ ]+\?\?\?\? 
C4E27E72 > vcvtneps2bf16y 0x20\(%eax\),%xmm1 @@ -178,4 +208,10 @@ [ ]*[0-9]+[ ]+724808 [ ]*[0-9]+[ ]+\?\?\?\? 62F27E38 > vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1 [ ]*[0-9]+[ ]+7208 +[ ]*[0-9]+[ ]+> * +[ ]*[0-9]+[ ]+> \.intel_syntax noprefix +[ ]*[0-9]+[ ]+\?\?\?\? C4E27A72 > vcvtneps2bf16 xmm0,xmmword ptr \[ecx\] +[ ]*[0-9]+[ ]+01 +[ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16 xmm0,ymmword ptr \[ecx\] +[ ]*[0-9]+[ ]+01 #pass diff --git a/gas/testsuite/gas/i386/avx-vex.s b/gas/testsuite/gas/i386/avx-vex.s index 1d87738..0490853 100644 --- a/gas/testsuite/gas/i386/avx-vex.s +++ b/gas/testsuite/gas/i386/avx-vex.s @@ -14,10 +14,15 @@ vpmadd52luq 0x100(%eax), %ymm1, %ymm2 vpmadd52luq (%eax){1to4}, %ymm1, %ymm2 -# vcvtneps2bf16 %ymm0, %xmm1 + vcvtneps2bf16 %ymm0, %xmm1 vcvtneps2bf16y %ymm0, %xmm1 vcvtneps2bf16y 0x20(%eax), %xmm1 vcvtneps2bf16y 0x100(%eax), %xmm1 vcvtneps2bf16y (%eax){1to8}, %xmm1 + .intel_syntax noprefix + vcvtneps2bf16 xmm0, xmmword ptr [ecx] + vcvtneps2bf16 xmm0, ymmword ptr [ecx] + .att_syntax prefix + .endr |