diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2019-03-19 21:12:47 +0800 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2019-03-19 21:13:49 +0800 |
commit | 392a59728b7286d5fd1a1c377de3c40334bbb36f (patch) | |
tree | 9d2f03c918633a003f3e82c5790917b97ef7ec66 /gas/config/tc-i386.c | |
parent | 7b1d7ca194544554f7d41aea7fdf7a69c232f15d (diff) | |
download | fsf-binutils-gdb-392a59728b7286d5fd1a1c377de3c40334bbb36f.zip fsf-binutils-gdb-392a59728b7286d5fd1a1c377de3c40334bbb36f.tar.gz fsf-binutils-gdb-392a59728b7286d5fd1a1c377de3c40334bbb36f.tar.bz2 |
x86: Correct EVEX vector load/store optimization
Update EVEX vector load/store optimization:
1. There is no need to check AVX since AVX2 is required for AVX512F.
2. We need to check both operands for a ZMM register since AT&T syntax
may not set zmmword on the first operand.
3. Update Opcode_SIMD_IntD check and set.
4. Since the VEX prefix has 2 or 3 bytes, the EVEX prefix has 4 bytes,
EVEX Disp8 has 1 byte and VEX Disp32 has 4 bytes, we choose EVEX Disp8
(5 bytes in total) over VEX Disp32 (6 or 7 bytes in total).
* config/tc-i386.c (optimize_encoding): Don't check AVX for
EVEX vector load/store optimization. Check both operands for
ZMM register. Update EVEX vector load/store opcode check.
Choose EVEX Disp8 over VEX Disp32.
* testsuite/gas/i386/optimize-1.d: Updated.
* testsuite/gas/i386/optimize-1a.d: Likewise.
* testsuite/gas/i386/optimize-2.d: Likewise.
* testsuite/gas/i386/optimize-4.d: Likewise.
* testsuite/gas/i386/optimize-5.d: Likewise.
* testsuite/gas/i386/x86-64-optimize-2.d: Likewise.
* testsuite/gas/i386/x86-64-optimize-2a.d: Likewise.
* testsuite/gas/i386/x86-64-optimize-2b.d: Likewise.
* testsuite/gas/i386/x86-64-optimize-3.d: Likewise.
* testsuite/gas/i386/x86-64-optimize-5.d: Likewise.
* testsuite/gas/i386/x86-64-optimize-6.d: Likewise.
* testsuite/gas/i386/optimize-1.s: Add ZMM register load
test.
* testsuite/gas/i386/x86-64-optimize-2.s: Likewise.
Diffstat (limited to 'gas/config/tc-i386.c')
-rw-r--r-- | gas/config/tc-i386.c | 43 |
1 file changed, 30 insertions, 13 deletions
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index 3885728..690fd23 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -4068,18 +4068,14 @@ optimize_encoding (void) i.types[j].bitfield.ymmword = 0; } } - else if ((cpu_arch_flags.bitfield.cpuavx - || cpu_arch_isa_flags.bitfield.cpuavx) - && i.vec_encoding != vex_encoding_evex + else if (i.vec_encoding != vex_encoding_evex && !i.types[0].bitfield.zmmword + && !i.types[1].bitfield.zmmword && !i.mask && is_evex_encoding (&i.tm) - && (i.tm.base_opcode == 0x666f - || (i.tm.base_opcode ^ Opcode_SIMD_IntD) == 0x666f - || i.tm.base_opcode == 0xf36f - || (i.tm.base_opcode ^ Opcode_SIMD_IntD) == 0xf36f - || i.tm.base_opcode == 0xf26f - || (i.tm.base_opcode ^ Opcode_SIMD_IntD) == 0xf26f) + && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x666f + || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf36f + || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f) && i.tm.extension_opcode == None) { /* Optimize: -O1: @@ -4098,10 +4094,31 @@ optimize_encoding (void) EVEX VOP mem, %ymmN -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16) */ - if (i.tm.base_opcode == 0xf26f) - i.tm.base_opcode = 0xf36f; - else if ((i.tm.base_opcode ^ Opcode_SIMD_IntD) == 0xf26f) - i.tm.base_opcode = 0xf36f ^ Opcode_SIMD_IntD; + for (j = 0; j < 2; j++) + if (operand_type_check (i.types[j], disp) + && i.op[j].disps->X_op == O_constant) + { + /* Since the VEX prefix has 2 or 3 bytes, the EVEX prefix + has 4 bytes, EVEX Disp8 has 1 byte and VEX Disp32 has 4 + bytes, we choose EVEX Disp8 over VEX Disp32. 
*/ + int evex_disp8, vex_disp8; + unsigned int memshift = i.memshift; + offsetT n = i.op[j].disps->X_add_number; + + evex_disp8 = fits_in_disp8 (n); + i.memshift = 0; + vex_disp8 = fits_in_disp8 (n); + if (evex_disp8 != vex_disp8) + { + i.memshift = memshift; + return; + } + + i.types[j].bitfield.disp8 = vex_disp8; + break; + } + if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f) + i.tm.base_opcode ^= 0xf36f ^ 0xf26f; i.tm.opcode_modifier.vex = i.types[0].bitfield.ymmword ? VEX256 : VEX128; i.tm.opcode_modifier.vexw = VEXW0; |