diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2019-03-18 08:56:10 +0800 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2019-03-18 08:58:19 +0800 |
commit | 97ed31ae00ea83410f9daf61ece8a606044af365 (patch) | |
tree | 9afcfdc297efe11f38e852d4c2509e6039a23390 /gas/config/tc-i386.c | |
parent | 7bc0961cfec1f138a3127e8f210909aa430c425f (diff) | |
download | fsf-binutils-gdb-97ed31ae00ea83410f9daf61ece8a606044af365.zip fsf-binutils-gdb-97ed31ae00ea83410f9daf61ece8a606044af365.tar.gz fsf-binutils-gdb-97ed31ae00ea83410f9daf61ece8a606044af365.tar.bz2 |
x86: Optimize EVEX vector load/store instructions
When there is no write mask, 128-bit and 256-bit EVEX vector load and
store instructions that use only the lower 16 vector registers
(%xmm0-%xmm15 or %ymm0-%ymm15) can be encoded as the equivalent VEX
vector load and store instructions with -O1.
gas/
PR gas/24348
* config/tc-i386.c (optimize_encoding): Encode 128-bit and
256-bit EVEX vector register load/store instructions as VEX
vector register load/store instructions for -O1.
* doc/c-i386.texi: Update -O1 documentation.
* testsuite/gas/i386/i386.exp: Run PR gas/24348 tests.
* testsuite/gas/i386/optimize-1.s: Add tests for EVEX vector
load/store instructions.
* testsuite/gas/i386/optimize-2.s: Likewise.
* testsuite/gas/i386/optimize-3.s: Likewise.
* testsuite/gas/i386/optimize-5.s: Likewise.
* testsuite/gas/i386/x86-64-optimize-2.s: Likewise.
* testsuite/gas/i386/x86-64-optimize-3.s: Likewise.
* testsuite/gas/i386/x86-64-optimize-4.s: Likewise.
* testsuite/gas/i386/x86-64-optimize-5.s: Likewise.
* testsuite/gas/i386/x86-64-optimize-6.s: Likewise.
* testsuite/gas/i386/optimize-1.d: Updated.
* testsuite/gas/i386/optimize-2.d: Likewise.
* testsuite/gas/i386/optimize-3.d: Likewise.
* testsuite/gas/i386/optimize-4.d: Likewise.
* testsuite/gas/i386/optimize-5.d: Likewise.
* testsuite/gas/i386/x86-64-optimize-2.d: Likewise.
* testsuite/gas/i386/x86-64-optimize-3.d: Likewise.
* testsuite/gas/i386/x86-64-optimize-4.d: Likewise.
* testsuite/gas/i386/x86-64-optimize-5.d: Likewise.
* testsuite/gas/i386/x86-64-optimize-6.d: Likewise.
* testsuite/gas/i386/optimize-7.d: New file.
* testsuite/gas/i386/optimize-7.s: Likewise.
* testsuite/gas/i386/x86-64-optimize-8.d: Likewise.
* testsuite/gas/i386/x86-64-optimize-8.s: Likewise.
opcodes/
PR gas/24348
* i386-opc.tbl: Add Optimize to vmovdqa32, vmovdqa64, vmovdqu8,
vmovdqu16, vmovdqu32 and vmovdqu64.
* i386-tbl.h: Regenerated.
Diffstat (limited to 'gas/config/tc-i386.c')
-rw-r--r-- | gas/config/tc-i386.c | 50 |
1 files changed, 50 insertions, 0 deletions
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index 856c18d..fa06075 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -4075,6 +4075,56 @@ optimize_encoding (void) i.types[j].bitfield.ymmword = 0; } } + else if ((cpu_arch_flags.bitfield.cpuavx + || cpu_arch_isa_flags.bitfield.cpuavx) + && i.vec_encoding != vex_encoding_evex + && !i.types[0].bitfield.zmmword + && !i.mask + && is_evex_encoding (&i.tm) + && (i.tm.base_opcode == 0x666f + || (i.tm.base_opcode ^ Opcode_SIMD_IntD) == 0x666f + || i.tm.base_opcode == 0xf36f + || (i.tm.base_opcode ^ Opcode_SIMD_IntD) == 0xf36f + || i.tm.base_opcode == 0xf26f + || (i.tm.base_opcode ^ Opcode_SIMD_IntD) == 0xf26f) + && i.tm.extension_opcode == None) + { + /* Optimize: -O1: + VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16, + vmovdqu32 and vmovdqu64: + EVEX VOP %xmmM, %xmmN + -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16) + EVEX VOP %ymmM, %ymmN + -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16) + EVEX VOP %xmmM, mem + -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16) + EVEX VOP %ymmM, mem + -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16) + EVEX VOP mem, %xmmN + -> VEX vmovdqa|vmovdqu mem, %xmmN (N < 16) + EVEX VOP mem, %ymmN + -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16) + */ + if (i.tm.base_opcode == 0xf26f) + i.tm.base_opcode = 0xf36f; + else if ((i.tm.base_opcode ^ Opcode_SIMD_IntD) == 0xf26f) + i.tm.base_opcode = 0xf36f ^ Opcode_SIMD_IntD; + i.tm.opcode_modifier.vex + = i.types[0].bitfield.ymmword ? VEX256 : VEX128; + i.tm.opcode_modifier.vexw = VEXW0; + i.tm.opcode_modifier.evex = 0; + i.tm.opcode_modifier.masking = 0; + i.tm.opcode_modifier.disp8memshift = 0; + i.memshift = 0; + for (j = 0; j < 2; j++) + if (operand_type_check (i.types[j], disp) + && i.op[j].disps->X_op == O_constant) + { + i.types[j].bitfield.disp8 + = fits_in_disp8 (i.op[j].disps->X_add_number); + break; + } + } } /* This is the guts of the machine-dependent assembler. LINE points to a |