aboutsummaryrefslogtreecommitdiff
path: root/gas/config/tc-i386.c
diff options
context:
space:
mode:
authorH.J. Lu <hjl.tools@gmail.com>2019-03-18 08:56:10 +0800
committerH.J. Lu <hjl.tools@gmail.com>2019-03-18 08:58:19 +0800
commit97ed31ae00ea83410f9daf61ece8a606044af365 (patch)
tree9afcfdc297efe11f38e852d4c2509e6039a23390 /gas/config/tc-i386.c
parent7bc0961cfec1f138a3127e8f210909aa430c425f (diff)
downloadfsf-binutils-gdb-97ed31ae00ea83410f9daf61ece8a606044af365.zip
fsf-binutils-gdb-97ed31ae00ea83410f9daf61ece8a606044af365.tar.gz
fsf-binutils-gdb-97ed31ae00ea83410f9daf61ece8a606044af365.tar.bz2
x86: Optimize EVEX vector load/store instructions
When there is no write mask, we can encode lower 16 128-bit/256-bit EVEX vector register load and store instructions as VEX vector register load and store instructions with -O1. gas/ PR gas/24348 * config/tc-i386.c (optimize_encoding): Encode 128-bit and 256-bit EVEX vector register load/store instructions as VEX vector register load/store instructions for -O1. * doc/c-i386.texi: Update -O1 documentation. * testsuite/gas/i386/i386.exp: Run PR gas/24348 tests. * testsuite/gas/i386/optimize-1.s: Add tests for EVEX vector load/store instructions. * testsuite/gas/i386/optimize-2.s: Likewise. * testsuite/gas/i386/optimize-3.s: Likewise. * testsuite/gas/i386/optimize-5.s: Likewise. * testsuite/gas/i386/x86-64-optimize-2.s: Likewise. * testsuite/gas/i386/x86-64-optimize-3.s: Likewise. * testsuite/gas/i386/x86-64-optimize-4.s: Likewise. * testsuite/gas/i386/x86-64-optimize-5.s: Likewise. * testsuite/gas/i386/x86-64-optimize-6.s: Likewise. * testsuite/gas/i386/optimize-1.d: Updated. * testsuite/gas/i386/optimize-2.d: Likewise. * testsuite/gas/i386/optimize-3.d: Likewise. * testsuite/gas/i386/optimize-4.d: Likewise. * testsuite/gas/i386/optimize-5.d: Likewise. * testsuite/gas/i386/x86-64-optimize-2.d: Likewise. * testsuite/gas/i386/x86-64-optimize-3.d: Likewise. * testsuite/gas/i386/x86-64-optimize-4.d: Likewise. * testsuite/gas/i386/x86-64-optimize-5.d: Likewise. * testsuite/gas/i386/x86-64-optimize-6.d: Likewise. * testsuite/gas/i386/optimize-7.d: New file. * testsuite/gas/i386/optimize-7.s: Likewise. * testsuite/gas/i386/x86-64-optimize-8.d: Likewise. * testsuite/gas/i386/x86-64-optimize-8.s: Likewise. opcodes/ PR gas/24348 * i386-opc.tbl: Add Optimize to vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16, vmovdqu32 and vmovdqu64. * i386-tbl.h: Regenerated.
Diffstat (limited to 'gas/config/tc-i386.c')
-rw-r--r--gas/config/tc-i386.c50
1 files changed, 50 insertions, 0 deletions
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index 856c18d..fa06075 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -4075,6 +4075,56 @@ optimize_encoding (void)
i.types[j].bitfield.ymmword = 0;
}
}
+ else if ((cpu_arch_flags.bitfield.cpuavx
+ || cpu_arch_isa_flags.bitfield.cpuavx)
+ && i.vec_encoding != vex_encoding_evex
+ && !i.types[0].bitfield.zmmword
+ && !i.mask
+ && is_evex_encoding (&i.tm)
+ && (i.tm.base_opcode == 0x666f
+ || (i.tm.base_opcode ^ Opcode_SIMD_IntD) == 0x666f
+ || i.tm.base_opcode == 0xf36f
+ || (i.tm.base_opcode ^ Opcode_SIMD_IntD) == 0xf36f
+ || i.tm.base_opcode == 0xf26f
+ || (i.tm.base_opcode ^ Opcode_SIMD_IntD) == 0xf26f)
+ && i.tm.extension_opcode == None)
+ {
+ /* Optimize: -O1:
+ VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16,
+ vmovdqu32 and vmovdqu64:
+ EVEX VOP %xmmM, %xmmN
+ -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16)
+ EVEX VOP %ymmM, %ymmN
+ -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16)
+ EVEX VOP %xmmM, mem
+ -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16)
+ EVEX VOP %ymmM, mem
+ -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16)
+ EVEX VOP mem, %xmmN
+ -> VEX mvmovdqa|vmovdquem, %xmmN (N < 16)
+ EVEX VOP mem, %ymmN
+ -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
+ */
+ if (i.tm.base_opcode == 0xf26f)
+ i.tm.base_opcode = 0xf36f;
+ else if ((i.tm.base_opcode ^ Opcode_SIMD_IntD) == 0xf26f)
+ i.tm.base_opcode = 0xf36f ^ Opcode_SIMD_IntD;
+ i.tm.opcode_modifier.vex
+ = i.types[0].bitfield.ymmword ? VEX256 : VEX128;
+ i.tm.opcode_modifier.vexw = VEXW0;
+ i.tm.opcode_modifier.evex = 0;
+ i.tm.opcode_modifier.masking = 0;
+ i.tm.opcode_modifier.disp8memshift = 0;
+ i.memshift = 0;
+ for (j = 0; j < 2; j++)
+ if (operand_type_check (i.types[j], disp)
+ && i.op[j].disps->X_op == O_constant)
+ {
+ i.types[j].bitfield.disp8
+ = fits_in_disp8 (i.op[j].disps->X_add_number);
+ break;
+ }
+ }
}
/* This is the guts of the machine-dependent assembler. LINE points to a