diff options
author | Jan Beulich <jbeulich@suse.com> | 2019-07-01 08:33:56 +0200 |
---|---|---|
committer | Jan Beulich <jbeulich@suse.com> | 2019-07-01 08:33:56 +0200 |
commit | 79dec6b7baa243a4ada018a082491990adb6aec1 (patch) | |
tree | e77adf4f441561605525a616766898d5280192cb /gas/config/tc-i386.c | |
parent | a80195f1224aced64392b452d55980b6bbd2a8b9 (diff) | |
download | gdb-79dec6b7baa243a4ada018a082491990adb6aec1.zip gdb-79dec6b7baa243a4ada018a082491990adb6aec1.tar.gz gdb-79dec6b7baa243a4ada018a082491990adb6aec1.tar.bz2 |
x86-64: optimize certain commutative VEX-encoded insns
When they're in the 0F opcode space, swapping their source operands may
allow switching from 3-byte to 2-byte VEX prefix encoding. Note that NaN
behavior precludes us doing so for many packed and scalar floating point
insns; such an optimization would need to be done by the compiler
instead in this case, when it knows that NaN-s have undefined behavior
anyway.
While for explicitly specified AVX/AVX2 insns the optimization (for now
at least) gets done only for -O2 and -Os, it is utilized by default in
SSE2AVX mode, as there we're re-writing the programmer's specified insns
anyway.
Rather than introducing a new attribute flag, the change re-uses one
which so far was meaningful only for EVEX-encoded insns.
Diffstat (limited to 'gas/config/tc-i386.c')
-rw-r--r-- | gas/config/tc-i386.c | 43 |
1 files changed, 43 insertions, 0 deletions
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index fe50566..28dd540 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -98,6 +98,9 @@ #define END_OF_INSN '\0' +/* This matches the C -> StaticRounding alias in the opcode table. */ +#define commutative staticrounding + /* 'templates' is for grouping together 'template' structures for opcodes of the same name. This is only used for storing the insns in the grand @@ -3438,6 +3441,43 @@ build_vex_prefix (const insn_template *t) i.tm = t[1]; } + /* Use 2-byte VEX prefix by swapping commutative source operands if there + are no memory operands and at least 3 register ones. */ + if (i.reg_operands >= 3 + && i.vec_encoding != vex_encoding_vex3 + && i.reg_operands == i.operands - i.imm_operands + && i.tm.opcode_modifier.vex + && i.tm.opcode_modifier.commutative + && (i.tm.opcode_modifier.sse2avx || optimize > 1) + && i.rex == REX_B + && i.vex.register_specifier + && !(i.vex.register_specifier->reg_flags & RegRex)) + { + unsigned int xchg = i.operands - i.reg_operands; + union i386_op temp_op; + i386_operand_type temp_type; + + gas_assert (i.tm.opcode_modifier.vexopcode == VEX0F); + gas_assert (!i.tm.opcode_modifier.sae); + gas_assert (operand_type_equal (&i.types[i.operands - 2], + &i.types[i.operands - 3])); + gas_assert (i.rm.mode == 3); + + temp_type = i.types[xchg]; + i.types[xchg] = i.types[xchg + 1]; + i.types[xchg + 1] = temp_type; + temp_op = i.op[xchg]; + i.op[xchg] = i.op[xchg + 1]; + i.op[xchg + 1] = temp_op; + + i.rex = 0; + xchg = i.rm.regmem | 8; + i.rm.regmem = ~register_specifier & 0xf; + gas_assert (!(i.rm.regmem & 8)); + i.vex.register_specifier += xchg - i.rm.regmem; + register_specifier = ~xchg & 0xf; + } + if (i.tm.opcode_modifier.vex == VEXScalar) vector_length = avxscalar; else if (i.tm.opcode_modifier.vex == VEX256) @@ -4157,6 +4197,9 @@ optimize_encoding (void) i.tm.opcode_modifier.vex = i.types[0].bitfield.ymmword ? VEX256 : VEX128; i.tm.opcode_modifier.vexw = VEXW0; + /* VPAND, VPOR, and VPXOR are commutative. */ + if (i.reg_operands == 3 && i.tm.base_opcode != 0x66df) + i.tm.opcode_modifier.commutative = 1; i.tm.opcode_modifier.evex = 0; i.tm.opcode_modifier.masking = 0; i.tm.opcode_modifier.broadcast = 0; |