From 79dec6b7baa243a4ada018a082491990adb6aec1 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 1 Jul 2019 08:33:56 +0200 Subject: x86-64: optimize certain commutative VEX-encoded insns When they're in the 0F opcode space, swapping their source operands may allow switching from 3-byte to 2-byte VEX prefix encoding. Note that NaN behavior precludes us doing so for many packed and scalar floating point insns; such an optimization would need to be done by the compiler instead in this case, when it knows that NaN-s have undefined behavior anyway. While for explicitly specified AVX/AVX2 insns the optimization (for now at least) gets done only for -O2 and -Os, it is utilized by default in SSE2AVX mode, as there we're re-writing the programmer's specified insns anyway. Rather than introducing a new attribute flag, the change re-uses one which so far was meaningful only for EVEX-encoded insns. --- gas/config/tc-i386.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'gas/config/tc-i386.c') diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index fe50566..28dd540 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -98,6 +98,9 @@ #define END_OF_INSN '\0' +/* This matches the C -> StaticRounding alias in the opcode table. */ +#define commutative staticrounding + /* 'templates' is for grouping together 'template' structures for opcodes of the same name. This is only used for storing the insns in the grand @@ -3438,6 +3441,43 @@ build_vex_prefix (const insn_template *t) i.tm = t[1]; } + /* Use 2-byte VEX prefix by swapping commutative source operands if there + are no memory operands and at least 3 register ones. */ + if (i.reg_operands >= 3 + && i.vec_encoding != vex_encoding_vex3 + && i.reg_operands == i.operands - i.imm_operands + && i.tm.opcode_modifier.vex + && i.tm.opcode_modifier.commutative + && (i.tm.opcode_modifier.sse2avx || optimize > 1) + && i.rex == REX_B + && i.vex.register_specifier + && !(i.vex.register_specifier->reg_flags & RegRex)) + { + unsigned int xchg = i.operands - i.reg_operands; + union i386_op temp_op; + i386_operand_type temp_type; + + gas_assert (i.tm.opcode_modifier.vexopcode == VEX0F); + gas_assert (!i.tm.opcode_modifier.sae); + gas_assert (operand_type_equal (&i.types[i.operands - 2], + &i.types[i.operands - 3])); + gas_assert (i.rm.mode == 3); + + temp_type = i.types[xchg]; + i.types[xchg] = i.types[xchg + 1]; + i.types[xchg + 1] = temp_type; + temp_op = i.op[xchg]; + i.op[xchg] = i.op[xchg + 1]; + i.op[xchg + 1] = temp_op; + + i.rex = 0; + xchg = i.rm.regmem | 8; + i.rm.regmem = ~register_specifier & 0xf; + gas_assert (!(i.rm.regmem & 8)); + i.vex.register_specifier += xchg - i.rm.regmem; + register_specifier = ~xchg & 0xf; + } + if (i.tm.opcode_modifier.vex == VEXScalar) vector_length = avxscalar; else if (i.tm.opcode_modifier.vex == VEX256) @@ -4157,6 +4197,9 @@ optimize_encoding (void) i.tm.opcode_modifier.vex = i.types[0].bitfield.ymmword ? VEX256 : VEX128; i.tm.opcode_modifier.vexw = VEXW0; + /* VPAND, VPOR, and VPXOR are commutative. */ + if (i.reg_operands == 3 && i.tm.base_opcode != 0x66df) + i.tm.opcode_modifier.commutative = 1; i.tm.opcode_modifier.evex = 0; i.tm.opcode_modifier.masking = 0; i.tm.opcode_modifier.broadcast = 0; -- cgit v1.1