diff options
author | Jan Beulich <jbeulich@suse.com> | 2024-07-26 07:59:04 +0200 |
---|---|---|
committer | Jan Beulich <jbeulich@suse.com> | 2024-07-26 07:59:04 +0200 |
commit | 1cd36be7c960ce2f2db0cdaedd57058df9247e05 (patch) | |
tree | 5313836160599e925dc5565612931a1b832ea53e /gas/config | |
parent | b47de6c86f9836cef0caed02b97d81d3f2560ebf (diff) | |
download | fsf-binutils-gdb-1cd36be7c960ce2f2db0cdaedd57058df9247e05.zip fsf-binutils-gdb-1cd36be7c960ce2f2db0cdaedd57058df9247e05.tar.gz fsf-binutils-gdb-1cd36be7c960ce2f2db0cdaedd57058df9247e05.tar.bz2 |
x86/APX: optimize certain {nf}-form insns to BMI2 ones
..., as those leave EFLAGS untouched anyway. That's a shorter encoding,
available as long as no eGPR is in use anywhere.
Diffstat (limited to 'gas/config')
-rw-r--r-- | gas/config/tc-i386.c | 107 |
1 files changed, 107 insertions, 0 deletions
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index f7be075..109fb7e 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -5769,6 +5769,113 @@ optimize_nf_encoding (void) i.imm_operands = 0; pp.has_nf = false; } + else if (cpu_arch_isa_flags.bitfield.cpubmi2 + && pp.encoding == encoding_default + && (i.operands > 2 || !i.mem_operands) + && (i.types[i.operands - 1].bitfield.dword + || i.types[i.operands - 1].bitfield.qword)) + { + if (i.tm.base_opcode == 0xd2) + { + /* Optimize: -O: + <OP> one of sal, sar, shl, shr: + {nf} <OP> %cl, %rN -> <OP>x %{e,r}cx, %rN, %rN (N < 16) + {nf} <OP> %cl, ..., %rN -> <OP>x %{e,r}cx, ..., %rN (no eGPR used) + */ + gas_assert (i.tm.extension_opcode & 4); + i.tm.operand_types[0] = i.tm.operand_types[i.operands - 1]; + /* NB: i.op[0].regs specifying %cl is good enough. */ + i.types[0] = i.types[i.operands - 1]; + if (i.operands == 2) + { + i.tm.operand_types[0].bitfield.baseindex = 0; + i.tm.operand_types[2] = i.tm.operand_types[0]; + i.op[2].regs = i.op[1].regs; + i.types[2] = i.types[1]; + i.reg_operands = i.operands = 3; + } + pp.has_nf = false; + i.tm.opcode_modifier.w = 0; + i.tm.opcode_modifier.evex = 0; + i.tm.opcode_modifier.vex = VEX128; + i.tm.opcode_modifier.vexvvvv = VexVVVV_SRC2; + i.tm.opcode_space = SPACE_0F38; + i.tm.base_opcode = 0xf7; + i.tm.opcode_modifier.opcodeprefix + = !(i.tm.extension_opcode & 1) + ? PREFIX_0X66 /* shlx */ + : i.tm.extension_opcode & 2 + ? PREFIX_0XF3 /* sarx */ + : PREFIX_0XF2 /* shrx */; + i.tm.extension_opcode = None; + } + else if (i.tm.base_opcode == 0xc0 + && i.tm.extension_opcode <= 1 + && i.op[0].imms->X_op == O_constant) + { + /* Optimize: -O: + {nf} rol $I, %rN -> rorx $osz-I, %rN, %rN (I != osz-1, N < 16) + {nf} rol $I, ..., %rN -> rorx $osz-I, ..., %rN (I != osz-1, no eGPR used) + {nf} ror $I, %rN -> rorx $I, %rN, %rN (I != 1, N < 16) + {nf} ror $I, ..., %rN -> rorx $I,..., %rN (I != 1, no eGPR used) + NB: rol -> ror transformation for I == osz-1 was already handled above. + NB2: ror with an immediate of 1 uses a different base opcode. + */ + if (i.operands == 2) + { + i.tm.operand_types[2] = i.tm.operand_types[1]; + i.tm.operand_types[2].bitfield.baseindex = 0; + i.op[2].regs = i.op[1].regs; + i.types[2] = i.types[1]; + i.reg_operands = 2; + i.operands = 3; + } + pp.has_nf = false; + i.tm.opcode_modifier.w = 0; + i.tm.opcode_modifier.evex = 0; + i.tm.opcode_modifier.vex = VEX128; + i.tm.opcode_modifier.vexvvvv = 0; + i.tm.opcode_space = SPACE_0F3A; + i.tm.base_opcode = 0xf0; + i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF2; + if (!i.tm.extension_opcode) + i.op[0].imms->X_add_number = + (i.types[i.operands - 1].bitfield.byte + ? 8 : i.types[i.operands - 1].bitfield.word + ? 16 : 64 >> i.types[i.operands - 1].bitfield.dword) + - i.op[0].imms->X_add_number; + i.tm.extension_opcode = None; + } + else if (i.tm.base_opcode == 0xf6 + && i.tm.extension_opcode == 4 + && !i.mem_operands + && i.op[0].regs->reg_num == 2 + && !(i.op[0].regs->reg_flags & RegRex) ) + { + /* Optimize: -O: + {nf} mul %edx -> mulx %eax, %eax, %edx + {nf} mul %rdx -> mulx %rax, %rax, %rdx + */ + i.tm.operand_types[1] = i.tm.operand_types[0]; + i.tm.operand_types[1].bitfield.baseindex = 0; + i.tm.operand_types[2] = i.tm.operand_types[1]; + i.op[2].regs = i.op[0].regs; + /* NB: %eax is good enough also for 64-bit operand size. */ + i.op[1].regs = i.op[0].regs = reg_eax; + i.types[2] = i.types[1] = i.types[0]; + i.reg_operands = i.operands = 3; + + pp.has_nf = false; + i.tm.opcode_modifier.w = 0; + i.tm.opcode_modifier.evex = 0; + i.tm.opcode_modifier.vex = VEX128; + i.tm.opcode_modifier.vexvvvv = VexVVVV_SRC1; + i.tm.opcode_space = SPACE_0F38; + i.tm.base_opcode = 0xf6; + i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF2; + i.tm.extension_opcode = None; + } + } } static void |