aboutsummaryrefslogtreecommitdiff
path: root/gas/config
diff options
context:
space:
mode:
authorJan Beulich <jbeulich@suse.com>2024-07-26 07:59:04 +0200
committerJan Beulich <jbeulich@suse.com>2024-07-26 07:59:04 +0200
commit1cd36be7c960ce2f2db0cdaedd57058df9247e05 (patch)
tree5313836160599e925dc5565612931a1b832ea53e /gas/config
parentb47de6c86f9836cef0caed02b97d81d3f2560ebf (diff)
downloadfsf-binutils-gdb-1cd36be7c960ce2f2db0cdaedd57058df9247e05.zip
fsf-binutils-gdb-1cd36be7c960ce2f2db0cdaedd57058df9247e05.tar.gz
fsf-binutils-gdb-1cd36be7c960ce2f2db0cdaedd57058df9247e05.tar.bz2
x86/APX: optimize certain {nf}-form insns to BMI2 ones
..., as those leave EFLAGS untouched anyway. That's a shorter encoding, available as long as no eGPR is in use anywhere.
Diffstat (limited to 'gas/config')
-rw-r--r--gas/config/tc-i386.c107
1 files changed, 107 insertions, 0 deletions
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index f7be075..109fb7e 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -5769,6 +5769,113 @@ optimize_nf_encoding (void)
i.imm_operands = 0;
pp.has_nf = false;
}
+ else if (cpu_arch_isa_flags.bitfield.cpubmi2
+ && pp.encoding == encoding_default
+ && (i.operands > 2 || !i.mem_operands)
+ && (i.types[i.operands - 1].bitfield.dword
+ || i.types[i.operands - 1].bitfield.qword))
+ {
+ if (i.tm.base_opcode == 0xd2)
+ {
+ /* Optimize: -O:
+ <OP> one of sal, sar, shl, shr:
+ {nf} <OP> %cl, %rN -> <OP>x %{e,r}cx, %rN, %rN (N < 16)
+ {nf} <OP> %cl, ..., %rN -> <OP>x %{e,r}cx, ..., %rN (no eGPR used)
+ */
+ gas_assert (i.tm.extension_opcode & 4);
+ i.tm.operand_types[0] = i.tm.operand_types[i.operands - 1];
+ /* NB: i.op[0].regs specifying %cl is good enough. */
+ i.types[0] = i.types[i.operands - 1];
+ if (i.operands == 2)
+ {
+ i.tm.operand_types[0].bitfield.baseindex = 0;
+ i.tm.operand_types[2] = i.tm.operand_types[0];
+ i.op[2].regs = i.op[1].regs;
+ i.types[2] = i.types[1];
+ i.reg_operands = i.operands = 3;
+ }
+ pp.has_nf = false;
+ i.tm.opcode_modifier.w = 0;
+ i.tm.opcode_modifier.evex = 0;
+ i.tm.opcode_modifier.vex = VEX128;
+ i.tm.opcode_modifier.vexvvvv = VexVVVV_SRC2;
+ i.tm.opcode_space = SPACE_0F38;
+ i.tm.base_opcode = 0xf7;
+ i.tm.opcode_modifier.opcodeprefix
+ = !(i.tm.extension_opcode & 1)
+ ? PREFIX_0X66 /* shlx */
+ : i.tm.extension_opcode & 2
+ ? PREFIX_0XF3 /* sarx */
+ : PREFIX_0XF2 /* shrx */;
+ i.tm.extension_opcode = None;
+ }
+ else if (i.tm.base_opcode == 0xc0
+ && i.tm.extension_opcode <= 1
+ && i.op[0].imms->X_op == O_constant)
+ {
+ /* Optimize: -O:
+ {nf} rol $I, %rN -> rorx $osz-I, %rN, %rN (I != osz-1, N < 16)
+ {nf} rol $I, ..., %rN -> rorx $osz-I, ..., %rN (I != osz-1, no eGPR used)
+ {nf} ror $I, %rN -> rorx $I, %rN, %rN (I != 1, N < 16)
+ {nf} ror $I, ..., %rN -> rorx $I,..., %rN (I != 1, no eGPR used)
+ NB: rol -> ror transformation for I == osz-1 was already handled above.
+ NB2: ror with an immediate of 1 uses a different base opcode.
+ */
+ if (i.operands == 2)
+ {
+ i.tm.operand_types[2] = i.tm.operand_types[1];
+ i.tm.operand_types[2].bitfield.baseindex = 0;
+ i.op[2].regs = i.op[1].regs;
+ i.types[2] = i.types[1];
+ i.reg_operands = 2;
+ i.operands = 3;
+ }
+ pp.has_nf = false;
+ i.tm.opcode_modifier.w = 0;
+ i.tm.opcode_modifier.evex = 0;
+ i.tm.opcode_modifier.vex = VEX128;
+ i.tm.opcode_modifier.vexvvvv = 0;
+ i.tm.opcode_space = SPACE_0F3A;
+ i.tm.base_opcode = 0xf0;
+ i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF2;
+ if (!i.tm.extension_opcode)
+ i.op[0].imms->X_add_number =
+ (i.types[i.operands - 1].bitfield.byte
+ ? 8 : i.types[i.operands - 1].bitfield.word
+ ? 16 : 64 >> i.types[i.operands - 1].bitfield.dword)
+ - i.op[0].imms->X_add_number;
+ i.tm.extension_opcode = None;
+ }
+ else if (i.tm.base_opcode == 0xf6
+ && i.tm.extension_opcode == 4
+ && !i.mem_operands
+ && i.op[0].regs->reg_num == 2
+ && !(i.op[0].regs->reg_flags & RegRex) )
+ {
+ /* Optimize: -O:
+ {nf} mul %edx -> mulx %eax, %eax, %edx
+ {nf} mul %rdx -> mulx %rax, %rax, %rdx
+ */
+ i.tm.operand_types[1] = i.tm.operand_types[0];
+ i.tm.operand_types[1].bitfield.baseindex = 0;
+ i.tm.operand_types[2] = i.tm.operand_types[1];
+ i.op[2].regs = i.op[0].regs;
+ /* NB: %eax is good enough also for 64-bit operand size. */
+ i.op[1].regs = i.op[0].regs = reg_eax;
+ i.types[2] = i.types[1] = i.types[0];
+ i.reg_operands = i.operands = 3;
+
+ pp.has_nf = false;
+ i.tm.opcode_modifier.w = 0;
+ i.tm.opcode_modifier.evex = 0;
+ i.tm.opcode_modifier.vex = VEX128;
+ i.tm.opcode_modifier.vexvvvv = VexVVVV_SRC1;
+ i.tm.opcode_space = SPACE_0F38;
+ i.tm.base_opcode = 0xf6;
+ i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF2;
+ i.tm.extension_opcode = None;
+ }
+ }
}
static void