aboutsummaryrefslogtreecommitdiff
path: root/gas/config
diff options
context:
space:
mode:
authorJan Beulich <jbeulich@suse.com>2021-04-26 10:37:30 +0200
committerJan Beulich <jbeulich@suse.com>2021-04-26 10:37:30 +0200
commitfe134c656991cda74ab7bad80947930f666f8907 (patch)
treeeca41b24b1d7416e13375c9dc9685b1ebf5b5a4e /gas/config
parentb818855549013ba43e730e07f790e5cbfd16a757 (diff)
downloadgdb-fe134c656991cda74ab7bad80947930f666f8907.zip
gdb-fe134c656991cda74ab7bad80947930f666f8907.tar.gz
gdb-fe134c656991cda74ab7bad80947930f666f8907.tar.bz2
x86: optimize LEA
Over the years I've seen a number of instances where people used lea (%reg1), %reg2 or lea symbol, %reg despite the same thing being expressible via MOV. Since additionally LEA often has restrictions on the ports it can be issued to, while MOV typically gets dealt with simply by register renaming, transform to MOV when possible (without growing opcode size and without altering involved relocation types). Note that for Mach-O the new 64-bit testcases would fail (because BFD_RELOC_X86_64_32S has no representation there), and hence get skipped there.
Diffstat (limited to 'gas/config')
-rw-r--r--gas/config/tc-i386.c142
1 files changed, 141 insertions, 1 deletions
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index 1f9844d..116b9db 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -3562,7 +3562,8 @@ want_disp32 (const insn_template *t)
|| i.prefix[ADDR_PREFIX]
|| (t->base_opcode == 0x8d
&& t->opcode_modifier.opcodespace == SPACE_BASE
- && !i.types[1].bitfield.qword);
+ && (!i.types[1].bitfield.qword
+ || t->opcode_modifier.size == SIZE32));
}
static int
@@ -4069,6 +4070,145 @@ optimize_encoding (void)
{
unsigned int j;
+ if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
+ && i.tm.base_opcode == 0x8d)
+ {
+ /* Optimize: -O:
+ lea symbol, %rN -> mov $symbol, %rN
+ lea (%rM), %rN -> mov %rM, %rN
+ lea (,%rM,1), %rN -> mov %rM, %rN
+
+ and in 32-bit mode for 16-bit addressing
+
+ lea (%rM), %rN -> movzx %rM, %rN
+
+ and in 64-bit mode zap 32-bit addressing in favor of using a
+ 32-bit (or less) destination.
+ */
+ if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
+ {
+ if (!i.op[1].regs->reg_type.bitfield.word)
+ i.tm.opcode_modifier.size = SIZE32;
+ i.prefix[ADDR_PREFIX] = 0;
+ }
+
+ if (!i.index_reg && !i.base_reg)
+ {
+ /* Handle:
+ lea symbol, %rN -> mov $symbol, %rN
+ */
+ if (flag_code == CODE_64BIT)
+ {
+ /* Don't transform a relocation to a 16-bit one. */
+ if (i.op[0].disps
+ && i.op[0].disps->X_op != O_constant
+ && i.op[1].regs->reg_type.bitfield.word)
+ return;
+
+ if (!i.op[1].regs->reg_type.bitfield.qword
+ || i.tm.opcode_modifier.size == SIZE32)
+ {
+ i.tm.base_opcode = 0xb8;
+ i.tm.opcode_modifier.modrm = 0;
+ if (!i.op[1].regs->reg_type.bitfield.word)
+ i.types[0].bitfield.imm32 = 1;
+ else
+ {
+ i.tm.opcode_modifier.size = SIZE16;
+ i.types[0].bitfield.imm16 = 1;
+ }
+ }
+ else
+ {
+ /* Subject to further optimization below. */
+ i.tm.base_opcode = 0xc7;
+ i.tm.extension_opcode = 0;
+ i.types[0].bitfield.imm32s = 1;
+ i.types[0].bitfield.baseindex = 0;
+ }
+ }
+ /* Outside of 64-bit mode address and operand sizes have to match if
+ a relocation is involved, as otherwise we wouldn't (currently) or
+ even couldn't express the relocation correctly. */
+ else if (i.op[0].disps
+ && i.op[0].disps->X_op != O_constant
+ && ((!i.prefix[ADDR_PREFIX])
+ != (flag_code == CODE_32BIT
+ ? i.op[1].regs->reg_type.bitfield.dword
+ : i.op[1].regs->reg_type.bitfield.word)))
+ return;
+ else
+ {
+ i.tm.base_opcode = 0xb8;
+ i.tm.opcode_modifier.modrm = 0;
+ if (i.op[1].regs->reg_type.bitfield.dword)
+ i.types[0].bitfield.imm32 = 1;
+ else
+ i.types[0].bitfield.imm16 = 1;
+
+ if (i.op[0].disps
+ && i.op[0].disps->X_op == O_constant
+ && i.op[1].regs->reg_type.bitfield.dword
+ && !i.prefix[ADDR_PREFIX] != (flag_code == CODE_32BIT))
+ i.op[0].disps->X_add_number &= 0xffff;
+ }
+
+ i.tm.operand_types[0] = i.types[0];
+ i.imm_operands = 1;
+ if (!i.op[0].imms)
+ {
+ i.op[0].imms = &im_expressions[0];
+ i.op[0].imms->X_op = O_absent;
+ }
+ }
+ else if (i.op[0].disps
+ && (i.op[0].disps->X_op != O_constant
+ || i.op[0].disps->X_add_number))
+ return;
+ else
+ {
+ /* Handle:
+ lea (%rM), %rN -> mov %rM, %rN
+ lea (,%rM,1), %rN -> mov %rM, %rN
+ lea (%rM), %rN -> movzx %rM, %rN
+ */
+ const reg_entry *addr_reg;
+
+ if (!i.index_reg && i.base_reg->reg_num != RegIP)
+ addr_reg = i.base_reg;
+ else if (!i.base_reg
+ && i.index_reg->reg_num != RegIZ
+ && !i.log2_scale_factor)
+ addr_reg = i.index_reg;
+ else
+ return;
+
+ if (addr_reg->reg_type.bitfield.word
+ && i.op[1].regs->reg_type.bitfield.dword)
+ {
+ if (flag_code != CODE_32BIT)
+ return;
+ i.tm.opcode_modifier.opcodespace = SPACE_0F;
+ i.tm.base_opcode = 0xb7;
+ }
+ else
+ i.tm.base_opcode = 0x8b;
+
+ if (addr_reg->reg_type.bitfield.dword
+ && i.op[1].regs->reg_type.bitfield.qword)
+ i.tm.opcode_modifier.size = SIZE32;
+
+ i.op[0].regs = addr_reg;
+ i.reg_operands = 2;
+ }
+
+ i.mem_operands = 0;
+ i.disp_operands = 0;
+ i.prefix[ADDR_PREFIX] = 0;
+ i.prefix[SEG_PREFIX] = 0;
+ i.seg[0] = NULL;
+ }
+
if (optimize_for_space
&& i.tm.opcode_modifier.opcodespace == SPACE_BASE
&& i.reg_operands == 1