diff options
author | Jan Beulich <jbeulich@suse.com> | 2024-06-21 14:39:52 +0200 |
---|---|---|
committer | Jan Beulich <jbeulich@suse.com> | 2024-06-21 14:39:52 +0200 |
commit | fa2c4239f1ab70b198f6d404f727d0d744221595 (patch) | |
tree | 36037743ae009cc65aab80b416dab9517431104a /gas/config/tc-i386.c | |
parent | 87860ef6f4650d513530af4cb4e3e85a863139cf (diff) | |
download | binutils-fa2c4239f1ab70b198f6d404f727d0d744221595.zip binutils-fa2c4239f1ab70b198f6d404f727d0d744221595.tar.gz binutils-fa2c4239f1ab70b198f6d404f727d0d744221595.tar.bz2 |
x86: optimize left-shift-by-1
These can be replaced by adds when acting on a register operand.
While for the scalar forms there's no gain in encoding size, ADD
generally has higher throughput than SHL. EFLAGS set by ADD are a
superset of those set by SHL (AF in particular is undefined there).
For the SIMD cases the transformation also reduced code size, by
eliminating the 1-byte immediate from the resulting encoding. Note
that this transformation is not applied by gcc13 (according to my
observations), so would - as of now - even improve compiler generated
code.
Diffstat (limited to 'gas/config/tc-i386.c')
-rw-r--r-- | gas/config/tc-i386.c | 79 |
1 files changed, 79 insertions, 0 deletions
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index e94b937..ae1ef56 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -4925,6 +4925,43 @@ optimize_encoding (void) */ i.tm.base_opcode = 0x84 | (i.tm.base_opcode & 1); } + else if (!optimize_for_space + && i.tm.base_opcode == 0xd0 + && (i.tm.opcode_space == SPACE_BASE + || i.tm.opcode_space == SPACE_EVEXMAP4) + && !i.mem_operands) + { + /* Optimize: -O: + shlb $1, %rN -> addb %rN, %rN + shlw $1, %rN -> addw %rN, %rN + shll $1, %rN -> addl %rN, %rN + shlq $1, %rN -> addq %rN, %rN + + shlb $1, %rN, %rM -> addb %rN, %rN, %rM + shlw $1, %rN, %rM -> addw %rN, %rN, %rM + shll $1, %rN, %rM -> addl %rN, %rN, %rM + shlq $1, %rN, %rM -> addq %rN, %rN, %rM + */ + gas_assert (i.tm.extension_opcode == 4); + i.tm.base_opcode = 0x00; + i.tm.extension_opcode = None; + if (i.operands >= 2) + { + i.tm.operand_types[0] = i.tm.operand_types[1]; + i.op[0].regs = i.op[1].regs; + i.types[0] = i.types[1]; + } + else + { + /* Legacy form with omitted shift count operand. */ + i.tm.operand_types[1] = i.tm.operand_types[0]; + i.op[1].regs = i.op[0].regs; + i.types[1] = i.types[0]; + i.operands = 2; + } + i.reg_operands++; + i.imm_operands = 0; + } else if (i.tm.base_opcode == 0xba && i.tm.opcode_space == SPACE_0F && i.reg_operands == 1 @@ -5181,6 +5218,48 @@ optimize_encoding (void) i.op[1].regs = i.op[0].regs; } } + else if (i.tm.extension_opcode == 6 + && i.tm.base_opcode >= 0x71 + && i.tm.base_opcode <= 0x73 + && i.tm.opcode_space == SPACE_0F + && i.op[0].imms->X_op == O_constant + && i.op[0].imms->X_add_number == 1 + && !i.mem_operands) + { + /* Optimize: -O: + psllw $1, %mmxN -> paddw %mmxN, %mmxN + psllw $1, %xmmN -> paddw %xmmN, %xmmN + vpsllw $1, %xmmN, %xmmM -> vpaddw %xmmN, %xmmN, %xmmM + vpsllw $1, %ymmN, %ymmM -> vpaddw %ymmN, %ymmN, %ymmM + vpsllw $1, %zmmN, %zmmM -> vpaddw %zmmN, %zmmN, %zmmM + + pslld $1, %mmxN -> paddd %mmxN, %mmxN + pslld $1, %xmmN -> paddd %xmmN, %xmmN + vpslld $1, %xmmN, %xmmM -> vpaddd %xmmN, %xmmN, %xmmM + vpslld $1, %ymmN, %ymmM -> vpaddd %ymmN, %ymmN, %ymmM + vpslld $1, %zmmN, %zmmM -> vpaddd %zmmN, %zmmN, %zmmM + + psllq $1, %xmmN -> paddq %xmmN, %xmmN + vpsllq $1, %xmmN, %xmmM -> vpaddq %xmmN, %xmmN, %xmmM + vpsllq $1, %ymmN, %ymmM -> vpaddq %ymmN, %ymmN, %ymmM + vpsllq $1, %zmmN, %zmmM -> vpaddq %zmmN, %zmmN, %zmmM + */ + if (i.tm.base_opcode != 0x73) + i.tm.base_opcode |= 0xfc; /* {,v}padd{w,d} */ + else + { + gas_assert (i.tm.operand_types[1].bitfield.class != RegMMX); + i.tm.base_opcode = 0xd4; /* {,v}paddq */ + } + i.tm.extension_opcode = None; + if (i.tm.opcode_modifier.vexvvvv) + i.tm.opcode_modifier.vexvvvv = VexVVVV_SRC1; + i.tm.operand_types[0] = i.tm.operand_types[1]; + i.op[0].regs = i.op[1].regs; + i.types[0] = i.types[1]; + i.reg_operands++; + i.imm_operands = 0; + } else if (optimize_for_space && i.tm.base_opcode == 0x59 && i.tm.opcode_space == SPACE_0F38 |