From 82e06fa803fd00c0640dd4b2078f829f832847fd Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 28 Jun 2024 08:22:39 +0200 Subject: x86/APX: optimize {nf}-form IMUL-by-power-of-2 to SHL ..., for differing only in the resulting EFLAGS, which are left untouched anyway. That's a shorter encoding, available as long as certain constraints on operands are met; see code comments. (SHL-by-1 forms may then be subject to further optimization that was introduced earlier.) Note that kind of as a side effect this also converts multiplication by 1 to shift by 0, which is a plain move or even no-op anyway. That could be further shrunk (as could be presence of shifts/rotates by 0 in the original code as well as a fair set of other {nf}-form insns), yet the expectation (for now) is that people won't write such code in the first place. --- gas/config/tc-i386.c | 70 +++++++++++++++++++++ .../gas/i386/x86-64-apx-nf-optimize-size.d | 73 ++++++++++++++++++++++ gas/testsuite/gas/i386/x86-64-apx-nf-optimize.d | 73 ++++++++++++++++++++++ gas/testsuite/gas/i386/x86-64-apx-nf.s | 36 +++++++++++ opcodes/i386-opc.tbl | 12 ++-- opcodes/i386-tbl.h | 12 ++-- 6 files changed, 264 insertions(+), 12 deletions(-) diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index 9452b3c..7ad0267 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -5452,6 +5452,75 @@ optimize_nf_encoding (void) i.tm.operand_types[0].bitfield.imm1 = 1; i.imm_operands = 0; } + else if ((i.tm.base_opcode | 2) == 0x6b + && i.op[0].imms->X_op == O_constant + && (i.op[0].imms->X_add_number > 0 + ? !(i.op[0].imms->X_add_number & (i.op[0].imms->X_add_number - 1)) + /* optimize_imm() converts to sign-extended representation where + possible (and input can also come with these specific numbers). 
*/ + : (i.types[i.operands - 1].bitfield.word + && i.op[0].imms->X_add_number == -0x8000) + || (i.types[i.operands - 1].bitfield.dword + && i.op[0].imms->X_add_number + 1 == -0x7fffffff)) + /* 16-bit 3-operand non-ZU forms need leaving alone, to prevent + zero-extension of the result. Unless, of course, both non- + immediate operands match (which can be converted to the non-NDD + form). */ + && (i.operands < 3 + || !i.types[2].bitfield.word + || i.tm.mnem_off == MN_imulzu + || i.op[2].regs == i.op[1].regs) + /* When merely optimizing for size, exclude cases where we'd convert + from Imm8S to Imm8 encoding, thus not actually reducing size. */ + && (!optimize_for_space + || i.tm.base_opcode == 0x69 + || !(i.op[0].imms->X_add_number & 0x7d))) + { + /* Optimize: -O: + {nf} imul $1<<N, ... -> {nf} shl $N, ... + {nf} imulzu $1<<N, ... -> {nf} shl $N, ... + */ + if (i.op[0].imms->X_add_number != 2) + { + i.tm.base_opcode = 0xc0; + i.op[0].imms->X_add_number = ffs (i.op[0].imms->X_add_number) - 1; + i.tm.operand_types[0].bitfield.imm8 = 1; + i.tm.operand_types[0].bitfield.imm16 = 0; + i.tm.operand_types[0].bitfield.imm32 = 0; + i.tm.operand_types[0].bitfield.imm32s = 0; + } + else + { + i.tm.base_opcode = 0xd0; + i.tm.operand_types[0].bitfield.imm1 = 1; + } + i.types[0] = i.tm.operand_types[0]; + i.tm.extension_opcode = 4; + i.tm.opcode_modifier.w = 1; + i.tm.opcode_modifier.operandconstraint = 0; + if (i.operands == 3) + { + if (i.op[2].regs == i.op[1].regs && i.tm.mnem_off != MN_imulzu) + { + /* Convert to non-NDD form. This is required for 16-bit insns + (to prevent zero-extension) and benign for others. */ + i.operands = 2; + i.reg_operands = 1; + } + else + i.tm.opcode_modifier.vexvvvv = VexVVVV_DST; + } + else if (i.tm.mnem_off == MN_imulzu) + { + /* Convert to NDD form, to effect zero-extension of the result. 
*/ + i.tm.opcode_modifier.vexvvvv = VexVVVV_DST; + i.operands = 3; + i.reg_operands = 2; + i.op[2].regs = i.op[1].regs; + i.tm.operand_types[2] = i.tm.operand_types[1]; + i.types[2] = i.types[1]; + } + } if (optimize_for_space && i.encoding != encoding_evex @@ -5598,6 +5667,7 @@ optimize_nf_encoding (void) else if (i.tm.base_opcode == 0x6b && !i.mem_operands && i.encoding != encoding_evex + && i.tm.mnem_off != MN_imulzu && is_plausible_suffix (1) /* %rsp can't be the index. */ && is_index (i.op[1].regs) diff --git a/gas/testsuite/gas/i386/x86-64-apx-nf-optimize-size.d b/gas/testsuite/gas/i386/x86-64-apx-nf-optimize-size.d index 412292d..1950508 100644 --- a/gas/testsuite/gas/i386/x86-64-apx-nf-optimize-size.d +++ b/gas/testsuite/gas/i386/x86-64-apx-nf-optimize-size.d @@ -1522,14 +1522,87 @@ Disassembly of section \.text: [ ]*[a-f0-9]+:[ ]*62 e4 7d 0c 6b c5 03[ ]+\{nf\} imul \$0x3,%bp,%r16w [ ]*[a-f0-9]+:[ ]*62 fc 7d 0c 6b d5 05[ ]+\{nf\} imul \$0x5,%r21w,%dx [ ]*[a-f0-9]+:[ ]*62 ec 7d 0c 6b ed 09[ ]+\{nf\} imul \$0x9,%r21w,%r21w +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 6b ca 02 \{nf\} imul \$0x2,%dx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 6b 0a 02 \{nf\} imul \$0x2,\(%rdx\),%cx +[ ]*[a-f0-9]+:[ ]*66 8d 0c 09[ ]+lea \(%rcx,%rcx,1\),%cx +[ ]*[a-f0-9]+:[ ]*66 8d 0c 09[ ]+lea \(%rcx,%rcx,1\),%cx +[ ]*[a-f0-9]+:[ ]*62 f4 75 1c d1 e2 \{nf\} shl \$1,%dx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 75 1c d1 22 \{nf\} shl \$1,\(%rdx\),%cx +[ ]*[a-f0-9]+:[ ]*62 f4 75 1c d1 e1 \{nf\} shl \$1,%cx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 75 1c d1 e1 \{nf\} shl \$1,%cx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 6b ca 04 \{nf\} imul \$0x4,%dx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 6b 0a 04 \{nf\} imul \$0x4,\(%rdx\),%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 6b c9 04 \{nf\} imul \$0x4,%cx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 6b c9 04 \{nf\} imul \$0x4,%cx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 1c 6b ca 04 \{nf\} imulzu \$0x4,%dx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 1c 6b 0a 04 \{nf\} imulzu \$0x4,\(%rdx\),%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 1c 6b c9 04 \{nf\} 
imulzu \$0x4,%cx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 1c 6b c9 04 \{nf\} imulzu \$0x4,%cx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 6b ca 40 \{nf\} imul \$0x40,%dx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 6b 0a 40 \{nf\} imul \$0x40,\(%rdx\),%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 6b c9 40 \{nf\} imul \$0x40,%cx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 6b c9 40 \{nf\} imul \$0x40,%cx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 1c 6b ca 40 \{nf\} imulzu \$0x40,%dx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 1c 6b 0a 40 \{nf\} imulzu \$0x40,\(%rdx\),%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 1c 6b c9 40 \{nf\} imulzu \$0x40,%cx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 1c 6b c9 40 \{nf\} imulzu \$0x40,%cx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 69 ca 80 00 \{nf\} imul \$0x80,%dx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 69 0a 80 00 \{nf\} imul \$0x80,\(%rdx\),%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c c1 e1 07 \{nf\} shl \$0x7,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c c1 e1 07 \{nf\} shl \$0x7,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 e2 07 \{nf\} shl \$0x7,%dx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 22 07 \{nf\} shl \$0x7,\(%rdx\),%cx +[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 e1 07 \{nf\} shl \$0x7,%cx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 e1 07 \{nf\} shl \$0x7,%cx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 69 ca 00 80 \{nf\} imul \$0x8000,%dx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 69 0a 00 80 \{nf\} imul \$0x8000,\(%rdx\),%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c c1 e1 0f \{nf\} shl \$0xf,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c c1 e1 0f \{nf\} shl \$0xf,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 e1 0f \{nf\} shl \$0xf,%cx,%cx [ ]*[a-f0-9]+:[ ]*8d 14 49[ ]+lea \(%rcx,%rcx,2\),%edx [ ]*[a-f0-9]+:[ ]*8d 54 ad 00[ ]+lea 0x0\(%rbp,%rbp,4\),%edx [ ]*[a-f0-9]+:[ ]*8d 2c c9[ ]+lea \(%rcx,%rcx,8\),%ebp [ ]*[a-f0-9]+:[ ]*62 f4 7c 0c 6b d4 03[ ]+\{nf\} imul \$0x3,%esp,%edx [ ]*[a-f0-9]+:[ ]*62 f4 7c 0c 6b e4 05[ ]+\{nf\} imul \$0x5,%esp,%esp +[ ]*[a-f0-9]+:[ ]*8d 0c 12[ ]+lea \(%rdx,%rdx,1\),%ecx +[ ]*[a-f0-9]+:[ ]*62 f4 74 1c d1 22 \{nf\} shl \$1,\(%rdx\),%ecx +[ ]*[a-f0-9]+:[ ]*8d 0c 09[ ]+lea \(%rcx,%rcx,1\),%ecx 
+[ ]*[a-f0-9]+:[ ]*8d 0c 09[ ]+lea \(%rcx,%rcx,1\),%ecx +[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c 6b ca 04 \{nf\} imul \$0x4,%edx,%ecx +[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c 6b 0a 04 \{nf\} imul \$0x4,\(%rdx\),%ecx +[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c 6b c9 04 \{nf\} imul \$0x4,%ecx,%ecx +[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c 6b c9 04 \{nf\} imul \$0x4,%ecx,%ecx +[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c 6b ca 40 \{nf\} imul \$0x40,%edx,%ecx +[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c 6b 0a 40 \{nf\} imul \$0x40,\(%rdx\),%ecx +[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c 6b c9 40 \{nf\} imul \$0x40,%ecx,%ecx +[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c 6b c9 40 \{nf\} imul \$0x40,%ecx,%ecx +[ ]*[a-f0-9]+:[ ]*62 f4 74 1c c1 e2 07 \{nf\} shl \$0x7,%edx,%ecx +[ ]*[a-f0-9]+:[ ]*62 f4 74 1c c1 22 07 \{nf\} shl \$0x7,\(%rdx\),%ecx +[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c c1 e1 07 \{nf\} shl \$0x7,%ecx +[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c c1 e1 07 \{nf\} shl \$0x7,%ecx +[ ]*[a-f0-9]+:[ ]*62 f4 74 1c c1 e2 1f \{nf\} shl \$0x1f,%edx,%ecx +[ ]*[a-f0-9]+:[ ]*62 f4 74 1c c1 22 1f \{nf\} shl \$0x1f,\(%rdx\),%ecx [ ]*[a-f0-9]+:[ ]*48 8d 14 49[ ]+lea \(%rcx,%rcx,2\),%rdx [ ]*[a-f0-9]+:[ ]*48 8d 54 ad 00[ ]+lea 0x0\(%rbp,%rbp,4\),%rdx [ ]*[a-f0-9]+:[ ]*48 8d 2c c9[ ]+lea \(%rcx,%rcx,8\),%rbp [ ]*[a-f0-9]+:[ ]*62 f4 fc 0c 6b d4 03[ ]+\{nf\} imul \$0x3,%rsp,%rdx [ ]*[a-f0-9]+:[ ]*62 f4 fc 0c 6b e4 05[ ]+\{nf\} imul \$0x5,%rsp,%rsp +[ ]*[a-f0-9]+:[ ]*48 8d 0c 12[ ]+lea \(%rdx,%rdx,1\),%rcx +[ ]*[a-f0-9]+:[ ]*62 f4 f4 1c d1 22 \{nf\} shl \$1,\(%rdx\),%rcx +[ ]*[a-f0-9]+:[ ]*48 8d 0c 09[ ]+lea \(%rcx,%rcx,1\),%rcx +[ ]*[a-f0-9]+:[ ]*48 8d 0c 09[ ]+lea \(%rcx,%rcx,1\),%rcx +[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c 6b ca 04 \{nf\} imul \$0x4,%rdx,%rcx +[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c 6b 0a 04 \{nf\} imul \$0x4,\(%rdx\),%rcx +[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c 6b c9 04 \{nf\} imul \$0x4,%rcx,%rcx +[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c 6b c9 04 \{nf\} imul \$0x4,%rcx,%rcx +[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c 6b ca 40 \{nf\} imul \$0x40,%rdx,%rcx +[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c 6b 0a 40 \{nf\} imul \$0x40,\(%rdx\),%rcx +[ 
]*[a-f0-9]+:[ ]*62 f4 fc 0c 6b c9 40 \{nf\} imul \$0x40,%rcx,%rcx +[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c 6b c9 40 \{nf\} imul \$0x40,%rcx,%rcx +[ ]*[a-f0-9]+:[ ]*62 f4 f4 1c c1 e2 07 \{nf\} shl \$0x7,%rdx,%rcx +[ ]*[a-f0-9]+:[ ]*62 f4 f4 1c c1 22 07 \{nf\} shl \$0x7,\(%rdx\),%rcx +[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c c1 e1 07 \{nf\} shl \$0x7,%rcx +[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c c1 e1 07 \{nf\} shl \$0x7,%rcx +[ ]*[a-f0-9]+:[ ]*62 f4 f4 1c c1 e2 1e \{nf\} shl \$0x1e,%rdx,%rcx +[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c 69 ca 00 00 00 80 \{nf\} imul \$0xffffffff80000000,%rdx,%rcx #pass diff --git a/gas/testsuite/gas/i386/x86-64-apx-nf-optimize.d b/gas/testsuite/gas/i386/x86-64-apx-nf-optimize.d index ba50ef1..2356c64 100644 --- a/gas/testsuite/gas/i386/x86-64-apx-nf-optimize.d +++ b/gas/testsuite/gas/i386/x86-64-apx-nf-optimize.d @@ -1522,14 +1522,87 @@ Disassembly of section \.text: [ ]*[a-f0-9]+:[ ]*66 d5 40 8d 44 6d 00[ ]+lea 0x0\(%rbp,%rbp,2\),%r16w [ ]*[a-f0-9]+:[ ]*66 d5 30 8d 54 ad 00[ ]+lea 0x0\(%r21,%r21,4\),%dx [ ]*[a-f0-9]+:[ ]*66 d5 70 8d 6c ed 00[ ]+lea 0x0\(%r21,%r21,8\),%r21w +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 6b ca 02 \{nf\} imul \$0x2,%dx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 6b 0a 02 \{nf\} imul \$0x2,\(%rdx\),%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 01 c9 \{nf\} add %cx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 01 c9 \{nf\} add %cx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 75 1c 01 d2 \{nf\} add %dx,%dx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 75 1c d1 22 \{nf\} shl \$1,\(%rdx\),%cx +[ ]*[a-f0-9]+:[ ]*62 f4 75 1c 01 c9 \{nf\} add %cx,%cx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 75 1c 01 c9 \{nf\} add %cx,%cx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 6b ca 04 \{nf\} imul \$0x4,%dx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 6b 0a 04 \{nf\} imul \$0x4,\(%rdx\),%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c c1 e1 02 \{nf\} shl \$0x2,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c c1 e1 02 \{nf\} shl \$0x2,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 e2 02 \{nf\} shl \$0x2,%dx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 22 02 \{nf\} shl \$0x2,\(%rdx\),%cx +[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 
e1 02 \{nf\} shl \$0x2,%cx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 e1 02 \{nf\} shl \$0x2,%cx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 6b ca 40 \{nf\} imul \$0x40,%dx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 6b 0a 40 \{nf\} imul \$0x40,\(%rdx\),%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c c1 e1 06 \{nf\} shl \$0x6,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c c1 e1 06 \{nf\} shl \$0x6,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 e2 06 \{nf\} shl \$0x6,%dx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 22 06 \{nf\} shl \$0x6,\(%rdx\),%cx +[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 e1 06 \{nf\} shl \$0x6,%cx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 e1 06 \{nf\} shl \$0x6,%cx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 69 ca 80 00 \{nf\} imul \$0x80,%dx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 69 0a 80 00 \{nf\} imul \$0x80,\(%rdx\),%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c c1 e1 07 \{nf\} shl \$0x7,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c c1 e1 07 \{nf\} shl \$0x7,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 e2 07 \{nf\} shl \$0x7,%dx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 22 07 \{nf\} shl \$0x7,\(%rdx\),%cx +[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 e1 07 \{nf\} shl \$0x7,%cx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 e1 07 \{nf\} shl \$0x7,%cx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 69 ca 00 80 \{nf\} imul \$0x8000,%dx,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 69 0a 00 80 \{nf\} imul \$0x8000,\(%rdx\),%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c c1 e1 0f \{nf\} shl \$0xf,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c c1 e1 0f \{nf\} shl \$0xf,%cx +[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 e1 0f \{nf\} shl \$0xf,%cx,%cx [ ]*[a-f0-9]+:[ ]*8d 14 49[ ]+lea \(%rcx,%rcx,2\),%edx [ ]*[a-f0-9]+:[ ]*8d 54 ad 00[ ]+lea 0x0\(%rbp,%rbp,4\),%edx [ ]*[a-f0-9]+:[ ]*8d 2c c9[ ]+lea \(%rcx,%rcx,8\),%ebp [ ]*[a-f0-9]+:[ ]*62 f4 7c 0c 6b d4 03[ ]+\{nf\} imul \$0x3,%esp,%edx [ ]*[a-f0-9]+:[ ]*62 f4 7c 0c 6b e4 05[ ]+\{nf\} imul \$0x5,%esp,%esp +[ ]*[a-f0-9]+:[ ]*62 f4 74 1c 01 d2 \{nf\} add %edx,%edx,%ecx +[ ]*[a-f0-9]+:[ ]*62 f4 74 1c d1 22 \{nf\} shl \$1,\(%rdx\),%ecx +[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c 01 c9 \{nf\} add %ecx,%ecx +[ 
]*[a-f0-9]+:[ ]*62 f4 7c 0c 01 c9 \{nf\} add %ecx,%ecx +[ ]*[a-f0-9]+:[ ]*62 f4 74 1c c1 e2 02 \{nf\} shl \$0x2,%edx,%ecx +[ ]*[a-f0-9]+:[ ]*62 f4 74 1c c1 22 02 \{nf\} shl \$0x2,\(%rdx\),%ecx +[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c c1 e1 02 \{nf\} shl \$0x2,%ecx +[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c c1 e1 02 \{nf\} shl \$0x2,%ecx +[ ]*[a-f0-9]+:[ ]*62 f4 74 1c c1 e2 06 \{nf\} shl \$0x6,%edx,%ecx +[ ]*[a-f0-9]+:[ ]*62 f4 74 1c c1 22 06 \{nf\} shl \$0x6,\(%rdx\),%ecx +[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c c1 e1 06 \{nf\} shl \$0x6,%ecx +[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c c1 e1 06 \{nf\} shl \$0x6,%ecx +[ ]*[a-f0-9]+:[ ]*62 f4 74 1c c1 e2 07 \{nf\} shl \$0x7,%edx,%ecx +[ ]*[a-f0-9]+:[ ]*62 f4 74 1c c1 22 07 \{nf\} shl \$0x7,\(%rdx\),%ecx +[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c c1 e1 07 \{nf\} shl \$0x7,%ecx +[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c c1 e1 07 \{nf\} shl \$0x7,%ecx +[ ]*[a-f0-9]+:[ ]*62 f4 74 1c c1 e2 1f \{nf\} shl \$0x1f,%edx,%ecx +[ ]*[a-f0-9]+:[ ]*62 f4 74 1c c1 22 1f \{nf\} shl \$0x1f,\(%rdx\),%ecx [ ]*[a-f0-9]+:[ ]*48 8d 14 49[ ]+lea \(%rcx,%rcx,2\),%rdx [ ]*[a-f0-9]+:[ ]*48 8d 54 ad 00[ ]+lea 0x0\(%rbp,%rbp,4\),%rdx [ ]*[a-f0-9]+:[ ]*48 8d 2c c9[ ]+lea \(%rcx,%rcx,8\),%rbp [ ]*[a-f0-9]+:[ ]*62 f4 fc 0c 6b d4 03[ ]+\{nf\} imul \$0x3,%rsp,%rdx [ ]*[a-f0-9]+:[ ]*62 f4 fc 0c 6b e4 05[ ]+\{nf\} imul \$0x5,%rsp,%rsp +[ ]*[a-f0-9]+:[ ]*62 f4 f4 1c 01 d2 \{nf\} add %rdx,%rdx,%rcx +[ ]*[a-f0-9]+:[ ]*62 f4 f4 1c d1 22 \{nf\} shl \$1,\(%rdx\),%rcx +[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c 01 c9 \{nf\} add %rcx,%rcx +[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c 01 c9 \{nf\} add %rcx,%rcx +[ ]*[a-f0-9]+:[ ]*62 f4 f4 1c c1 e2 02 \{nf\} shl \$0x2,%rdx,%rcx +[ ]*[a-f0-9]+:[ ]*62 f4 f4 1c c1 22 02 \{nf\} shl \$0x2,\(%rdx\),%rcx +[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c c1 e1 02 \{nf\} shl \$0x2,%rcx +[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c c1 e1 02 \{nf\} shl \$0x2,%rcx +[ ]*[a-f0-9]+:[ ]*62 f4 f4 1c c1 e2 06 \{nf\} shl \$0x6,%rdx,%rcx +[ ]*[a-f0-9]+:[ ]*62 f4 f4 1c c1 22 06 \{nf\} shl \$0x6,\(%rdx\),%rcx +[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c c1 e1 06 \{nf\} shl 
\$0x6,%rcx +[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c c1 e1 06 \{nf\} shl \$0x6,%rcx +[ ]*[a-f0-9]+:[ ]*62 f4 f4 1c c1 e2 07 \{nf\} shl \$0x7,%rdx,%rcx +[ ]*[a-f0-9]+:[ ]*62 f4 f4 1c c1 22 07 \{nf\} shl \$0x7,\(%rdx\),%rcx +[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c c1 e1 07 \{nf\} shl \$0x7,%rcx +[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c c1 e1 07 \{nf\} shl \$0x7,%rcx +[ ]*[a-f0-9]+:[ ]*62 f4 f4 1c c1 e2 1e \{nf\} shl \$0x1e,%rdx,%rcx +[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c 69 ca 00 00 00 80 \{nf\} imul \$0xffffffff80000000,%rdx,%rcx #pass diff --git a/gas/testsuite/gas/i386/x86-64-apx-nf.s b/gas/testsuite/gas/i386/x86-64-apx-nf.s index fe3bee4..d30e011 100644 --- a/gas/testsuite/gas/i386/x86-64-apx-nf.s +++ b/gas/testsuite/gas/i386/x86-64-apx-nf.s @@ -1472,4 +1472,40 @@ optimize: {nf} imul $5, %r21w, %dx {nf} imul $9, %r21w .endif + + # Note: 2-6 want leaving alone with -Os. + .irp n, 1, 2, 6, 7 + # Note: 16-bit 3-operand src!=dst non-ZU form needs leaving alone. + {nf} imul $1<<\n, %\r\()dx, %\r\()cx + {nf} imul $1<<\n, (%rdx), %\r\()cx + {nf} imul $1<<\n, %\r\()cx, %\r\()cx + {nf} imul $1<<\n, %\r\()cx + + .ifeqs "\r","" + {nf} imulzu $1<<\n, %dx, %cx + {nf} imulzu $1<<\n, (%rdx), %cx + {nf} imulzu $1<<\n, %cx, %cx + {nf} imulzu $1<<\n, %cx + .endif + .endr + + .ifeqs "\r","" + # Note: 3-operand src!=dst non-ZU form needs leaving alone. + {nf} imul $1<<15, %dx, %cx + {nf} imul $-1<<15, (%rdx), %cx + {nf} imul $1<<15, %cx, %cx + {nf} imul $-1<<15, %cx + {nf} imulzu $1<<15, %cx + .endif + + .ifeqs "\r","e" + {nf} imul $1<<31, %edx, %ecx + {nf} imul $-1<<31, (%rdx), %ecx + .endif + + .ifeqs "\r","r" + {nf} imul $1<<30, %rdx, %rcx + # Needs leaving alone. 
+ {nf} imul $-1<<31, %rdx, %rcx + .endif .endr diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl index 934b456..42c52e4 100644 --- a/opcodes/i386-opc.tbl +++ b/opcodes/i386-opc.tbl @@ -419,21 +419,21 @@ imul, 0xfaf, i386, Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Reg16|Reg32|Reg64|U imul, 0xaf, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|EVexMap4|NF, { Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 } imul, 0x6b, i186, Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 } imul, 0x6b, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|EVexMap4|NF|Optimize, { Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 } -imulzu, 0x6b, APX_F, Modrm|No_bSuf|No_sSuf|EVexMap4|NF|ZU, { Imm8S, Reg16|Unspecified|BaseIndex, Reg16 } +imulzu, 0x6b, APX_F, Modrm|No_bSuf|No_sSuf|EVexMap4|NF|ZU|Optimize, { Imm8S, Reg16|Unspecified|BaseIndex, Reg16 } imul, 0x69, i186, Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Imm16|Imm32|Imm32S, Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 } -imul, 0x69, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|EVexMap4|NF, { Imm16|Imm32|Imm32S, Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 } -imulzu, 0x69, APX_F, Modrm|No_bSuf|No_sSuf|EVexMap4|NF|ZU, { Imm16, Reg16|Unspecified|BaseIndex, Reg16 } +imul, 0x69, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|EVexMap4|NF|Optimize, { Imm16|Imm32|Imm32S, Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 } +imulzu, 0x69, APX_F, Modrm|No_bSuf|No_sSuf|EVexMap4|NF|ZU|Optimize, { Imm16, Reg16|Unspecified|BaseIndex, Reg16 } // imul with 2 operands mimics imul with 3 by putting the register in // both i.rm.reg & i.rm.regmem fields. RegKludge enables this // transformation. 
imul, 0x6b, i186, Modrm|No_bSuf|No_sSuf|RegKludge, { Imm8S, Reg16|Reg32|Reg64 } imul, 0x6b, APX_F, Modrm|No_bSuf|No_sSuf|RegKludge|EVexMap4|NF|Optimize, { Imm8S, Reg16|Reg32|Reg64 } imul, 0x69, i186, Modrm|No_bSuf|No_sSuf|RegKludge, { Imm16|Imm32|Imm32S, Reg16|Reg32|Reg64 } -imul, 0x69, APX_F, Modrm|No_bSuf|No_sSuf|RegKludge|EVexMap4|NF, { Imm16|Imm32|Imm32S, Reg16|Reg32|Reg64 } +imul, 0x69, APX_F, Modrm|No_bSuf|No_sSuf|RegKludge|EVexMap4|NF|Optimize, { Imm16|Imm32|Imm32S, Reg16|Reg32|Reg64 } // ZU is omitted here, for colliding with RegKludge. process_operands() will // replace the constraint value after processing RegKludge. -imulzu, 0x6b, APX_F, Modrm|No_bSuf|No_sSuf|RegKludge|EVexMap4|NF/*|ZU*/, { Imm8S, Reg16 } -imulzu, 0x69, APX_F, Modrm|No_bSuf|No_sSuf|RegKludge|EVexMap4|NF/*|ZU*/, { Imm16, Reg16 } +imulzu, 0x6b, APX_F, Modrm|No_bSuf|No_sSuf|RegKludge|EVexMap4|NF/*|ZU*/|Optimize, { Imm8S, Reg16 } +imulzu, 0x69, APX_F, Modrm|No_bSuf|No_sSuf|RegKludge|EVexMap4|NF/*|ZU*/|Optimize, { Imm16, Reg16 } diff --git a/opcodes/i386-tbl.h b/opcodes/i386-tbl.h index 0cd2b89..885fbc4 100644 --- a/opcodes/i386-tbl.h +++ b/opcodes/i386-tbl.h @@ -3452,7 +3452,7 @@ static const insn_template i386_optab[] = 0, 0, 0, 0, 0, 0 } } } }, { MN_imul, 0x69, 3, SPACE_EVEXMAP4, None, { 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0 }, { { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -3494,7 +3494,7 @@ static const insn_template i386_optab[] = 0, 0, 0, 0, 0, 0 } } } }, { MN_imul, 0x69, 2, SPACE_EVEXMAP4, None, { 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0 }, { { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ 
-3504,7 +3504,7 @@ static const insn_template i386_optab[] = 0, 0, 0, 0, 0, 0 } } } }, { MN_imulzu, 0x6b, 3, SPACE_EVEXMAP4, None, { 0, 0, 0, 1, 0, 0, 0, 0, 10, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0 }, { { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -3516,7 +3516,7 @@ static const insn_template i386_optab[] = 0, 0, 0, 0, 0, 0 } } } }, { MN_imulzu, 0x69, 3, SPACE_EVEXMAP4, None, { 0, 0, 0, 1, 0, 0, 0, 0, 10, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0 }, { { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -3528,7 +3528,7 @@ static const insn_template i386_optab[] = 0, 0, 0, 0, 0, 0 } } } }, { MN_imulzu, 0x6b, 2, SPACE_EVEXMAP4, None, { 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0 }, { { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -3538,7 +3538,7 @@ static const insn_template i386_optab[] = 0, 0, 0, 0, 0, 0 } } } }, { MN_imulzu, 0x69, 2, SPACE_EVEXMAP4, None, { 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0 }, { { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, -- cgit v1.1