aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJan Beulich <jbeulich@suse.com>2024-06-28 08:22:39 +0200
committerJan Beulich <jbeulich@suse.com>2024-06-28 08:22:39 +0200
commit82e06fa803fd00c0640dd4b2078f829f832847fd (patch)
tree06a6e2a5444425bb4dde267dba5be9cce1221dad
parent2a7f257afb4297394ea4239ed97c507d83220dd7 (diff)
downloadgdb-82e06fa803fd00c0640dd4b2078f829f832847fd.zip
gdb-82e06fa803fd00c0640dd4b2078f829f832847fd.tar.gz
gdb-82e06fa803fd00c0640dd4b2078f829f832847fd.tar.bz2
x86/APX: optimize {nf}-form IMUL-by-power-of-2 to SHL
..., for differing only in the resulting EFLAGS, which are left untouched anyway. That's a shorter encoding, available as long as certain constraints on operands are met; see code comments. (SHL-by-1 forms may then be subject to further optimization that was introduced earlier.) Note that, kind of as a side effect, this also converts multiplication by 1 to shift by 0, which is a plain move or even a no-op anyway. That could be further shrunk (as could the presence of shifts/rotates by 0 in the original code, as well as a fair set of other {nf}-form insns), yet the expectation (for now) is that people won't write such code in the first place.
-rw-r--r--gas/config/tc-i386.c70
-rw-r--r--gas/testsuite/gas/i386/x86-64-apx-nf-optimize-size.d73
-rw-r--r--gas/testsuite/gas/i386/x86-64-apx-nf-optimize.d73
-rw-r--r--gas/testsuite/gas/i386/x86-64-apx-nf.s36
-rw-r--r--opcodes/i386-opc.tbl12
-rw-r--r--opcodes/i386-tbl.h12
6 files changed, 264 insertions, 12 deletions
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index 9452b3c..7ad0267 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -5452,6 +5452,75 @@ optimize_nf_encoding (void)
i.tm.operand_types[0].bitfield.imm1 = 1;
i.imm_operands = 0;
}
+ else if ((i.tm.base_opcode | 2) == 0x6b
+ && i.op[0].imms->X_op == O_constant
+ && (i.op[0].imms->X_add_number > 0
+ ? !(i.op[0].imms->X_add_number & (i.op[0].imms->X_add_number - 1))
+ /* optimize_imm() converts to sign-extended representation where
+ possible (and input can also come with these specific numbers). */
+ : (i.types[i.operands - 1].bitfield.word
+ && i.op[0].imms->X_add_number == -0x8000)
+ || (i.types[i.operands - 1].bitfield.dword
+ && i.op[0].imms->X_add_number + 1 == -0x7fffffff))
+ /* 16-bit 3-operand non-ZU forms need leaving alone, to prevent
+ zero-extension of the result. Unless, of course, both non-
+ immediate operands match (which can be converted to the non-NDD
+ form). */
+ && (i.operands < 3
+ || !i.types[2].bitfield.word
+ || i.tm.mnem_off == MN_imulzu
+ || i.op[2].regs == i.op[1].regs)
+ /* When merely optimizing for size, exclude cases where we'd convert
+ from Imm8S to Imm8 encoding, thus not actually reducing size. */
+ && (!optimize_for_space
+ || i.tm.base_opcode == 0x69
+ || !(i.op[0].imms->X_add_number & 0x7d)))
+ {
+ /* Optimize: -O:
+ {nf} imul $1<<N, ... -> {nf} shl $N, ...
+ {nf} imulzu $1<<N, ... -> {nf} shl $N, ...
+ */
+ if (i.op[0].imms->X_add_number != 2)
+ {
+ i.tm.base_opcode = 0xc0;
+ i.op[0].imms->X_add_number = ffs (i.op[0].imms->X_add_number) - 1;
+ i.tm.operand_types[0].bitfield.imm8 = 1;
+ i.tm.operand_types[0].bitfield.imm16 = 0;
+ i.tm.operand_types[0].bitfield.imm32 = 0;
+ i.tm.operand_types[0].bitfield.imm32s = 0;
+ }
+ else
+ {
+ i.tm.base_opcode = 0xd0;
+ i.tm.operand_types[0].bitfield.imm1 = 1;
+ }
+ i.types[0] = i.tm.operand_types[0];
+ i.tm.extension_opcode = 4;
+ i.tm.opcode_modifier.w = 1;
+ i.tm.opcode_modifier.operandconstraint = 0;
+ if (i.operands == 3)
+ {
+ if (i.op[2].regs == i.op[1].regs && i.tm.mnem_off != MN_imulzu)
+ {
+ /* Convert to non-NDD form. This is required for 16-bit insns
+ (to prevent zero-extension) and benign for others. */
+ i.operands = 2;
+ i.reg_operands = 1;
+ }
+ else
+ i.tm.opcode_modifier.vexvvvv = VexVVVV_DST;
+ }
+ else if (i.tm.mnem_off == MN_imulzu)
+ {
+ /* Convert to NDD form, to effect zero-extension of the result. */
+ i.tm.opcode_modifier.vexvvvv = VexVVVV_DST;
+ i.operands = 3;
+ i.reg_operands = 2;
+ i.op[2].regs = i.op[1].regs;
+ i.tm.operand_types[2] = i.tm.operand_types[1];
+ i.types[2] = i.types[1];
+ }
+ }
if (optimize_for_space
&& i.encoding != encoding_evex
@@ -5598,6 +5667,7 @@ optimize_nf_encoding (void)
else if (i.tm.base_opcode == 0x6b
&& !i.mem_operands
&& i.encoding != encoding_evex
+ && i.tm.mnem_off != MN_imulzu
&& is_plausible_suffix (1)
/* %rsp can't be the index. */
&& is_index (i.op[1].regs)
diff --git a/gas/testsuite/gas/i386/x86-64-apx-nf-optimize-size.d b/gas/testsuite/gas/i386/x86-64-apx-nf-optimize-size.d
index 412292d..1950508 100644
--- a/gas/testsuite/gas/i386/x86-64-apx-nf-optimize-size.d
+++ b/gas/testsuite/gas/i386/x86-64-apx-nf-optimize-size.d
@@ -1522,14 +1522,87 @@ Disassembly of section \.text:
[ ]*[a-f0-9]+:[ ]*62 e4 7d 0c 6b c5 03[ ]+\{nf\} imul \$0x3,%bp,%r16w
[ ]*[a-f0-9]+:[ ]*62 fc 7d 0c 6b d5 05[ ]+\{nf\} imul \$0x5,%r21w,%dx
[ ]*[a-f0-9]+:[ ]*62 ec 7d 0c 6b ed 09[ ]+\{nf\} imul \$0x9,%r21w,%r21w
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 6b ca 02 \{nf\} imul \$0x2,%dx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 6b 0a 02 \{nf\} imul \$0x2,\(%rdx\),%cx
+[ ]*[a-f0-9]+:[ ]*66 8d 0c 09[ ]+lea \(%rcx,%rcx,1\),%cx
+[ ]*[a-f0-9]+:[ ]*66 8d 0c 09[ ]+lea \(%rcx,%rcx,1\),%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 75 1c d1 e2 \{nf\} shl \$1,%dx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 75 1c d1 22 \{nf\} shl \$1,\(%rdx\),%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 75 1c d1 e1 \{nf\} shl \$1,%cx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 75 1c d1 e1 \{nf\} shl \$1,%cx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 6b ca 04 \{nf\} imul \$0x4,%dx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 6b 0a 04 \{nf\} imul \$0x4,\(%rdx\),%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 6b c9 04 \{nf\} imul \$0x4,%cx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 6b c9 04 \{nf\} imul \$0x4,%cx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 1c 6b ca 04 \{nf\} imulzu \$0x4,%dx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 1c 6b 0a 04 \{nf\} imulzu \$0x4,\(%rdx\),%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 1c 6b c9 04 \{nf\} imulzu \$0x4,%cx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 1c 6b c9 04 \{nf\} imulzu \$0x4,%cx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 6b ca 40 \{nf\} imul \$0x40,%dx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 6b 0a 40 \{nf\} imul \$0x40,\(%rdx\),%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 6b c9 40 \{nf\} imul \$0x40,%cx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 6b c9 40 \{nf\} imul \$0x40,%cx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 1c 6b ca 40 \{nf\} imulzu \$0x40,%dx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 1c 6b 0a 40 \{nf\} imulzu \$0x40,\(%rdx\),%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 1c 6b c9 40 \{nf\} imulzu \$0x40,%cx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 1c 6b c9 40 \{nf\} imulzu \$0x40,%cx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 69 ca 80 00 \{nf\} imul \$0x80,%dx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 69 0a 80 00 \{nf\} imul \$0x80,\(%rdx\),%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c c1 e1 07 \{nf\} shl \$0x7,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c c1 e1 07 \{nf\} shl \$0x7,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 e2 07 \{nf\} shl \$0x7,%dx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 22 07 \{nf\} shl \$0x7,\(%rdx\),%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 e1 07 \{nf\} shl \$0x7,%cx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 e1 07 \{nf\} shl \$0x7,%cx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 69 ca 00 80 \{nf\} imul \$0x8000,%dx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 69 0a 00 80 \{nf\} imul \$0x8000,\(%rdx\),%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c c1 e1 0f \{nf\} shl \$0xf,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c c1 e1 0f \{nf\} shl \$0xf,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 e1 0f \{nf\} shl \$0xf,%cx,%cx
[ ]*[a-f0-9]+:[ ]*8d 14 49[ ]+lea \(%rcx,%rcx,2\),%edx
[ ]*[a-f0-9]+:[ ]*8d 54 ad 00[ ]+lea 0x0\(%rbp,%rbp,4\),%edx
[ ]*[a-f0-9]+:[ ]*8d 2c c9[ ]+lea \(%rcx,%rcx,8\),%ebp
[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c 6b d4 03[ ]+\{nf\} imul \$0x3,%esp,%edx
[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c 6b e4 05[ ]+\{nf\} imul \$0x5,%esp,%esp
+[ ]*[a-f0-9]+:[ ]*8d 0c 12[ ]+lea \(%rdx,%rdx,1\),%ecx
+[ ]*[a-f0-9]+:[ ]*62 f4 74 1c d1 22 \{nf\} shl \$1,\(%rdx\),%ecx
+[ ]*[a-f0-9]+:[ ]*8d 0c 09[ ]+lea \(%rcx,%rcx,1\),%ecx
+[ ]*[a-f0-9]+:[ ]*8d 0c 09[ ]+lea \(%rcx,%rcx,1\),%ecx
+[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c 6b ca 04 \{nf\} imul \$0x4,%edx,%ecx
+[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c 6b 0a 04 \{nf\} imul \$0x4,\(%rdx\),%ecx
+[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c 6b c9 04 \{nf\} imul \$0x4,%ecx,%ecx
+[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c 6b c9 04 \{nf\} imul \$0x4,%ecx,%ecx
+[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c 6b ca 40 \{nf\} imul \$0x40,%edx,%ecx
+[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c 6b 0a 40 \{nf\} imul \$0x40,\(%rdx\),%ecx
+[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c 6b c9 40 \{nf\} imul \$0x40,%ecx,%ecx
+[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c 6b c9 40 \{nf\} imul \$0x40,%ecx,%ecx
+[ ]*[a-f0-9]+:[ ]*62 f4 74 1c c1 e2 07 \{nf\} shl \$0x7,%edx,%ecx
+[ ]*[a-f0-9]+:[ ]*62 f4 74 1c c1 22 07 \{nf\} shl \$0x7,\(%rdx\),%ecx
+[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c c1 e1 07 \{nf\} shl \$0x7,%ecx
+[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c c1 e1 07 \{nf\} shl \$0x7,%ecx
+[ ]*[a-f0-9]+:[ ]*62 f4 74 1c c1 e2 1f \{nf\} shl \$0x1f,%edx,%ecx
+[ ]*[a-f0-9]+:[ ]*62 f4 74 1c c1 22 1f \{nf\} shl \$0x1f,\(%rdx\),%ecx
[ ]*[a-f0-9]+:[ ]*48 8d 14 49[ ]+lea \(%rcx,%rcx,2\),%rdx
[ ]*[a-f0-9]+:[ ]*48 8d 54 ad 00[ ]+lea 0x0\(%rbp,%rbp,4\),%rdx
[ ]*[a-f0-9]+:[ ]*48 8d 2c c9[ ]+lea \(%rcx,%rcx,8\),%rbp
[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c 6b d4 03[ ]+\{nf\} imul \$0x3,%rsp,%rdx
[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c 6b e4 05[ ]+\{nf\} imul \$0x5,%rsp,%rsp
+[ ]*[a-f0-9]+:[ ]*48 8d 0c 12[ ]+lea \(%rdx,%rdx,1\),%rcx
+[ ]*[a-f0-9]+:[ ]*62 f4 f4 1c d1 22 \{nf\} shl \$1,\(%rdx\),%rcx
+[ ]*[a-f0-9]+:[ ]*48 8d 0c 09[ ]+lea \(%rcx,%rcx,1\),%rcx
+[ ]*[a-f0-9]+:[ ]*48 8d 0c 09[ ]+lea \(%rcx,%rcx,1\),%rcx
+[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c 6b ca 04 \{nf\} imul \$0x4,%rdx,%rcx
+[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c 6b 0a 04 \{nf\} imul \$0x4,\(%rdx\),%rcx
+[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c 6b c9 04 \{nf\} imul \$0x4,%rcx,%rcx
+[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c 6b c9 04 \{nf\} imul \$0x4,%rcx,%rcx
+[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c 6b ca 40 \{nf\} imul \$0x40,%rdx,%rcx
+[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c 6b 0a 40 \{nf\} imul \$0x40,\(%rdx\),%rcx
+[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c 6b c9 40 \{nf\} imul \$0x40,%rcx,%rcx
+[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c 6b c9 40 \{nf\} imul \$0x40,%rcx,%rcx
+[ ]*[a-f0-9]+:[ ]*62 f4 f4 1c c1 e2 07 \{nf\} shl \$0x7,%rdx,%rcx
+[ ]*[a-f0-9]+:[ ]*62 f4 f4 1c c1 22 07 \{nf\} shl \$0x7,\(%rdx\),%rcx
+[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c c1 e1 07 \{nf\} shl \$0x7,%rcx
+[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c c1 e1 07 \{nf\} shl \$0x7,%rcx
+[ ]*[a-f0-9]+:[ ]*62 f4 f4 1c c1 e2 1e \{nf\} shl \$0x1e,%rdx,%rcx
+[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c 69 ca 00 00 00 80 \{nf\} imul \$0xffffffff80000000,%rdx,%rcx
#pass
diff --git a/gas/testsuite/gas/i386/x86-64-apx-nf-optimize.d b/gas/testsuite/gas/i386/x86-64-apx-nf-optimize.d
index ba50ef1..2356c64 100644
--- a/gas/testsuite/gas/i386/x86-64-apx-nf-optimize.d
+++ b/gas/testsuite/gas/i386/x86-64-apx-nf-optimize.d
@@ -1522,14 +1522,87 @@ Disassembly of section \.text:
[ ]*[a-f0-9]+:[ ]*66 d5 40 8d 44 6d 00[ ]+lea 0x0\(%rbp,%rbp,2\),%r16w
[ ]*[a-f0-9]+:[ ]*66 d5 30 8d 54 ad 00[ ]+lea 0x0\(%r21,%r21,4\),%dx
[ ]*[a-f0-9]+:[ ]*66 d5 70 8d 6c ed 00[ ]+lea 0x0\(%r21,%r21,8\),%r21w
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 6b ca 02 \{nf\} imul \$0x2,%dx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 6b 0a 02 \{nf\} imul \$0x2,\(%rdx\),%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 01 c9 \{nf\} add %cx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 01 c9 \{nf\} add %cx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 75 1c 01 d2 \{nf\} add %dx,%dx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 75 1c d1 22 \{nf\} shl \$1,\(%rdx\),%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 75 1c 01 c9 \{nf\} add %cx,%cx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 75 1c 01 c9 \{nf\} add %cx,%cx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 6b ca 04 \{nf\} imul \$0x4,%dx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 6b 0a 04 \{nf\} imul \$0x4,\(%rdx\),%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c c1 e1 02 \{nf\} shl \$0x2,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c c1 e1 02 \{nf\} shl \$0x2,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 e2 02 \{nf\} shl \$0x2,%dx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 22 02 \{nf\} shl \$0x2,\(%rdx\),%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 e1 02 \{nf\} shl \$0x2,%cx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 e1 02 \{nf\} shl \$0x2,%cx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 6b ca 40 \{nf\} imul \$0x40,%dx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 6b 0a 40 \{nf\} imul \$0x40,\(%rdx\),%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c c1 e1 06 \{nf\} shl \$0x6,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c c1 e1 06 \{nf\} shl \$0x6,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 e2 06 \{nf\} shl \$0x6,%dx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 22 06 \{nf\} shl \$0x6,\(%rdx\),%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 e1 06 \{nf\} shl \$0x6,%cx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 e1 06 \{nf\} shl \$0x6,%cx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 69 ca 80 00 \{nf\} imul \$0x80,%dx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 69 0a 80 00 \{nf\} imul \$0x80,\(%rdx\),%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c c1 e1 07 \{nf\} shl \$0x7,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c c1 e1 07 \{nf\} shl \$0x7,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 e2 07 \{nf\} shl \$0x7,%dx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 22 07 \{nf\} shl \$0x7,\(%rdx\),%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 e1 07 \{nf\} shl \$0x7,%cx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 e1 07 \{nf\} shl \$0x7,%cx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 69 ca 00 80 \{nf\} imul \$0x8000,%dx,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c 69 0a 00 80 \{nf\} imul \$0x8000,\(%rdx\),%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c c1 e1 0f \{nf\} shl \$0xf,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 7d 0c c1 e1 0f \{nf\} shl \$0xf,%cx
+[ ]*[a-f0-9]+:[ ]*62 f4 75 1c c1 e1 0f \{nf\} shl \$0xf,%cx,%cx
[ ]*[a-f0-9]+:[ ]*8d 14 49[ ]+lea \(%rcx,%rcx,2\),%edx
[ ]*[a-f0-9]+:[ ]*8d 54 ad 00[ ]+lea 0x0\(%rbp,%rbp,4\),%edx
[ ]*[a-f0-9]+:[ ]*8d 2c c9[ ]+lea \(%rcx,%rcx,8\),%ebp
[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c 6b d4 03[ ]+\{nf\} imul \$0x3,%esp,%edx
[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c 6b e4 05[ ]+\{nf\} imul \$0x5,%esp,%esp
+[ ]*[a-f0-9]+:[ ]*62 f4 74 1c 01 d2 \{nf\} add %edx,%edx,%ecx
+[ ]*[a-f0-9]+:[ ]*62 f4 74 1c d1 22 \{nf\} shl \$1,\(%rdx\),%ecx
+[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c 01 c9 \{nf\} add %ecx,%ecx
+[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c 01 c9 \{nf\} add %ecx,%ecx
+[ ]*[a-f0-9]+:[ ]*62 f4 74 1c c1 e2 02 \{nf\} shl \$0x2,%edx,%ecx
+[ ]*[a-f0-9]+:[ ]*62 f4 74 1c c1 22 02 \{nf\} shl \$0x2,\(%rdx\),%ecx
+[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c c1 e1 02 \{nf\} shl \$0x2,%ecx
+[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c c1 e1 02 \{nf\} shl \$0x2,%ecx
+[ ]*[a-f0-9]+:[ ]*62 f4 74 1c c1 e2 06 \{nf\} shl \$0x6,%edx,%ecx
+[ ]*[a-f0-9]+:[ ]*62 f4 74 1c c1 22 06 \{nf\} shl \$0x6,\(%rdx\),%ecx
+[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c c1 e1 06 \{nf\} shl \$0x6,%ecx
+[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c c1 e1 06 \{nf\} shl \$0x6,%ecx
+[ ]*[a-f0-9]+:[ ]*62 f4 74 1c c1 e2 07 \{nf\} shl \$0x7,%edx,%ecx
+[ ]*[a-f0-9]+:[ ]*62 f4 74 1c c1 22 07 \{nf\} shl \$0x7,\(%rdx\),%ecx
+[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c c1 e1 07 \{nf\} shl \$0x7,%ecx
+[ ]*[a-f0-9]+:[ ]*62 f4 7c 0c c1 e1 07 \{nf\} shl \$0x7,%ecx
+[ ]*[a-f0-9]+:[ ]*62 f4 74 1c c1 e2 1f \{nf\} shl \$0x1f,%edx,%ecx
+[ ]*[a-f0-9]+:[ ]*62 f4 74 1c c1 22 1f \{nf\} shl \$0x1f,\(%rdx\),%ecx
[ ]*[a-f0-9]+:[ ]*48 8d 14 49[ ]+lea \(%rcx,%rcx,2\),%rdx
[ ]*[a-f0-9]+:[ ]*48 8d 54 ad 00[ ]+lea 0x0\(%rbp,%rbp,4\),%rdx
[ ]*[a-f0-9]+:[ ]*48 8d 2c c9[ ]+lea \(%rcx,%rcx,8\),%rbp
[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c 6b d4 03[ ]+\{nf\} imul \$0x3,%rsp,%rdx
[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c 6b e4 05[ ]+\{nf\} imul \$0x5,%rsp,%rsp
+[ ]*[a-f0-9]+:[ ]*62 f4 f4 1c 01 d2 \{nf\} add %rdx,%rdx,%rcx
+[ ]*[a-f0-9]+:[ ]*62 f4 f4 1c d1 22 \{nf\} shl \$1,\(%rdx\),%rcx
+[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c 01 c9 \{nf\} add %rcx,%rcx
+[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c 01 c9 \{nf\} add %rcx,%rcx
+[ ]*[a-f0-9]+:[ ]*62 f4 f4 1c c1 e2 02 \{nf\} shl \$0x2,%rdx,%rcx
+[ ]*[a-f0-9]+:[ ]*62 f4 f4 1c c1 22 02 \{nf\} shl \$0x2,\(%rdx\),%rcx
+[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c c1 e1 02 \{nf\} shl \$0x2,%rcx
+[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c c1 e1 02 \{nf\} shl \$0x2,%rcx
+[ ]*[a-f0-9]+:[ ]*62 f4 f4 1c c1 e2 06 \{nf\} shl \$0x6,%rdx,%rcx
+[ ]*[a-f0-9]+:[ ]*62 f4 f4 1c c1 22 06 \{nf\} shl \$0x6,\(%rdx\),%rcx
+[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c c1 e1 06 \{nf\} shl \$0x6,%rcx
+[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c c1 e1 06 \{nf\} shl \$0x6,%rcx
+[ ]*[a-f0-9]+:[ ]*62 f4 f4 1c c1 e2 07 \{nf\} shl \$0x7,%rdx,%rcx
+[ ]*[a-f0-9]+:[ ]*62 f4 f4 1c c1 22 07 \{nf\} shl \$0x7,\(%rdx\),%rcx
+[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c c1 e1 07 \{nf\} shl \$0x7,%rcx
+[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c c1 e1 07 \{nf\} shl \$0x7,%rcx
+[ ]*[a-f0-9]+:[ ]*62 f4 f4 1c c1 e2 1e \{nf\} shl \$0x1e,%rdx,%rcx
+[ ]*[a-f0-9]+:[ ]*62 f4 fc 0c 69 ca 00 00 00 80 \{nf\} imul \$0xffffffff80000000,%rdx,%rcx
#pass
diff --git a/gas/testsuite/gas/i386/x86-64-apx-nf.s b/gas/testsuite/gas/i386/x86-64-apx-nf.s
index fe3bee4..d30e011 100644
--- a/gas/testsuite/gas/i386/x86-64-apx-nf.s
+++ b/gas/testsuite/gas/i386/x86-64-apx-nf.s
@@ -1472,4 +1472,40 @@ optimize:
{nf} imul $5, %r21w, %dx
{nf} imul $9, %r21w
.endif
+
+ # Note: shift counts 2-6 (multipliers 4...64, encoded as Imm8S) want leaving alone with -Os.
+ .irp n, 1, 2, 6, 7
+ # Note: 16-bit 3-operand src!=dst non-ZU form needs leaving alone.
+ {nf} imul $1<<\n, %\r\()dx, %\r\()cx
+ {nf} imul $1<<\n, (%rdx), %\r\()cx
+ {nf} imul $1<<\n, %\r\()cx, %\r\()cx
+ {nf} imul $1<<\n, %\r\()cx
+
+ .ifeqs "\r",""
+ {nf} imulzu $1<<\n, %dx, %cx
+ {nf} imulzu $1<<\n, (%rdx), %cx
+ {nf} imulzu $1<<\n, %cx, %cx
+ {nf} imulzu $1<<\n, %cx
+ .endif
+ .endr
+
+ .ifeqs "\r",""
+ # Note: 3-operand src!=dst non-ZU form needs leaving alone.
+ {nf} imul $1<<15, %dx, %cx
+ {nf} imul $-1<<15, (%rdx), %cx
+ {nf} imul $1<<15, %cx, %cx
+ {nf} imul $-1<<15, %cx
+ {nf} imulzu $1<<15, %cx
+ .endif
+
+ .ifeqs "\r","e"
+ {nf} imul $1<<31, %edx, %ecx
+ {nf} imul $-1<<31, (%rdx), %ecx
+ .endif
+
+ .ifeqs "\r","r"
+ {nf} imul $1<<30, %rdx, %rcx
+ # Needs leaving alone.
+ {nf} imul $-1<<31, %rdx, %rcx
+ .endif
.endr
diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl
index 934b456..42c52e4 100644
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -419,21 +419,21 @@ imul, 0xfaf, i386, Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Reg16|Reg32|Reg64|U
imul, 0xaf, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|EVexMap4|NF, { Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
imul, 0x6b, i186, Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
imul, 0x6b, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|EVexMap4|NF|Optimize, { Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
-imulzu, 0x6b, APX_F, Modrm|No_bSuf|No_sSuf|EVexMap4|NF|ZU, { Imm8S, Reg16|Unspecified|BaseIndex, Reg16 }
+imulzu, 0x6b, APX_F, Modrm|No_bSuf|No_sSuf|EVexMap4|NF|ZU|Optimize, { Imm8S, Reg16|Unspecified|BaseIndex, Reg16 }
imul, 0x69, i186, Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Imm16|Imm32|Imm32S, Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
-imul, 0x69, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|EVexMap4|NF, { Imm16|Imm32|Imm32S, Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
-imulzu, 0x69, APX_F, Modrm|No_bSuf|No_sSuf|EVexMap4|NF|ZU, { Imm16, Reg16|Unspecified|BaseIndex, Reg16 }
+imul, 0x69, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|EVexMap4|NF|Optimize, { Imm16|Imm32|Imm32S, Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
+imulzu, 0x69, APX_F, Modrm|No_bSuf|No_sSuf|EVexMap4|NF|ZU|Optimize, { Imm16, Reg16|Unspecified|BaseIndex, Reg16 }
// imul with 2 operands mimics imul with 3 by putting the register in
// both i.rm.reg & i.rm.regmem fields. RegKludge enables this
// transformation.
imul, 0x6b, i186, Modrm|No_bSuf|No_sSuf|RegKludge, { Imm8S, Reg16|Reg32|Reg64 }
imul, 0x6b, APX_F, Modrm|No_bSuf|No_sSuf|RegKludge|EVexMap4|NF|Optimize, { Imm8S, Reg16|Reg32|Reg64 }
imul, 0x69, i186, Modrm|No_bSuf|No_sSuf|RegKludge, { Imm16|Imm32|Imm32S, Reg16|Reg32|Reg64 }
-imul, 0x69, APX_F, Modrm|No_bSuf|No_sSuf|RegKludge|EVexMap4|NF, { Imm16|Imm32|Imm32S, Reg16|Reg32|Reg64 }
+imul, 0x69, APX_F, Modrm|No_bSuf|No_sSuf|RegKludge|EVexMap4|NF|Optimize, { Imm16|Imm32|Imm32S, Reg16|Reg32|Reg64 }
// ZU is omitted here, for colliding with RegKludge. process_operands() will
// replace the constraint value after processing RegKludge.
-imulzu, 0x6b, APX_F, Modrm|No_bSuf|No_sSuf|RegKludge|EVexMap4|NF/*|ZU*/, { Imm8S, Reg16 }
-imulzu, 0x69, APX_F, Modrm|No_bSuf|No_sSuf|RegKludge|EVexMap4|NF/*|ZU*/, { Imm16, Reg16 }
+imulzu, 0x6b, APX_F, Modrm|No_bSuf|No_sSuf|RegKludge|EVexMap4|NF/*|ZU*/|Optimize, { Imm8S, Reg16 }
+imulzu, 0x69, APX_F, Modrm|No_bSuf|No_sSuf|RegKludge|EVexMap4|NF/*|ZU*/|Optimize, { Imm16, Reg16 }
<mul>
diff --git a/opcodes/i386-tbl.h b/opcodes/i386-tbl.h
index 0cd2b89..885fbc4 100644
--- a/opcodes/i386-tbl.h
+++ b/opcodes/i386-tbl.h
@@ -3452,7 +3452,7 @@ static const insn_template i386_optab[] =
0, 0, 0, 0, 0, 0 } } } },
{ MN_imul, 0x69, 3, SPACE_EVEXMAP4, None,
{ 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
1, 0 },
{ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 } },
{ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
@@ -3494,7 +3494,7 @@ static const insn_template i386_optab[] =
0, 0, 0, 0, 0, 0 } } } },
{ MN_imul, 0x69, 2, SPACE_EVEXMAP4, None,
{ 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
1, 0 },
{ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 } },
{ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
@@ -3504,7 +3504,7 @@ static const insn_template i386_optab[] =
0, 0, 0, 0, 0, 0 } } } },
{ MN_imulzu, 0x6b, 3, SPACE_EVEXMAP4, None,
{ 0, 0, 0, 1, 0, 0, 0, 0, 10, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
1, 0 },
{ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 } },
{ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
@@ -3516,7 +3516,7 @@ static const insn_template i386_optab[] =
0, 0, 0, 0, 0, 0 } } } },
{ MN_imulzu, 0x69, 3, SPACE_EVEXMAP4, None,
{ 0, 0, 0, 1, 0, 0, 0, 0, 10, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
1, 0 },
{ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 } },
{ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
@@ -3528,7 +3528,7 @@ static const insn_template i386_optab[] =
0, 0, 0, 0, 0, 0 } } } },
{ MN_imulzu, 0x6b, 2, SPACE_EVEXMAP4, None,
{ 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
1, 0 },
{ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 } },
{ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
@@ -3538,7 +3538,7 @@ static const insn_template i386_optab[] =
0, 0, 0, 0, 0, 0 } } } },
{ MN_imulzu, 0x69, 2, SPACE_EVEXMAP4, None,
{ 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
1, 0 },
{ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 } },
{ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },