From 2513312930b2b8a0b50fb681f2781372cce3c2f6 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 28 Jun 2024 08:24:45 +0200 Subject: x86/APX: apply NDD-to-legacy transformation to further CMOVcc forms With both sources being registers, these insns are almost commutative; the only extra adjustment needed is inversion of the encoded condition. --- gas/config/tc-i386.c | 17 ++++++++++++++++- gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d | 16 ++++++++++++++++ gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s | 16 ++++++++++++++++ 3 files changed, 48 insertions(+), 1 deletion(-) (limited to 'gas') diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index 4de5c01..2e19431 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -456,6 +456,9 @@ struct _i386_insn /* Disable instruction size optimization. */ bool no_optimize; + /* Invert the condition encoded in a base opcode. */ + bool invert_cond; + /* How to encode instructions. */ enum { @@ -3918,6 +3921,11 @@ install_template (const insn_template *t) i.tm.base_opcode >>= 8; } + /* For CMOVcc having undergone NDD-to-legacy optimization with its source + operands being swapped, we need to invert the encoded condition. */ + if (i.invert_cond) + i.tm.base_opcode ^= 1; + /* Note that for pseudo prefixes this produces a length of 1. But for them the length isn't interesting at all. */ for (l = 1; l < 4; ++l) @@ -9845,7 +9853,14 @@ match_template (char mnem_suffix) && !i.op[i.operands - 1].regs->reg_type.bitfield.qword))) { if (i.operands > 2 && match_dest_op == i.operands - 3) - swap_2_operands (match_dest_op, i.operands - 2); + { + swap_2_operands (match_dest_op, i.operands - 2); + + /* CMOVcc is marked commutative, but then also needs its + encoded condition inverted. */ + if ((t->base_opcode | 0xf) == 0x4f) + i.invert_cond = true; + } --i.operands; --i.reg_operands; diff --git a/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d index 795dfcf..7932b0e 100644 --- a/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d +++ b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d @@ -118,6 +118,22 @@ Disassembly of section .text: \s*[a-f0-9]+:\s*67 0f 4d 90 90 90 90 90 cmovge -0x6f6f6f70\(%eax\),%edx \s*[a-f0-9]+:\s*67 0f 4e 90 90 90 90 90 cmovle -0x6f6f6f70\(%eax\),%edx \s*[a-f0-9]+:\s*67 0f 4f 90 90 90 90 90 cmovg -0x6f6f6f70\(%eax\),%edx +\s*[a-f0-9]+:\s*0f 41 d1 cmovno %ecx,%edx +\s*[a-f0-9]+:\s*0f 40 d1 cmovo %ecx,%edx +\s*[a-f0-9]+:\s*0f 43 d1 cmovae %ecx,%edx +\s*[a-f0-9]+:\s*0f 42 d1 cmovb %ecx,%edx +\s*[a-f0-9]+:\s*0f 45 d1 cmovne %ecx,%edx +\s*[a-f0-9]+:\s*0f 44 d1 cmove %ecx,%edx +\s*[a-f0-9]+:\s*0f 47 d1 cmova %ecx,%edx +\s*[a-f0-9]+:\s*0f 46 d1 cmovbe %ecx,%edx +\s*[a-f0-9]+:\s*0f 49 d1 cmovns %ecx,%edx +\s*[a-f0-9]+:\s*0f 48 d1 cmovs %ecx,%edx +\s*[a-f0-9]+:\s*0f 4b d1 cmovnp %ecx,%edx +\s*[a-f0-9]+:\s*0f 4a d1 cmovp %ecx,%edx +\s*[a-f0-9]+:\s*0f 4d d1 cmovge %ecx,%edx +\s*[a-f0-9]+:\s*0f 4c d1 cmovl %ecx,%edx +\s*[a-f0-9]+:\s*0f 4f d1 cmovg %ecx,%edx +\s*[a-f0-9]+:\s*0f 4e d1 cmovle %ecx,%edx \s*[a-f0-9]+:\s*62 f4 7d 08 60 c0 movbe %ax,%ax \s*[a-f0-9]+:\s*49 0f c8 bswap %r8 \s*[a-f0-9]+:\s*d5 98 c8 bswap %r16 diff --git a/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s index bf75e64..7a2766a 100644 --- a/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s +++ b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s @@ -112,6 +112,22 @@ cmovl 0x90909090(%eax),%edx,%edx cmovge 0x90909090(%eax),%edx,%edx cmovle 0x90909090(%eax),%edx,%edx cmovg 0x90909090(%eax),%edx,%edx +cmovo %edx,%ecx,%edx +cmovno %edx,%ecx,%edx +cmovc %edx,%ecx,%edx +cmovnc %edx,%ecx,%edx +cmovz %edx,%ecx,%edx +cmovnz %edx,%ecx,%edx +cmovna %edx,%ecx,%edx +cmovnbe %edx,%ecx,%edx +cmovs %edx,%ecx,%edx +cmovns %edx,%ecx,%edx +cmovpe %edx,%ecx,%edx +cmovpo %edx,%ecx,%edx +cmovnge %edx,%ecx,%edx +cmovnl %edx,%ecx,%edx +cmovng %edx,%ecx,%edx +cmovnle %edx,%ecx,%edx movbe %ax,%ax movbe %r8,%r8 movbe %r16,%r16 -- cgit v1.1