diff options
author | Jan Beulich <jbeulich@suse.com> | 2024-03-01 09:21:40 +0100 |
---|---|---|
committer | Jan Beulich <jbeulich@suse.com> | 2024-03-01 09:21:40 +0100 |
commit | c73a37b26800fc542512558f8403f980bb044435 (patch) | |
tree | b8ea425bfa94db8da466908948d7e7e55f97b0bc | |
parent | 6804f42c67d97401c7ef8438727eba788fc29329 (diff) | |
download | gdb-c73a37b26800fc542512558f8403f980bb044435.zip gdb-c73a37b26800fc542512558f8403f980bb044435.tar.gz gdb-c73a37b26800fc542512558f8403f980bb044435.tar.bz2 |
x86/APX: optimize certain XOR and SUB forms
While most of the logic in optimize_encoding() already covers APX by way
of the earlier NDD->REX2 conversion, there is a remaining set of cases
that needs to be handled separately.
-rw-r--r-- | gas/config/tc-i386.c | 28 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/x86-64-optimize-1.d | 24 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/x86-64-optimize-1.s | 24 | ||||
-rw-r--r-- | opcodes/i386-opc.tbl | 4 | ||||
-rw-r--r-- | opcodes/i386-tbl.h | 4 |
5 files changed, 80 insertions, 4 deletions
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index d169274..5e7ebf7 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -4694,6 +4694,34 @@ optimize_encoding (void) } } } + else if (i.reg_operands == 3 + && i.op[0].regs == i.op[1].regs + && i.encoding != encoding_evex + && (i.tm.mnem_off == MN_xor + || i.tm.mnem_off == MN_sub)) + { + /* Optimize: -O: + xorb %rNb, %rNb, %rMb -> xorl %rMd, %rMd + xorw %rNw, %rNw, %rMw -> xorl %rMd, %rMd + xorl %rNd, %rNd, %rMd -> xorl %rMd, %rMd + xorq %rN, %rN, %rM -> xorl %rMd, %rMd + subb %rNb, %rNb, %rMb -> subl %rMd, %rMd + subw %rNw, %rNw, %rMw -> subl %rMd, %rMd + subl %rNd, %rNd, %rMd -> subl %rMd, %rMd + subq %rN, %rN, %rM -> subl %rMd, %rMd + */ + i.tm.opcode_space = SPACE_BASE; + i.tm.opcode_modifier.evex = 0; + i.tm.opcode_modifier.size = SIZE32; + i.types[0].bitfield.byte = 0; + i.types[0].bitfield.word = 0; + i.types[0].bitfield.dword = 1; + i.types[0].bitfield.qword = 0; + i.op[0].regs = i.op[2].regs; + i.types[1] = i.types[0]; + i.op[1].regs = i.op[2].regs; + i.reg_operands = 2; + } else if (optimize > 1 && !optimize_for_space && i.reg_operands == 2 diff --git a/gas/testsuite/gas/i386/x86-64-optimize-1.d b/gas/testsuite/gas/i386/x86-64-optimize-1.d index 8bcc90c..8fff3b7 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-1.d +++ b/gas/testsuite/gas/i386/x86-64-optimize-1.d @@ -71,4 +71,28 @@ Disassembly of section .text: +[a-f0-9]+: 48 0f ba f0 1f btr \$0x1f,%rax +[a-f0-9]+: 66 0f ba e8 0f bts \$0xf,%ax +[a-f0-9]+: 48 0f ba e8 1f bts \$0x1f,%rax + +[a-f0-9]+: 31 c9 xor %ecx,%ecx + +[a-f0-9]+: 48 31 d1 xor %rdx,%rcx + +[a-f0-9]+: 31 c9 xor %ecx,%ecx + +[a-f0-9]+: 29 c9 sub %ecx,%ecx + +[a-f0-9]+: 48 29 d1 sub %rdx,%rcx + +[a-f0-9]+: 29 c9 sub %ecx,%ecx + +[a-f0-9]+: d5 50 31 c9 xor %r17d,%r17d + +[a-f0-9]+: d5 58 31 d1 xor %r18,%r17 + +[a-f0-9]+: d5 50 31 c9 xor %r17d,%r17d + +[a-f0-9]+: d5 50 29 c9 sub %r17d,%r17d + +[a-f0-9]+: d5 58 29 d1 sub %r18,%r17 + +[a-f0-9]+: d5 50 29 c9 sub 
%r17d,%r17d + +[a-f0-9]+: 31 c9 xor %ecx,%ecx + +[a-f0-9]+: 62 f4 75 18 31 d1 xor %dx,%cx,%cx + +[a-f0-9]+: 31 c9 xor %ecx,%ecx + +[a-f0-9]+: 29 c9 sub %ecx,%ecx + +[a-f0-9]+: 62 f4 75 18 29 d1 sub %dx,%cx,%cx + +[a-f0-9]+: 29 c9 sub %ecx,%ecx + +[a-f0-9]+: d5 50 31 c9 xor %r17d,%r17d + +[a-f0-9]+: 62 ec 74 10 30 d1 xor %r18b,%r17b,%r17b + +[a-f0-9]+: d5 50 31 c9 xor %r17d,%r17d + +[a-f0-9]+: d5 50 29 c9 sub %r17d,%r17d + +[a-f0-9]+: 62 ec 74 10 28 d1 sub %r18b,%r17b,%r17b + +[a-f0-9]+: d5 50 29 c9 sub %r17d,%r17d #pass diff --git a/gas/testsuite/gas/i386/x86-64-optimize-1.s b/gas/testsuite/gas/i386/x86-64-optimize-1.s index ed48412..82d5ec4 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-1.s +++ b/gas/testsuite/gas/i386/x86-64-optimize-1.s @@ -65,3 +65,27 @@ _start: btr $31, %rax bts $15, %ax bts $31, %rax + xor %rcx, %rcx, %rcx + xor %rdx, %rcx, %rcx + xor %rdx, %rdx, %rcx + sub %rcx, %rcx, %rcx + sub %rdx, %rcx, %rcx + sub %rdx, %rdx, %rcx + xor %r17, %r17, %r17 + xor %r18, %r17, %r17 + xor %r18, %r18, %r17 + sub %r17, %r17, %r17 + sub %r18, %r17, %r17 + sub %r18, %r18, %r17 + xor %cx, %cx, %cx + xor %dx, %cx, %cx + xor %dx, %dx, %cx + sub %cx, %cx, %cx + sub %dx, %cx, %cx + sub %dx, %dx, %cx + xor %r17b, %r17b, %r17b + xor %r18b, %r17b, %r17b + xor %r18b, %r18b, %r17b + sub %r17b, %r17b, %r17b + sub %r18b, %r17b, %r17b + sub %r18b, %r18b, %r17b diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl index a709e7a..1c770e2 100644 --- a/opcodes/i386-opc.tbl +++ b/opcodes/i386-opc.tbl @@ -320,7 +320,7 @@ inc, 0x40, No64, No_bSuf|No_sSuf|No_qSuf, { Reg16|Reg32 } inc, 0xfe/0, APX_F, W|Modrm|No_sSuf|CheckOperandSize|DstVVVV|EVexMap4|NF, {Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64} inc, 0xfe/0, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex } -sub, 0x28, APX_F, D|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVexMap4|NF, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, 
Reg8|Reg16|Reg32|Reg64, } +sub, 0x28, APX_F, D|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVexMap4|NF|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64, } sub, 0x28, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex } sub, 0x83/5, APX_F, Modrm|No_bSuf|No_sSuf|DstVVVV|EVexMap4|NF, { Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 } sub, 0x83/5, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex } @@ -366,7 +366,7 @@ or, 0xc, 0, W|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Acc|Byte|Word|Dword|Qword } or, 0x80/1, APX_F, W|Modrm|CheckOperandSize|No_sSuf|DstVVVV|EVexMap4|NF, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 } or, 0x80/1, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex } -xor, 0x30, APX_F, D|C|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVexMap4|NF, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 } +xor, 0x30, APX_F, D|C|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVexMap4|NF|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 } xor, 0x30, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex } xor, 0x83/6, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|DstVVVV|EVexMap4|NF, { Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 } xor, 0x83/6, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex } diff --git a/opcodes/i386-tbl.h b/opcodes/i386-tbl.h index 2646f7c..f17d94e 100644 --- a/opcodes/i386-tbl.h +++ b/opcodes/i386-tbl.h @@ -896,7 +896,7 @@ static const insn_template i386_optab[] = 0, 0, 0, 0, 1, 0 } } } }, { MN_sub, 0x28, 3, SPACE_EVEXMAP4, None, 
{ 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0 }, { { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -1306,7 +1306,7 @@ static const insn_template i386_optab[] = 0, 0, 0, 0, 1, 0 } } } }, { MN_xor, 0x30, 3, SPACE_EVEXMAP4, None, { 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0 }, { { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, |