aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jan Beulich <jbeulich@suse.com> 2024-03-01 09:21:40 +0100
committer Jan Beulich <jbeulich@suse.com> 2024-03-01 09:21:40 +0100
commit c73a37b26800fc542512558f8403f980bb044435 (patch)
tree b8ea425bfa94db8da466908948d7e7e55f97b0bc
parent 6804f42c67d97401c7ef8438727eba788fc29329 (diff)
downloadgdb-c73a37b26800fc542512558f8403f980bb044435.zip
gdb-c73a37b26800fc542512558f8403f980bb044435.tar.gz
gdb-c73a37b26800fc542512558f8403f980bb044435.tar.bz2
x86/APX: optimize certain XOR and SUB forms
While most of the logic in optimize_encoding() already covers APX by way of the earlier NDD->REX2 conversion, there is a remaining set of cases which needs to be handled separately.
-rw-r--r-- gas/config/tc-i386.c 28
-rw-r--r-- gas/testsuite/gas/i386/x86-64-optimize-1.d 24
-rw-r--r-- gas/testsuite/gas/i386/x86-64-optimize-1.s 24
-rw-r--r-- opcodes/i386-opc.tbl 4
-rw-r--r-- opcodes/i386-tbl.h 4
5 files changed, 80 insertions, 4 deletions
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index d169274..5e7ebf7 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -4694,6 +4694,34 @@ optimize_encoding (void)
}
}
}
+ else if (i.reg_operands == 3
+ && i.op[0].regs == i.op[1].regs
+ && i.encoding != encoding_evex
+ && (i.tm.mnem_off == MN_xor
+ || i.tm.mnem_off == MN_sub))
+ {
+ /* Optimize: -O:
+ xorb %rNb, %rNb, %rMb -> xorl %rMd, %rMd
+ xorw %rNw, %rNw, %rMw -> xorl %rMd, %rMd
+ xorl %rNd, %rNd, %rMd -> xorl %rMd, %rMd
+ xorq %rN, %rN, %rM -> xorl %rMd, %rMd
+ subb %rNb, %rNb, %rMb -> subl %rMd, %rMd
+ subw %rNw, %rNw, %rMw -> subl %rMd, %rMd
+ subl %rNd, %rNd, %rMd -> subl %rMd, %rMd
+ subq %rN, %rN, %rM -> subl %rMd, %rMd
+ */
+ i.tm.opcode_space = SPACE_BASE;
+ i.tm.opcode_modifier.evex = 0;
+ i.tm.opcode_modifier.size = SIZE32;
+ i.types[0].bitfield.byte = 0;
+ i.types[0].bitfield.word = 0;
+ i.types[0].bitfield.dword = 1;
+ i.types[0].bitfield.qword = 0;
+ i.op[0].regs = i.op[2].regs;
+ i.types[1] = i.types[0];
+ i.op[1].regs = i.op[2].regs;
+ i.reg_operands = 2;
+ }
else if (optimize > 1
&& !optimize_for_space
&& i.reg_operands == 2
diff --git a/gas/testsuite/gas/i386/x86-64-optimize-1.d b/gas/testsuite/gas/i386/x86-64-optimize-1.d
index 8bcc90c..8fff3b7 100644
--- a/gas/testsuite/gas/i386/x86-64-optimize-1.d
+++ b/gas/testsuite/gas/i386/x86-64-optimize-1.d
@@ -71,4 +71,28 @@ Disassembly of section .text:
+[a-f0-9]+: 48 0f ba f0 1f btr \$0x1f,%rax
+[a-f0-9]+: 66 0f ba e8 0f bts \$0xf,%ax
+[a-f0-9]+: 48 0f ba e8 1f bts \$0x1f,%rax
+ +[a-f0-9]+: 31 c9 xor %ecx,%ecx
+ +[a-f0-9]+: 48 31 d1 xor %rdx,%rcx
+ +[a-f0-9]+: 31 c9 xor %ecx,%ecx
+ +[a-f0-9]+: 29 c9 sub %ecx,%ecx
+ +[a-f0-9]+: 48 29 d1 sub %rdx,%rcx
+ +[a-f0-9]+: 29 c9 sub %ecx,%ecx
+ +[a-f0-9]+: d5 50 31 c9 xor %r17d,%r17d
+ +[a-f0-9]+: d5 58 31 d1 xor %r18,%r17
+ +[a-f0-9]+: d5 50 31 c9 xor %r17d,%r17d
+ +[a-f0-9]+: d5 50 29 c9 sub %r17d,%r17d
+ +[a-f0-9]+: d5 58 29 d1 sub %r18,%r17
+ +[a-f0-9]+: d5 50 29 c9 sub %r17d,%r17d
+ +[a-f0-9]+: 31 c9 xor %ecx,%ecx
+ +[a-f0-9]+: 62 f4 75 18 31 d1 xor %dx,%cx,%cx
+ +[a-f0-9]+: 31 c9 xor %ecx,%ecx
+ +[a-f0-9]+: 29 c9 sub %ecx,%ecx
+ +[a-f0-9]+: 62 f4 75 18 29 d1 sub %dx,%cx,%cx
+ +[a-f0-9]+: 29 c9 sub %ecx,%ecx
+ +[a-f0-9]+: d5 50 31 c9 xor %r17d,%r17d
+ +[a-f0-9]+: 62 ec 74 10 30 d1 xor %r18b,%r17b,%r17b
+ +[a-f0-9]+: d5 50 31 c9 xor %r17d,%r17d
+ +[a-f0-9]+: d5 50 29 c9 sub %r17d,%r17d
+ +[a-f0-9]+: 62 ec 74 10 28 d1 sub %r18b,%r17b,%r17b
+ +[a-f0-9]+: d5 50 29 c9 sub %r17d,%r17d
#pass
diff --git a/gas/testsuite/gas/i386/x86-64-optimize-1.s b/gas/testsuite/gas/i386/x86-64-optimize-1.s
index ed48412..82d5ec4 100644
--- a/gas/testsuite/gas/i386/x86-64-optimize-1.s
+++ b/gas/testsuite/gas/i386/x86-64-optimize-1.s
@@ -65,3 +65,27 @@ _start:
btr $31, %rax
bts $15, %ax
bts $31, %rax
+ xor %rcx, %rcx, %rcx
+ xor %rdx, %rcx, %rcx
+ xor %rdx, %rdx, %rcx
+ sub %rcx, %rcx, %rcx
+ sub %rdx, %rcx, %rcx
+ sub %rdx, %rdx, %rcx
+ xor %r17, %r17, %r17
+ xor %r18, %r17, %r17
+ xor %r18, %r18, %r17
+ sub %r17, %r17, %r17
+ sub %r18, %r17, %r17
+ sub %r18, %r18, %r17
+ xor %cx, %cx, %cx
+ xor %dx, %cx, %cx
+ xor %dx, %dx, %cx
+ sub %cx, %cx, %cx
+ sub %dx, %cx, %cx
+ sub %dx, %dx, %cx
+ xor %r17b, %r17b, %r17b
+ xor %r18b, %r17b, %r17b
+ xor %r18b, %r18b, %r17b
+ sub %r17b, %r17b, %r17b
+ sub %r18b, %r17b, %r17b
+ sub %r18b, %r18b, %r17b
diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl
index a709e7a..1c770e2 100644
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -320,7 +320,7 @@ inc, 0x40, No64, No_bSuf|No_sSuf|No_qSuf, { Reg16|Reg32 }
inc, 0xfe/0, APX_F, W|Modrm|No_sSuf|CheckOperandSize|DstVVVV|EVexMap4|NF, {Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64}
inc, 0xfe/0, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
-sub, 0x28, APX_F, D|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVexMap4|NF, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64, }
+sub, 0x28, APX_F, D|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVexMap4|NF|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64, }
sub, 0x28, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
sub, 0x83/5, APX_F, Modrm|No_bSuf|No_sSuf|DstVVVV|EVexMap4|NF, { Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
sub, 0x83/5, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex }
@@ -366,7 +366,7 @@ or, 0xc, 0, W|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Acc|Byte|Word|Dword|Qword }
or, 0x80/1, APX_F, W|Modrm|CheckOperandSize|No_sSuf|DstVVVV|EVexMap4|NF, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
or, 0x80/1, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
-xor, 0x30, APX_F, D|C|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVexMap4|NF, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
+xor, 0x30, APX_F, D|C|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVexMap4|NF|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
xor, 0x30, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
xor, 0x83/6, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|DstVVVV|EVexMap4|NF, { Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
xor, 0x83/6, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex }
diff --git a/opcodes/i386-tbl.h b/opcodes/i386-tbl.h
index 2646f7c..f17d94e 100644
--- a/opcodes/i386-tbl.h
+++ b/opcodes/i386-tbl.h
@@ -896,7 +896,7 @@ static const insn_template i386_optab[] =
0, 0, 0, 0, 1, 0 } } } },
{ MN_sub, 0x28, 3, SPACE_EVEXMAP4, None,
{ 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
+ 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
0 },
{ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 } },
{ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
@@ -1306,7 +1306,7 @@ static const insn_template i386_optab[] =
0, 0, 0, 0, 1, 0 } } } },
{ MN_xor, 0x30, 3, SPACE_EVEXMAP4, None,
{ 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1,
+ 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1,
0 },
{ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 } },
{ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },