aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJan Beulich <jbeulich@suse.com>2023-07-04 17:05:47 +0200
committerJan Beulich <jbeulich@suse.com>2023-07-04 17:05:47 +0200
commitb7bd93bc51c49c58d0e9cdddf6b4cc44bc4c76dd (patch)
tree632b15140fa5c8a3af68a87cdc35368f8f8fe9b5
parent151da91724c50b9c501b4b89857e675bc2dd8b52 (diff)
downloadgdb-b7bd93bc51c49c58d0e9cdddf6b4cc44bc4c76dd.zip
gdb-b7bd93bc51c49c58d0e9cdddf6b4cc44bc4c76dd.tar.gz
gdb-b7bd93bc51c49c58d0e9cdddf6b4cc44bc4c76dd.tar.bz2
x86: optimize pre-AVX512 {,V}PCMPEQQ with identical sources
The {,V}PCMPEQD alternative is 1 byte shorter in many cases.
-rw-r--r--gas/config/tc-i386.c17
-rw-r--r--gas/testsuite/gas/i386/optimize-2.d3
-rw-r--r--gas/testsuite/gas/i386/optimize-2.s4
-rw-r--r--gas/testsuite/gas/i386/optimize-2b.d3
-rw-r--r--gas/testsuite/gas/i386/x86-64-optimize-3.d6
-rw-r--r--gas/testsuite/gas/i386/x86-64-optimize-3.s8
-rw-r--r--gas/testsuite/gas/i386/x86-64-optimize-3b.d6
-rw-r--r--opcodes/i386-opc.tbl4
-rw-r--r--opcodes/i386-tbl.h6
9 files changed, 52 insertions, 5 deletions
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index de35ee2..eb3f128 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -4563,6 +4563,23 @@ optimize_encoding (void)
i.types[j].bitfield.disp8
= fits_in_disp8 (i.op[j].disps->X_add_number);
}
+ else if (optimize_for_space
+ && i.tm.base_opcode == 0x29
+ && i.tm.opcode_space == SPACE_0F38
+ && i.operands == i.reg_operands
+ && i.op[0].regs == i.op[1].regs
+ && (!i.tm.opcode_modifier.vex
+ || !(i.op[0].regs->reg_flags & RegRex))
+ && !is_evex_encoding (&i.tm))
+ {
+ /* Optimize: -Os:
+ pcmpeqq %xmmN, %xmmN -> pcmpeqd %xmmN, %xmmN
+ vpcmpeqq %xmmN, %xmmN, %xmmM -> vpcmpeqd %xmmN, %xmmN, %xmmM (N < 8)
+ vpcmpeqq %ymmN, %ymmN, %ymmM -> vpcmpeqd %ymmN, %ymmN, %ymmM (N < 8)
+ */
+ i.tm.opcode_space = SPACE_0F;
+ i.tm.base_opcode = 0x76;
+ }
}
/* Return non-zero for load instruction. */
diff --git a/gas/testsuite/gas/i386/optimize-2.d b/gas/testsuite/gas/i386/optimize-2.d
index 571059e..60a9069 100644
--- a/gas/testsuite/gas/i386/optimize-2.d
+++ b/gas/testsuite/gas/i386/optimize-2.d
@@ -161,4 +161,7 @@ Disassembly of section .text:
+[a-f0-9]+: 62 .* vporq \(%eax\)\{1to2\},%xmm2,%xmm3
+[a-f0-9]+: 62 .* vpxord \(%eax\)\{1to4\},%xmm2,%xmm3
+[a-f0-9]+: 62 .* vpxorq \(%eax\)\{1to4\},%ymm2,%ymm3
+ +[a-f0-9]+: 66 .* pcmpeqd %xmm2,%xmm2
+ +[a-f0-9]+: c5 .* vpcmpeqd %xmm2,%xmm2,%xmm0
+ +[a-f0-9]+: c5 .* vpcmpeqd %ymm2,%ymm2,%ymm0
#pass
diff --git a/gas/testsuite/gas/i386/optimize-2.s b/gas/testsuite/gas/i386/optimize-2.s
index 43af945..3cf41b1 100644
--- a/gas/testsuite/gas/i386/optimize-2.s
+++ b/gas/testsuite/gas/i386/optimize-2.s
@@ -180,3 +180,7 @@ _start:
vporq (%eax){1to2}, %xmm2, %xmm3
vpxord (%eax){1to4}, %xmm2, %xmm3
vpxorq (%eax){1to4}, %ymm2, %ymm3
+
+ pcmpeqq %xmm2, %xmm2
+ vpcmpeqq %xmm2, %xmm2, %xmm0
+ vpcmpeqq %ymm2, %ymm2, %ymm0
diff --git a/gas/testsuite/gas/i386/optimize-2b.d b/gas/testsuite/gas/i386/optimize-2b.d
index 5863688..0624cb4 100644
--- a/gas/testsuite/gas/i386/optimize-2b.d
+++ b/gas/testsuite/gas/i386/optimize-2b.d
@@ -162,4 +162,7 @@ Disassembly of section .text:
+[a-f0-9]+: 62 .* vporq \(%eax\)\{1to2\},%xmm2,%xmm3
+[a-f0-9]+: 62 .* vpxord \(%eax\)\{1to4\},%xmm2,%xmm3
+[a-f0-9]+: 62 .* vpxorq \(%eax\)\{1to4\},%ymm2,%ymm3
+ +[a-f0-9]+: 66 .* pcmpeqq %xmm2,%xmm2
+ +[a-f0-9]+: c4 .* vpcmpeqq %xmm2,%xmm2,%xmm0
+ +[a-f0-9]+: c4 .* vpcmpeqq %ymm2,%ymm2,%ymm0
#pass
diff --git a/gas/testsuite/gas/i386/x86-64-optimize-3.d b/gas/testsuite/gas/i386/x86-64-optimize-3.d
index 26122dd..fc5ec5e 100644
--- a/gas/testsuite/gas/i386/x86-64-optimize-3.d
+++ b/gas/testsuite/gas/i386/x86-64-optimize-3.d
@@ -199,4 +199,10 @@ Disassembly of section .text:
+[a-f0-9]+: 62 .* vporq \(%rax\)\{1to2\},%xmm2,%xmm3
+[a-f0-9]+: 62 .* vpxord \(%rax\)\{1to4\},%xmm2,%xmm3
+[a-f0-9]+: 62 .* vpxorq \(%rax\)\{1to4\},%ymm2,%ymm3
+ +[a-f0-9]+: 66 .* pcmpeqd %xmm2,%xmm2
+ +[a-f0-9]+: c5 .* vpcmpeqd %xmm2,%xmm2,%xmm0
+ +[a-f0-9]+: c5 .* vpcmpeqd %ymm2,%ymm2,%ymm0
+ +[a-f0-9]+: 66 .* pcmpeqd %xmm12,%xmm12
+ +[a-f0-9]+: c4 .* vpcmpeqq %xmm12,%xmm12,%xmm0
+ +[a-f0-9]+: c4 .* vpcmpeqq %ymm12,%ymm12,%ymm0
#pass
diff --git a/gas/testsuite/gas/i386/x86-64-optimize-3.s b/gas/testsuite/gas/i386/x86-64-optimize-3.s
index cbfed3b..d2bcbce 100644
--- a/gas/testsuite/gas/i386/x86-64-optimize-3.s
+++ b/gas/testsuite/gas/i386/x86-64-optimize-3.s
@@ -221,3 +221,11 @@ _start:
vporq (%rax){1to2}, %xmm2, %xmm3
vpxord (%rax){1to4}, %xmm2, %xmm3
vpxorq (%rax){1to4}, %ymm2, %ymm3
+
+ pcmpeqq %xmm2, %xmm2
+ vpcmpeqq %xmm2, %xmm2, %xmm0
+ vpcmpeqq %ymm2, %ymm2, %ymm0
+
+ pcmpeqq %xmm12, %xmm12
+ vpcmpeqq %xmm12, %xmm12, %xmm0
+ vpcmpeqq %ymm12, %ymm12, %ymm0
diff --git a/gas/testsuite/gas/i386/x86-64-optimize-3b.d b/gas/testsuite/gas/i386/x86-64-optimize-3b.d
index 00b65f3..abd48dc 100644
--- a/gas/testsuite/gas/i386/x86-64-optimize-3b.d
+++ b/gas/testsuite/gas/i386/x86-64-optimize-3b.d
@@ -200,4 +200,10 @@ Disassembly of section .text:
+[a-f0-9]+: 62 .* vporq \(%rax\)\{1to2\},%xmm2,%xmm3
+[a-f0-9]+: 62 .* vpxord \(%rax\)\{1to4\},%xmm2,%xmm3
+[a-f0-9]+: 62 .* vpxorq \(%rax\)\{1to4\},%ymm2,%ymm3
+ +[a-f0-9]+: 66 .* pcmpeqq %xmm2,%xmm2
+ +[a-f0-9]+: c4 .* vpcmpeqq %xmm2,%xmm2,%xmm0
+ +[a-f0-9]+: c4 .* vpcmpeqq %ymm2,%ymm2,%ymm0
+ +[a-f0-9]+: 66 .* pcmpeqq %xmm12,%xmm12
+ +[a-f0-9]+: c4 .* vpcmpeqq %xmm12,%xmm12,%xmm0
+ +[a-f0-9]+: c4 .* vpcmpeqq %ymm12,%ymm12,%ymm0
#pass
diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl
index 26fee21..607a438 100644
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -1363,7 +1363,7 @@ pblendvb, 0x664c, AVX, Modrm|Vex128|Space0F3A|VexVVVV|VexW0|NoSuf|Implicit1stXmm
pblendvb, 0x660f3810, SSE4_1, Modrm|NoSuf, { Acc|Xmmword, RegXMM|Unspecified|BaseIndex, RegXMM }
pblendvb, 0x660f3810, SSE4_1, Modrm|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
pblendw<sse41>, 0x660f3a0e, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
-pcmpeqq<sse41>, 0x660f3829, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pcmpeqq<sse41>, 0x660f3829, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf|Optimize, { RegXMM|Unspecified|BaseIndex, RegXMM }
pextr<bw><sse41>, 0x660f3a14 | <bw:opc>, <sse41:cpu>, RegMem|<sse41:attr>|NoSuf|IgnoreSize|NoRex64, { Imm8, RegXMM, Reg32|Reg64 }
pextr<bw><sse41>, 0x660f3a14 | <bw:opc>, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Imm8, RegXMM, <bw:elem>|Unspecified|BaseIndex }
pextrd<sse41>, 0x660f3a16, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf|IgnoreSize, { Imm8, RegXMM, Reg32|Unspecified|BaseIndex }
@@ -1592,7 +1592,7 @@ vpblendvb, 0x664c, AVX|AVX2, Modrm|Vex|Space0F3A|VexVVVV|VexWIG|CheckOperandSize
vpblendw, 0x660e, AVX|AVX2, Modrm|Vex|Space0F3A|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
vpcmpeq<bw>, 0x6674 | <bw:opc>, AVX|AVX2, Modrm|C|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
vpcmpeqd, 0x6676, AVX|AVX2, Modrm|C|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpcmpeqq, 0x6629, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpcmpeqq, 0x6629, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf|Optimize, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
vpcmpestri, 0x6661, AVX|No64, Modrm|Vex|Space0F3A|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM, RegXMM }
vpcmpestri, 0x6661, AVX|x64, Modrm|Vex|Space0F3A|IgnoreSize|No_bSuf|No_wSuf|No_sSuf, { Imm8, Xmmword|Unspecified|BaseIndex|RegXMM, RegXMM }
vpcmpestrm, 0x6660, AVX|No64, Modrm|Vex|Space0F3A|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM, RegXMM }
diff --git a/opcodes/i386-tbl.h b/opcodes/i386-tbl.h
index cecb084..4a6c92a 100644
--- a/opcodes/i386-tbl.h
+++ b/opcodes/i386-tbl.h
@@ -17966,7 +17966,7 @@ static const insn_template i386_optab[] =
1, 0, 0, 0, 0, 0 } } } },
{ MN_pcmpeqq, 0x29, 2, SPACE_0F38, None,
{ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
- 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 },
{ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -17980,7 +17980,7 @@ static const insn_template i386_optab[] =
1, 0, 0, 0, 0, 0 } } } },
{ MN_pcmpeqq, 0x29, 2, SPACE_0F38, None,
{ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 },
{ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -30132,7 +30132,7 @@ static const insn_template i386_optab[] =
0, 0, 0, 0, 0, 0 } } } },
{ MN_vpcmpeqq, 0x29, 3, SPACE_0F38, None,
{ 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
- 0, 0, 0, 1, 1, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ 0, 0, 0, 1, 1, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 },
{ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,