diff options
-rw-r--r-- | gas/config/tc-i386.c | 17 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/optimize-2.d | 3 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/optimize-2.s | 4 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/optimize-2b.d | 3 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/x86-64-optimize-3.d | 6 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/x86-64-optimize-3.s | 8 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/x86-64-optimize-3b.d | 6 | ||||
-rw-r--r-- | opcodes/i386-opc.tbl | 4 | ||||
-rw-r--r-- | opcodes/i386-tbl.h | 6 |
9 files changed, 52 insertions, 5 deletions
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index de35ee2..eb3f128 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -4563,6 +4563,23 @@ optimize_encoding (void) i.types[j].bitfield.disp8 = fits_in_disp8 (i.op[j].disps->X_add_number); } + else if (optimize_for_space + && i.tm.base_opcode == 0x29 + && i.tm.opcode_space == SPACE_0F38 + && i.operands == i.reg_operands + && i.op[0].regs == i.op[1].regs + && (!i.tm.opcode_modifier.vex + || !(i.op[0].regs->reg_flags & RegRex)) + && !is_evex_encoding (&i.tm)) + { + /* Optimize: -Os: + pcmpeqq %xmmN, %xmmN -> pcmpeqd %xmmN, %xmmN + vpcmpeqq %xmmN, %xmmN, %xmmM -> vpcmpeqd %xmmN, %xmmN, %xmmM (N < 8) + vpcmpeqq %ymmN, %ymmN, %ymmM -> vpcmpeqd %ymmN, %ymmN, %ymmM (N < 8) + */ + i.tm.opcode_space = SPACE_0F; + i.tm.base_opcode = 0x76; + } } /* Return non-zero for load instruction. */ diff --git a/gas/testsuite/gas/i386/optimize-2.d b/gas/testsuite/gas/i386/optimize-2.d index 571059e..60a9069 100644 --- a/gas/testsuite/gas/i386/optimize-2.d +++ b/gas/testsuite/gas/i386/optimize-2.d @@ -161,4 +161,7 @@ Disassembly of section .text: +[a-f0-9]+: 62 .* vporq \(%eax\)\{1to2\},%xmm2,%xmm3 +[a-f0-9]+: 62 .* vpxord \(%eax\)\{1to4\},%xmm2,%xmm3 +[a-f0-9]+: 62 .* vpxorq \(%eax\)\{1to4\},%ymm2,%ymm3 + +[a-f0-9]+: 66 .* pcmpeqd %xmm2,%xmm2 + +[a-f0-9]+: c5 .* vpcmpeqd %xmm2,%xmm2,%xmm0 + +[a-f0-9]+: c5 .* vpcmpeqd %ymm2,%ymm2,%ymm0 #pass diff --git a/gas/testsuite/gas/i386/optimize-2.s b/gas/testsuite/gas/i386/optimize-2.s index 43af945..3cf41b1 100644 --- a/gas/testsuite/gas/i386/optimize-2.s +++ b/gas/testsuite/gas/i386/optimize-2.s @@ -180,3 +180,7 @@ _start: vporq (%eax){1to2}, %xmm2, %xmm3 vpxord (%eax){1to4}, %xmm2, %xmm3 vpxorq (%eax){1to4}, %ymm2, %ymm3 + + pcmpeqq %xmm2, %xmm2 + vpcmpeqq %xmm2, %xmm2, %xmm0 + vpcmpeqq %ymm2, %ymm2, %ymm0 diff --git a/gas/testsuite/gas/i386/optimize-2b.d b/gas/testsuite/gas/i386/optimize-2b.d index 5863688..0624cb4 100644 --- a/gas/testsuite/gas/i386/optimize-2b.d +++ b/gas/testsuite/gas/i386/optimize-2b.d @@ -162,4 +162,7 @@ Disassembly of section .text: +[a-f0-9]+: 62 .* vporq \(%eax\)\{1to2\},%xmm2,%xmm3 +[a-f0-9]+: 62 .* vpxord \(%eax\)\{1to4\},%xmm2,%xmm3 +[a-f0-9]+: 62 .* vpxorq \(%eax\)\{1to4\},%ymm2,%ymm3 + +[a-f0-9]+: 66 .* pcmpeqq %xmm2,%xmm2 + +[a-f0-9]+: c4 .* vpcmpeqq %xmm2,%xmm2,%xmm0 + +[a-f0-9]+: c4 .* vpcmpeqq %ymm2,%ymm2,%ymm0 #pass diff --git a/gas/testsuite/gas/i386/x86-64-optimize-3.d b/gas/testsuite/gas/i386/x86-64-optimize-3.d index 26122dd..fc5ec5e 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-3.d +++ b/gas/testsuite/gas/i386/x86-64-optimize-3.d @@ -199,4 +199,10 @@ Disassembly of section .text: +[a-f0-9]+: 62 .* vporq \(%rax\)\{1to2\},%xmm2,%xmm3 +[a-f0-9]+: 62 .* vpxord \(%rax\)\{1to4\},%xmm2,%xmm3 +[a-f0-9]+: 62 .* vpxorq \(%rax\)\{1to4\},%ymm2,%ymm3 + +[a-f0-9]+: 66 .* pcmpeqd %xmm2,%xmm2 + +[a-f0-9]+: c5 .* vpcmpeqd %xmm2,%xmm2,%xmm0 + +[a-f0-9]+: c5 .* vpcmpeqd %ymm2,%ymm2,%ymm0 + +[a-f0-9]+: 66 .* pcmpeqd %xmm12,%xmm12 + +[a-f0-9]+: c4 .* vpcmpeqq %xmm12,%xmm12,%xmm0 + +[a-f0-9]+: c4 .* vpcmpeqq %ymm12,%ymm12,%ymm0 #pass diff --git a/gas/testsuite/gas/i386/x86-64-optimize-3.s b/gas/testsuite/gas/i386/x86-64-optimize-3.s index cbfed3b..d2bcbce 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-3.s +++ b/gas/testsuite/gas/i386/x86-64-optimize-3.s @@ -221,3 +221,11 @@ _start: vporq (%rax){1to2}, %xmm2, %xmm3 vpxord (%rax){1to4}, %xmm2, %xmm3 vpxorq (%rax){1to4}, %ymm2, %ymm3 + + pcmpeqq %xmm2, %xmm2 + vpcmpeqq %xmm2, %xmm2, %xmm0 + vpcmpeqq %ymm2, %ymm2, %ymm0 + + pcmpeqq %xmm12, %xmm12 + vpcmpeqq %xmm12, %xmm12, %xmm0 + vpcmpeqq %ymm12, %ymm12, %ymm0 diff --git a/gas/testsuite/gas/i386/x86-64-optimize-3b.d b/gas/testsuite/gas/i386/x86-64-optimize-3b.d index 00b65f3..abd48dc 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-3b.d +++ b/gas/testsuite/gas/i386/x86-64-optimize-3b.d @@ -200,4 +200,10 @@ Disassembly of section .text: +[a-f0-9]+: 62 .* vporq \(%rax\)\{1to2\},%xmm2,%xmm3 +[a-f0-9]+: 62 .* vpxord \(%rax\)\{1to4\},%xmm2,%xmm3 +[a-f0-9]+: 62 .* vpxorq \(%rax\)\{1to4\},%ymm2,%ymm3 + +[a-f0-9]+: 66 .* pcmpeqq %xmm2,%xmm2 + +[a-f0-9]+: c4 .* vpcmpeqq %xmm2,%xmm2,%xmm0 + +[a-f0-9]+: c4 .* vpcmpeqq %ymm2,%ymm2,%ymm0 + +[a-f0-9]+: 66 .* pcmpeqq %xmm12,%xmm12 + +[a-f0-9]+: c4 .* vpcmpeqq %xmm12,%xmm12,%xmm0 + +[a-f0-9]+: c4 .* vpcmpeqq %ymm12,%ymm12,%ymm0 #pass diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl index 26fee21..607a438 100644 --- a/opcodes/i386-opc.tbl +++ b/opcodes/i386-opc.tbl @@ -1363,7 +1363,7 @@ pblendvb, 0x664c, AVX, Modrm|Vex128|Space0F3A|VexVVVV|VexW0|NoSuf|Implicit1stXmm pblendvb, 0x660f3810, SSE4_1, Modrm|NoSuf, { Acc|Xmmword, RegXMM|Unspecified|BaseIndex, RegXMM } pblendvb, 0x660f3810, SSE4_1, Modrm|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM } pblendw<sse41>, 0x660f3a0e, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM } -pcmpeqq<sse41>, 0x660f3829, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM } +pcmpeqq<sse41>, 0x660f3829, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf|Optimize, { RegXMM|Unspecified|BaseIndex, RegXMM } pextr<bw><sse41>, 0x660f3a14 | <bw:opc>, <sse41:cpu>, RegMem|<sse41:attr>|NoSuf|IgnoreSize|NoRex64, { Imm8, RegXMM, Reg32|Reg64 } pextr<bw><sse41>, 0x660f3a14 | <bw:opc>, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Imm8, RegXMM, <bw:elem>|Unspecified|BaseIndex } pextrd<sse41>, 0x660f3a16, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf|IgnoreSize, { Imm8, RegXMM, Reg32|Unspecified|BaseIndex } @@ -1592,7 +1592,7 @@ vpblendvb, 0x664c, AVX|AVX2, Modrm|Vex|Space0F3A|VexVVVV|VexWIG|CheckOperandSize vpblendw, 0x660e, AVX|AVX2, Modrm|Vex|Space0F3A|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM } vpcmpeq<bw>, 0x6674 | <bw:opc>, AVX|AVX2, Modrm|C|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM } vpcmpeqd, 0x6676, AVX|AVX2, Modrm|C|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM } -vpcmpeqq, 0x6629, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM } +vpcmpeqq, 0x6629, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf|Optimize, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM } vpcmpestri, 0x6661, AVX|No64, Modrm|Vex|Space0F3A|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM, RegXMM } vpcmpestri, 0x6661, AVX|x64, Modrm|Vex|Space0F3A|IgnoreSize|No_bSuf|No_wSuf|No_sSuf, { Imm8, Xmmword|Unspecified|BaseIndex|RegXMM, RegXMM } vpcmpestrm, 0x6660, AVX|No64, Modrm|Vex|Space0F3A|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM, RegXMM } diff --git a/opcodes/i386-tbl.h b/opcodes/i386-tbl.h index cecb084..4a6c92a 100644 --- a/opcodes/i386-tbl.h +++ b/opcodes/i386-tbl.h @@ -17966,7 +17966,7 @@ static const insn_template i386_optab[] = 1, 0, 0, 0, 0, 0 } } } }, { MN_pcmpeqq, 0x29, 2, SPACE_0F38, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -17980,7 +17980,7 @@ static const insn_template i386_optab[] = 1, 0, 0, 0, 0, 0 } } } }, { MN_pcmpeqq, 0x29, 2, SPACE_0F38, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -30132,7 +30132,7 @@ static const insn_template i386_optab[] = 0, 0, 0, 0, 0, 0 } } } }, { MN_vpcmpeqq, 0x29, 3, SPACE_0F38, None, { 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 1, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + 0, 0, 0, 1, 1, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |