aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJan Beulich <jbeulich@suse.com>2021-03-29 12:06:09 +0200
committerJan Beulich <jbeulich@suse.com>2021-03-29 12:06:09 +0200
commitc8cad9d389b76d19449d9190122472fa465476e5 (patch)
treea681f1e1c8e2db9d78bcd94ee540479e12067423
parent5cdaf100252746303a09e904b37aafea4153d12c (diff)
downloadgdb-c8cad9d389b76d19449d9190122472fa465476e5.zip
gdb-c8cad9d389b76d19449d9190122472fa465476e5.tar.gz
gdb-c8cad9d389b76d19449d9190122472fa465476e5.tar.bz2
x86: VPSADBW's source operands are also commutative
In commit 79dec6b7baa2 ("x86-64: optimize certain commutative VEX-encoded insns") I missed the fact that there being subtraction involved here doesn't matter, as absolute differences get summed up.
-rw-r--r--gas/ChangeLog6
-rw-r--r--gas/testsuite/gas/i386/x86-64-avx-swap-2.d4
-rw-r--r--gas/testsuite/gas/i386/x86-64-sse2avx.d1
-rw-r--r--gas/testsuite/gas/i386/x86-64-sse2avx.s1
-rw-r--r--opcodes/ChangeLog6
-rw-r--r--opcodes/i386-opc.tbl6
-rw-r--r--opcodes/i386-tbl.h6
7 files changed, 22 insertions, 8 deletions
diff --git a/gas/ChangeLog b/gas/ChangeLog
index 01ab2e8..f733134 100644
--- a/gas/ChangeLog
+++ b/gas/ChangeLog
@@ -1,5 +1,11 @@
2021-03-29 Jan Beulich <jbeulich@suse.com>
+ * testsuite/gas/i386/x86-64-sse2avx.s: Add vpsadbw case.
+ * testsuite/gas/i386/x86-64-avx-swap-2.d.
+ testsuite/gas/i386/x86-64-sse2avx.d: Adjust expectations.
+
+2021-03-29 Jan Beulich <jbeulich@suse.com>
+
* config/tc-i386.c (optimize_encoding): Replace VEX-encoding
checks by opcodespace ones.
(insert_lfence_before): Likewise.
diff --git a/gas/testsuite/gas/i386/x86-64-avx-swap-2.d b/gas/testsuite/gas/i386/x86-64-avx-swap-2.d
index 513e80d..c34c658 100644
--- a/gas/testsuite/gas/i386/x86-64-avx-swap-2.d
+++ b/gas/testsuite/gas/i386/x86-64-avx-swap-2.d
@@ -69,7 +69,7 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: c5 8d f4 d6 vpmuludq %ymm6,%ymm14,%ymm2
[ ]*[a-f0-9]+: c4 c2 4d 28 d6 vpmuldq %ymm14,%ymm6,%ymm2
[ ]*[a-f0-9]+: c5 8d eb d6 vpor %ymm6,%ymm14,%ymm2
-[ ]*[a-f0-9]+: c4 c1 4d f6 d6 vpsadbw %ymm14,%ymm6,%ymm2
+[ ]*[a-f0-9]+: c5 8d f6 d6 vpsadbw %ymm6,%ymm14,%ymm2
[ ]*[a-f0-9]+: c4 c1 4d f8 d6 vpsubb %ymm14,%ymm6,%ymm2
[ ]*[a-f0-9]+: c4 c1 4d f9 d6 vpsubw %ymm14,%ymm6,%ymm2
[ ]*[a-f0-9]+: c4 c1 4d fa d6 vpsubd %ymm14,%ymm6,%ymm2
@@ -211,7 +211,7 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: c5 89 f4 d6 vpmuludq %xmm6,%xmm14,%xmm2
[ ]*[a-f0-9]+: c4 c2 49 28 d6 vpmuldq %xmm14,%xmm6,%xmm2
[ ]*[a-f0-9]+: c5 89 eb d6 vpor %xmm6,%xmm14,%xmm2
-[ ]*[a-f0-9]+: c4 c1 49 f6 d6 vpsadbw %xmm14,%xmm6,%xmm2
+[ ]*[a-f0-9]+: c5 89 f6 d6 vpsadbw %xmm6,%xmm14,%xmm2
[ ]*[a-f0-9]+: c4 c1 49 f8 d6 vpsubb %xmm14,%xmm6,%xmm2
[ ]*[a-f0-9]+: c4 c1 49 f9 d6 vpsubw %xmm14,%xmm6,%xmm2
[ ]*[a-f0-9]+: c4 c1 49 fa d6 vpsubd %xmm14,%xmm6,%xmm2
diff --git a/gas/testsuite/gas/i386/x86-64-sse2avx.d b/gas/testsuite/gas/i386/x86-64-sse2avx.d
index c904a2b..d0e50c5 100644
--- a/gas/testsuite/gas/i386/x86-64-sse2avx.d
+++ b/gas/testsuite/gas/i386/x86-64-sse2avx.d
@@ -273,6 +273,7 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: c5 89 eb f6 vpor %xmm6,%xmm14,%xmm6
[ ]*[a-f0-9]+: c5 c9 eb 31 vpor \(%rcx\),%xmm6,%xmm6
[ ]*[a-f0-9]+: c5 c9 f6 f4 vpsadbw %xmm4,%xmm6,%xmm6
+[ ]*[a-f0-9]+: c5 89 f6 f6 vpsadbw %xmm6,%xmm14,%xmm6
[ ]*[a-f0-9]+: c5 c9 f6 31 vpsadbw \(%rcx\),%xmm6,%xmm6
[ ]*[a-f0-9]+: c4 e2 49 00 f4 vpshufb %xmm4,%xmm6,%xmm6
[ ]*[a-f0-9]+: c4 e2 49 00 31 vpshufb \(%rcx\),%xmm6,%xmm6
diff --git a/gas/testsuite/gas/i386/x86-64-sse2avx.s b/gas/testsuite/gas/i386/x86-64-sse2avx.s
index 6b25314..8a45b1e 100644
--- a/gas/testsuite/gas/i386/x86-64-sse2avx.s
+++ b/gas/testsuite/gas/i386/x86-64-sse2avx.s
@@ -280,6 +280,7 @@ _start:
por %xmm14,%xmm6
por (%rcx),%xmm6
psadbw %xmm4,%xmm6
+ psadbw %xmm14,%xmm6
psadbw (%rcx),%xmm6
pshufb %xmm4,%xmm6
pshufb (%rcx),%xmm6
diff --git a/opcodes/ChangeLog b/opcodes/ChangeLog
index bc62fac..1928cf7 100644
--- a/opcodes/ChangeLog
+++ b/opcodes/ChangeLog
@@ -1,5 +1,11 @@
2021-03-29 Jan Beulich <jbeulich@suse.com>
+ * i386-opc.tbl (psadbw): Add <sse2:comm>.
+ (vpsadbw): Add C.
+ * i386-tbl.h: Re-generate.
+
+2021-03-29 Jan Beulich <jbeulich@suse.com>
+
* i386-opc.tbl (mmx, sse, sse2, sse3, ssse3, sse41, sse42, aes,
pclmul, gfni): New templates. Use them wherever possible. Move
SSE4.1 pextrw into respective section.
diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl
index fe9f141..51f206c 100644
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -1131,7 +1131,7 @@ prefetcht0, 0xf18, 1, CpuSSE|Cpu3dnowA, Modrm|Anysize|IgnoreSize|No_bSuf|No_wSuf
prefetcht1, 0xf18, 2, CpuSSE|Cpu3dnowA, Modrm|Anysize|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|NoAVX, { BaseIndex }
prefetcht2, 0xf18, 3, CpuSSE|Cpu3dnowA, Modrm|Anysize|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|NoAVX, { BaseIndex }
psadbw, 0xff6, None, CpuSSE|Cpu3dnowA, Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|NoRex64|NoAVX, { Qword|Unspecified|BaseIndex|RegMMX, RegMMX }
-psadbw<sse2>, 0x660ff6, None, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+psadbw<sse2>, 0x660ff6, None, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|<sse2:comm>|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
pshufw, 0xf70, None, CpuSSE|Cpu3dnowA, Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|NoRex64|NoAVX, { Imm8, Qword|Unspecified|BaseIndex|RegMMX, RegMMX }
rcpps<sse>, 0x0f53, None, <sse:cpu>, Modrm|<sse:attr>|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
rcpss<sse>, 0xf30f53, None, <sse:cpu>, Modrm|<sse:scal>|<sse:vvvv>|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
@@ -1703,7 +1703,7 @@ vpmulld, 0x6640, None, CpuAVX, Modrm|Vex|Space0F38|VexVVVV=1|VexWIG|No_bSuf|No_w
vpmullw, 0x66d5, None, CpuAVX, Modrm|C|Vex|Space0F|VexVVVV=1|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
vpmuludq, 0x66f4, None, CpuAVX, Modrm|C|Vex|Space0F|VexVVVV=1|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
vpor, 0x66eb, None, CpuAVX, Modrm|C|Vex|Space0F|VexVVVV=1|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
-vpsadbw, 0x66f6, None, CpuAVX, Modrm|Vex|Space0F|VexVVVV=1|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
+vpsadbw, 0x66f6, None, CpuAVX, Modrm|Vex|Space0F|VexVVVV=1|VexWIG|C|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
vpshufb, 0x6600, None, CpuAVX, Modrm|Vex|Space0F38|VexVVVV=1|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
vpshufd, 0x6670, None, CpuAVX, Modrm|Vex|Space0F|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, Unspecified|BaseIndex|RegXMM, RegXMM }
vpshufhw, 0xf370, None, CpuAVX, Modrm|Vex|Space0F|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, Unspecified|BaseIndex|RegXMM, RegXMM }
@@ -1854,7 +1854,7 @@ vpmulld, 0x6640, None, CpuAVX2, Modrm|Vex=2|Space0F38|VexVVVV=1|VexWIG|No_bSuf|N
vpmullw, 0x66d5, None, CpuAVX2, Modrm|C|Vex=2|Space0F|VexVVVV=1|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
vpmuludq, 0x66f4, None, CpuAVX2, Modrm|C|Vex=2|Space0F|VexVVVV=1|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
vpor, 0x66eb, None, CpuAVX2, Modrm|C|Vex=2|Space0F|VexVVVV=1|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
-vpsadbw, 0x66f6, None, CpuAVX2, Modrm|Vex=2|Space0F|VexVVVV=1|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
+vpsadbw, 0x66f6, None, CpuAVX2, Modrm|Vex=2|Space0F|VexVVVV=1|VexWIG|C|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
vpshufb, 0x6600, None, CpuAVX2, Modrm|Vex=2|Space0F38|VexVVVV=1|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
vpshufd, 0x6670, None, CpuAVX2, Modrm|Vex=2|Space0F|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, Unspecified|BaseIndex|RegYMM, RegYMM }
vpshufhw, 0xf370, None, CpuAVX2, Modrm|Vex=2|Space0F|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, Unspecified|BaseIndex|RegYMM, RegYMM }
diff --git a/opcodes/i386-tbl.h b/opcodes/i386-tbl.h
index dfc4c66..0f5ea46 100644
--- a/opcodes/i386-tbl.h
+++ b/opcodes/i386-tbl.h
@@ -14578,7 +14578,7 @@ const insn_template i386_optab[] =
{ "psadbw", 0xf6, None, 2,
{ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -40433,7 +40433,7 @@ const insn_template i386_optab[] =
{ "vpsadbw", 0xf6, None, 3,
{ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 3, 1, 1, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -40450,7 +40450,7 @@ const insn_template i386_optab[] =
{ "vpsadbw", 0xf6, None, 3,
{ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 3, 1, 1, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,