aboutsummaryrefslogtreecommitdiff
path: root/gas
diff options
context:
space:
mode:
author: H.J. Lu <hjl.tools@gmail.com> 2018-03-01 06:08:04 -0800
committer: H.J. Lu <hjl.tools@gmail.com> 2018-03-01 06:08:31 -0800
commit: 8305403a1ffa8e551fd1c7bd88af1a65c0ba747c (patch)
tree: f7c1db859e0d9101de6a7a64a090bbf7fbda2fb4 /gas
parent: b1223e789040e9e8cdc6869a8a1fd1fd7acc109d (diff)
download: gdb-8305403a1ffa8e551fd1c7bd88af1a65c0ba747c.zip
gdb-8305403a1ffa8e551fd1c7bd88af1a65c0ba747c.tar.gz
gdb-8305403a1ffa8e551fd1c7bd88af1a65c0ba747c.tar.bz2
x86: Encode AVX256/AVX512 vpsub[bwdq] with VEX128/EVEX128
When 2 source registers are identical, AVX256 and AVX512 vpsub[bwdq]
instructions can be encoded with VEX128 or EVEX128 encodings.

gas/

    * config/tc-i386.c (optimize_encoding): Optimize AVX256 and
    AVX512 vpsub[bwdq] instructions.
    * testsuite/gas/i386/optimize-1.s: Add tests for AVX256 and
    AVX512 vpsub[bwdq] instructions.
    * testsuite/gas/i386/x86-64-optimize-2.s: Likewise.
    * testsuite/gas/i386/optimize-1.d: Updated.
    * testsuite/gas/i386/x86-64-optimize-2.d: Likewise.

opcodes/

    * i386-opc.tbl: Add "Optimize" to AVX256 and AVX512
    vpsub[bwdq] instructions.
    * i386-tbl.h: Regenerated.
Diffstat (limited to 'gas')
-rw-r--r--  gas/ChangeLog  10
-rw-r--r--  gas/config/tc-i386.c  9
-rw-r--r--  gas/testsuite/gas/i386/optimize-1.d  16
-rw-r--r--  gas/testsuite/gas/i386/optimize-1.s  20
-rw-r--r--  gas/testsuite/gas/i386/x86-64-optimize-2.d  32
-rw-r--r--  gas/testsuite/gas/i386/x86-64-optimize-2.s  36
6 files changed, 121 insertions, 2 deletions
diff --git a/gas/ChangeLog b/gas/ChangeLog
index 913d9bc..da34e3b 100644
--- a/gas/ChangeLog
+++ b/gas/ChangeLog
@@ -1,3 +1,13 @@
+2018-03-01 H.J. Lu <hongjiu.lu@intel.com>
+
+ * config/tc-i386.c (optimize_encoding): Optimize AVX256 and
+ AVX512 vpsub[bwdq] instructions.
+ * testsuite/gas/i386/optimize-1.s: Add tests for AVX256 and
+ AVX512 vpsub[bwdq] instructions.
+ * testsuite/gas/i386/x86-64-optimize-2.s: Likewise.
+ * testsuite/gas/i386/optimize-1.d: Updated.
+ * testsuite/gas/i386/x86-64-optimize-2.d: Likewise.
+
2018-03-01 Alan Modra <amodra@gmail.com>
* configure.ac (ALL_LINGUAS): Add uk. Sort.
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index f9dccdb..4174d19 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -3856,11 +3856,16 @@ optimize_encoding (void)
|| i.tm.base_opcode == 0x66df
|| i.tm.base_opcode == 0x57
|| i.tm.base_opcode == 0x6657
- || i.tm.base_opcode == 0x66ef)
+ || i.tm.base_opcode == 0x66ef
+ || i.tm.base_opcode == 0x66f8
+ || i.tm.base_opcode == 0x66f9
+ || i.tm.base_opcode == 0x66fa
+ || i.tm.base_opcode == 0x66fb)
&& i.tm.extension_opcode == None))
{
/* Optimize: -O2:
- VOP, one of vandnps, vandnpd, vxorps and vxorpd:
+ VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
+ vpsubq and vpsubw:
EVEX VOP %zmmM, %zmmM, %zmmN
-> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
-> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
diff --git a/gas/testsuite/gas/i386/optimize-1.d b/gas/testsuite/gas/i386/optimize-1.d
index 80b1e83..3ea6e75 100644
--- a/gas/testsuite/gas/i386/optimize-1.d
+++ b/gas/testsuite/gas/i386/optimize-1.d
@@ -42,4 +42,20 @@ Disassembly of section .text:
+[a-f0-9]+: 62 f1 f5 af ef e9 vpxorq %ymm1,%ymm1,%ymm5\{%k7\}\{z\}
+[a-f0-9]+: c5 f1 ef e9 vpxor %xmm1,%xmm1,%xmm5
+[a-f0-9]+: c5 f1 ef e9 vpxor %xmm1,%xmm1,%xmm5
+ +[a-f0-9]+: 62 f1 75 4f f8 e9 vpsubb %zmm1,%zmm1,%zmm5\{%k7\}
+ +[a-f0-9]+: 62 f1 75 af f8 e9 vpsubb %ymm1,%ymm1,%ymm5\{%k7\}\{z\}
+ +[a-f0-9]+: c5 f1 f8 e9 vpsubb %xmm1,%xmm1,%xmm5
+ +[a-f0-9]+: c5 f1 f8 e9 vpsubb %xmm1,%xmm1,%xmm5
+ +[a-f0-9]+: 62 f1 75 4f f9 e9 vpsubw %zmm1,%zmm1,%zmm5\{%k7\}
+ +[a-f0-9]+: 62 f1 75 af f9 e9 vpsubw %ymm1,%ymm1,%ymm5\{%k7\}\{z\}
+ +[a-f0-9]+: c5 f1 f9 e9 vpsubw %xmm1,%xmm1,%xmm5
+ +[a-f0-9]+: c5 f1 f9 e9 vpsubw %xmm1,%xmm1,%xmm5
+ +[a-f0-9]+: 62 f1 75 4f fa e9 vpsubd %zmm1,%zmm1,%zmm5\{%k7\}
+ +[a-f0-9]+: 62 f1 75 af fa e9 vpsubd %ymm1,%ymm1,%ymm5\{%k7\}\{z\}
+ +[a-f0-9]+: c5 f1 fa e9 vpsubd %xmm1,%xmm1,%xmm5
+ +[a-f0-9]+: c5 f1 fa e9 vpsubd %xmm1,%xmm1,%xmm5
+ +[a-f0-9]+: 62 f1 f5 4f fb e9 vpsubq %zmm1,%zmm1,%zmm5\{%k7\}
+ +[a-f0-9]+: 62 f1 f5 af fb e9 vpsubq %ymm1,%ymm1,%ymm5\{%k7\}\{z\}
+ +[a-f0-9]+: c5 f1 fb e9 vpsubq %xmm1,%xmm1,%xmm5
+ +[a-f0-9]+: c5 f1 fb e9 vpsubq %xmm1,%xmm1,%xmm5
#pass
diff --git a/gas/testsuite/gas/i386/optimize-1.s b/gas/testsuite/gas/i386/optimize-1.s
index 042a004..f875f92 100644
--- a/gas/testsuite/gas/i386/optimize-1.s
+++ b/gas/testsuite/gas/i386/optimize-1.s
@@ -46,3 +46,23 @@ _start:
vpxorq %ymm1, %ymm1, %ymm5{z}{%k7}
vpxorq %zmm1, %zmm1, %zmm5
vpxorq %ymm1, %ymm1, %ymm5
+
+ vpsubb %zmm1, %zmm1, %zmm5{%k7}
+ vpsubb %ymm1, %ymm1, %ymm5{z}{%k7}
+ vpsubb %zmm1, %zmm1, %zmm5
+ vpsubb %ymm1, %ymm1, %ymm5
+
+ vpsubw %zmm1, %zmm1, %zmm5{%k7}
+ vpsubw %ymm1, %ymm1, %ymm5{z}{%k7}
+ vpsubw %zmm1, %zmm1, %zmm5
+ vpsubw %ymm1, %ymm1, %ymm5
+
+ vpsubd %zmm1, %zmm1, %zmm5{%k7}
+ vpsubd %ymm1, %ymm1, %ymm5{z}{%k7}
+ vpsubd %zmm1, %zmm1, %zmm5
+ vpsubd %ymm1, %ymm1, %ymm5
+
+ vpsubq %zmm1, %zmm1, %zmm5{%k7}
+ vpsubq %ymm1, %ymm1, %ymm5{z}{%k7}
+ vpsubq %zmm1, %zmm1, %zmm5
+ vpsubq %ymm1, %ymm1, %ymm5
diff --git a/gas/testsuite/gas/i386/x86-64-optimize-2.d b/gas/testsuite/gas/i386/x86-64-optimize-2.d
index f982f52..f59e9b6 100644
--- a/gas/testsuite/gas/i386/x86-64-optimize-2.d
+++ b/gas/testsuite/gas/i386/x86-64-optimize-2.d
@@ -74,4 +74,36 @@ Disassembly of section .text:
+[a-f0-9]+: 62 e1 f5 08 ef c1 vpxorq %xmm1,%xmm1,%xmm16
+[a-f0-9]+: 62 b1 f5 00 ef c9 vpxorq %xmm17,%xmm17,%xmm1
+[a-f0-9]+: 62 b1 f5 00 ef c9 vpxorq %xmm17,%xmm17,%xmm1
+ +[a-f0-9]+: 62 71 75 4f f8 f9 vpsubb %zmm1,%zmm1,%zmm15\{%k7\}
+ +[a-f0-9]+: 62 71 75 af f8 f9 vpsubb %ymm1,%ymm1,%ymm15\{%k7\}\{z\}
+ +[a-f0-9]+: c5 71 f8 f9 vpsubb %xmm1,%xmm1,%xmm15
+ +[a-f0-9]+: c5 71 f8 f9 vpsubb %xmm1,%xmm1,%xmm15
+ +[a-f0-9]+: 62 e1 75 08 f8 c1 vpsubb %xmm1,%xmm1,%xmm16
+ +[a-f0-9]+: 62 e1 75 08 f8 c1 vpsubb %xmm1,%xmm1,%xmm16
+ +[a-f0-9]+: 62 b1 75 00 f8 c9 vpsubb %xmm17,%xmm17,%xmm1
+ +[a-f0-9]+: 62 b1 75 00 f8 c9 vpsubb %xmm17,%xmm17,%xmm1
+ +[a-f0-9]+: 62 71 75 4f f9 f9 vpsubw %zmm1,%zmm1,%zmm15\{%k7\}
+ +[a-f0-9]+: 62 71 75 af f9 f9 vpsubw %ymm1,%ymm1,%ymm15\{%k7\}\{z\}
+ +[a-f0-9]+: c5 71 f9 f9 vpsubw %xmm1,%xmm1,%xmm15
+ +[a-f0-9]+: c5 71 f9 f9 vpsubw %xmm1,%xmm1,%xmm15
+ +[a-f0-9]+: 62 e1 75 08 f9 c1 vpsubw %xmm1,%xmm1,%xmm16
+ +[a-f0-9]+: 62 e1 75 08 f9 c1 vpsubw %xmm1,%xmm1,%xmm16
+ +[a-f0-9]+: 62 b1 75 00 f9 c9 vpsubw %xmm17,%xmm17,%xmm1
+ +[a-f0-9]+: 62 b1 75 00 f9 c9 vpsubw %xmm17,%xmm17,%xmm1
+ +[a-f0-9]+: 62 71 75 4f fa f9 vpsubd %zmm1,%zmm1,%zmm15\{%k7\}
+ +[a-f0-9]+: 62 71 75 af fa f9 vpsubd %ymm1,%ymm1,%ymm15\{%k7\}\{z\}
+ +[a-f0-9]+: c5 71 fa f9 vpsubd %xmm1,%xmm1,%xmm15
+ +[a-f0-9]+: c5 71 fa f9 vpsubd %xmm1,%xmm1,%xmm15
+ +[a-f0-9]+: 62 e1 75 08 fa c1 vpsubd %xmm1,%xmm1,%xmm16
+ +[a-f0-9]+: 62 e1 75 08 fa c1 vpsubd %xmm1,%xmm1,%xmm16
+ +[a-f0-9]+: 62 b1 75 00 fa c9 vpsubd %xmm17,%xmm17,%xmm1
+ +[a-f0-9]+: 62 b1 75 00 fa c9 vpsubd %xmm17,%xmm17,%xmm1
+ +[a-f0-9]+: 62 71 f5 4f fb f9 vpsubq %zmm1,%zmm1,%zmm15\{%k7\}
+ +[a-f0-9]+: 62 71 f5 af fb f9 vpsubq %ymm1,%ymm1,%ymm15\{%k7\}\{z\}
+ +[a-f0-9]+: c5 71 fb f9 vpsubq %xmm1,%xmm1,%xmm15
+ +[a-f0-9]+: c5 71 fb f9 vpsubq %xmm1,%xmm1,%xmm15
+ +[a-f0-9]+: 62 e1 f5 08 fb c1 vpsubq %xmm1,%xmm1,%xmm16
+ +[a-f0-9]+: 62 e1 f5 08 fb c1 vpsubq %xmm1,%xmm1,%xmm16
+ +[a-f0-9]+: 62 b1 f5 00 fb c9 vpsubq %xmm17,%xmm17,%xmm1
+ +[a-f0-9]+: 62 b1 f5 00 fb c9 vpsubq %xmm17,%xmm17,%xmm1
#pass
diff --git a/gas/testsuite/gas/i386/x86-64-optimize-2.s b/gas/testsuite/gas/i386/x86-64-optimize-2.s
index 6aa968b..4461c09 100644
--- a/gas/testsuite/gas/i386/x86-64-optimize-2.s
+++ b/gas/testsuite/gas/i386/x86-64-optimize-2.s
@@ -78,3 +78,39 @@ _start:
vpxorq %ymm1, %ymm1, %ymm16
vpxorq %zmm17, %zmm17, %zmm1
vpxorq %ymm17, %ymm17, %ymm1
+
+ vpsubb %zmm1, %zmm1, %zmm15{%k7}
+ vpsubb %ymm1, %ymm1, %ymm15{z}{%k7}
+ vpsubb %zmm1, %zmm1, %zmm15
+ vpsubb %ymm1, %ymm1, %ymm15
+ vpsubb %zmm1, %zmm1, %zmm16
+ vpsubb %ymm1, %ymm1, %ymm16
+ vpsubb %zmm17, %zmm17, %zmm1
+ vpsubb %ymm17, %ymm17, %ymm1
+
+ vpsubw %zmm1, %zmm1, %zmm15{%k7}
+ vpsubw %ymm1, %ymm1, %ymm15{z}{%k7}
+ vpsubw %zmm1, %zmm1, %zmm15
+ vpsubw %ymm1, %ymm1, %ymm15
+ vpsubw %zmm1, %zmm1, %zmm16
+ vpsubw %ymm1, %ymm1, %ymm16
+ vpsubw %zmm17, %zmm17, %zmm1
+ vpsubw %ymm17, %ymm17, %ymm1
+
+ vpsubd %zmm1, %zmm1, %zmm15{%k7}
+ vpsubd %ymm1, %ymm1, %ymm15{z}{%k7}
+ vpsubd %zmm1, %zmm1, %zmm15
+ vpsubd %ymm1, %ymm1, %ymm15
+ vpsubd %zmm1, %zmm1, %zmm16
+ vpsubd %ymm1, %ymm1, %ymm16
+ vpsubd %zmm17, %zmm17, %zmm1
+ vpsubd %ymm17, %ymm17, %ymm1
+
+ vpsubq %zmm1, %zmm1, %zmm15{%k7}
+ vpsubq %ymm1, %ymm1, %ymm15{z}{%k7}
+ vpsubq %zmm1, %zmm1, %zmm15
+ vpsubq %ymm1, %ymm1, %ymm15
+ vpsubq %zmm1, %zmm1, %zmm16
+ vpsubq %ymm1, %ymm1, %ymm16
+ vpsubq %zmm17, %zmm17, %zmm1
+ vpsubq %ymm17, %ymm17, %ymm1