diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2018-03-01 06:08:04 -0800 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2018-03-01 06:08:31 -0800 |
commit | 8305403a1ffa8e551fd1c7bd88af1a65c0ba747c (patch) | |
tree | f7c1db859e0d9101de6a7a64a090bbf7fbda2fb4 /gas | |
parent | b1223e789040e9e8cdc6869a8a1fd1fd7acc109d (diff) | |
download | gdb-8305403a1ffa8e551fd1c7bd88af1a65c0ba747c.zip gdb-8305403a1ffa8e551fd1c7bd88af1a65c0ba747c.tar.gz gdb-8305403a1ffa8e551fd1c7bd88af1a65c0ba747c.tar.bz2 |
x86: Encode AVX256/AVX512 vpsub[bwdq] with VEX128/EVEX128
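When the two source registers are identical, the result of vpsub[bwdq]
is all zeros, and a 128-bit VEX or EVEX encoded instruction also zeroes
the upper bits of the destination vector register. Unmasked AVX256 and
AVX512 vpsub[bwdq] instructions can therefore be encoded with VEX128, or
with EVEX128 when any register operand is numbered 16 or above.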
gas/
* config/tc-i386.c (optimize_encoding): Optimize AVX256 and
AVX512 vpsub[bwdq] instructions.
* testsuite/gas/i386/optimize-1.s: Add tests for AVX256 and
AVX512 vpsub[bwdq] instructions.
* testsuite/gas/i386/x86-64-optimize-2.s: Likewise.
* testsuite/gas/i386/optimize-1.d: Updated.
* testsuite/gas/i386/x86-64-optimize-2.d: Likewise.
opcodes/
* i386-opc.tbl: Add "Optimize" to AVX256 and AVX512
vpsub[bwdq] instructions.
* i386-tbl.h: Regenerated.
Diffstat (limited to 'gas')
-rw-r--r-- | gas/ChangeLog | 10 | ||||
-rw-r--r-- | gas/config/tc-i386.c | 9 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/optimize-1.d | 16 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/optimize-1.s | 20 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/x86-64-optimize-2.d | 32 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/x86-64-optimize-2.s | 36 |
6 files changed, 121 insertions, 2 deletions
diff --git a/gas/ChangeLog b/gas/ChangeLog index 913d9bc..da34e3b 100644 --- a/gas/ChangeLog +++ b/gas/ChangeLog @@ -1,3 +1,13 @@ +2018-03-01 H.J. Lu <hongjiu.lu@intel.com> + + * config/tc-i386.c (optimize_encoding): Optimize AVX256 and + AVX512 vpsub[bwdq] instructions. + * testsuite/gas/i386/optimize-1.s: Add tests for AVX256 and + AVX512 vpsub[bwdq] instructions. + * testsuite/gas/i386/x86-64-optimize-2.s: Likewise. + * testsuite/gas/i386/optimize-1.d: Updated. + * testsuite/gas/i386/x86-64-optimize-2.d: Likewise. + 2018-03-01 Alan Modra <amodra@gmail.com> * configure.ac (ALL_LINGUAS): Add uk. Sort. diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index f9dccdb..4174d19 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -3856,11 +3856,16 @@ optimize_encoding (void) || i.tm.base_opcode == 0x66df || i.tm.base_opcode == 0x57 || i.tm.base_opcode == 0x6657 - || i.tm.base_opcode == 0x66ef) + || i.tm.base_opcode == 0x66ef + || i.tm.base_opcode == 0x66f8 + || i.tm.base_opcode == 0x66f9 + || i.tm.base_opcode == 0x66fa + || i.tm.base_opcode == 0x66fb) && i.tm.extension_opcode == None)) { /* Optimize: -O2: - VOP, one of vandnps, vandnpd, vxorps and vxorpd: + VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd, + vpsubq and vpsubw: EVEX VOP %zmmM, %zmmM, %zmmN -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16) -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) diff --git a/gas/testsuite/gas/i386/optimize-1.d b/gas/testsuite/gas/i386/optimize-1.d index 80b1e83..3ea6e75 100644 --- a/gas/testsuite/gas/i386/optimize-1.d +++ b/gas/testsuite/gas/i386/optimize-1.d @@ -42,4 +42,20 @@ Disassembly of section .text: +[a-f0-9]+: 62 f1 f5 af ef e9 vpxorq %ymm1,%ymm1,%ymm5\{%k7\}\{z\} +[a-f0-9]+: c5 f1 ef e9 vpxor %xmm1,%xmm1,%xmm5 +[a-f0-9]+: c5 f1 ef e9 vpxor %xmm1,%xmm1,%xmm5 + +[a-f0-9]+: 62 f1 75 4f f8 e9 vpsubb %zmm1,%zmm1,%zmm5\{%k7\} + +[a-f0-9]+: 62 f1 75 af f8 e9 vpsubb %ymm1,%ymm1,%ymm5\{%k7\}\{z\} + +[a-f0-9]+: c5 f1 f8 e9 vpsubb %xmm1,%xmm1,%xmm5 + 
+[a-f0-9]+: c5 f1 f8 e9 vpsubb %xmm1,%xmm1,%xmm5 + +[a-f0-9]+: 62 f1 75 4f f9 e9 vpsubw %zmm1,%zmm1,%zmm5\{%k7\} + +[a-f0-9]+: 62 f1 75 af f9 e9 vpsubw %ymm1,%ymm1,%ymm5\{%k7\}\{z\} + +[a-f0-9]+: c5 f1 f9 e9 vpsubw %xmm1,%xmm1,%xmm5 + +[a-f0-9]+: c5 f1 f9 e9 vpsubw %xmm1,%xmm1,%xmm5 + +[a-f0-9]+: 62 f1 75 4f fa e9 vpsubd %zmm1,%zmm1,%zmm5\{%k7\} + +[a-f0-9]+: 62 f1 75 af fa e9 vpsubd %ymm1,%ymm1,%ymm5\{%k7\}\{z\} + +[a-f0-9]+: c5 f1 fa e9 vpsubd %xmm1,%xmm1,%xmm5 + +[a-f0-9]+: c5 f1 fa e9 vpsubd %xmm1,%xmm1,%xmm5 + +[a-f0-9]+: 62 f1 f5 4f fb e9 vpsubq %zmm1,%zmm1,%zmm5\{%k7\} + +[a-f0-9]+: 62 f1 f5 af fb e9 vpsubq %ymm1,%ymm1,%ymm5\{%k7\}\{z\} + +[a-f0-9]+: c5 f1 fb e9 vpsubq %xmm1,%xmm1,%xmm5 + +[a-f0-9]+: c5 f1 fb e9 vpsubq %xmm1,%xmm1,%xmm5 #pass diff --git a/gas/testsuite/gas/i386/optimize-1.s b/gas/testsuite/gas/i386/optimize-1.s index 042a004..f875f92 100644 --- a/gas/testsuite/gas/i386/optimize-1.s +++ b/gas/testsuite/gas/i386/optimize-1.s @@ -46,3 +46,23 @@ _start: vpxorq %ymm1, %ymm1, %ymm5{z}{%k7} vpxorq %zmm1, %zmm1, %zmm5 vpxorq %ymm1, %ymm1, %ymm5 + + vpsubb %zmm1, %zmm1, %zmm5{%k7} + vpsubb %ymm1, %ymm1, %ymm5{z}{%k7} + vpsubb %zmm1, %zmm1, %zmm5 + vpsubb %ymm1, %ymm1, %ymm5 + + vpsubw %zmm1, %zmm1, %zmm5{%k7} + vpsubw %ymm1, %ymm1, %ymm5{z}{%k7} + vpsubw %zmm1, %zmm1, %zmm5 + vpsubw %ymm1, %ymm1, %ymm5 + + vpsubd %zmm1, %zmm1, %zmm5{%k7} + vpsubd %ymm1, %ymm1, %ymm5{z}{%k7} + vpsubd %zmm1, %zmm1, %zmm5 + vpsubd %ymm1, %ymm1, %ymm5 + + vpsubq %zmm1, %zmm1, %zmm5{%k7} + vpsubq %ymm1, %ymm1, %ymm5{z}{%k7} + vpsubq %zmm1, %zmm1, %zmm5 + vpsubq %ymm1, %ymm1, %ymm5 diff --git a/gas/testsuite/gas/i386/x86-64-optimize-2.d b/gas/testsuite/gas/i386/x86-64-optimize-2.d index f982f52..f59e9b6 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-2.d +++ b/gas/testsuite/gas/i386/x86-64-optimize-2.d @@ -74,4 +74,36 @@ Disassembly of section .text: +[a-f0-9]+: 62 e1 f5 08 ef c1 vpxorq %xmm1,%xmm1,%xmm16 +[a-f0-9]+: 62 b1 f5 00 ef c9 vpxorq %xmm17,%xmm17,%xmm1 
+[a-f0-9]+: 62 b1 f5 00 ef c9 vpxorq %xmm17,%xmm17,%xmm1 + +[a-f0-9]+: 62 71 75 4f f8 f9 vpsubb %zmm1,%zmm1,%zmm15\{%k7\} + +[a-f0-9]+: 62 71 75 af f8 f9 vpsubb %ymm1,%ymm1,%ymm15\{%k7\}\{z\} + +[a-f0-9]+: c5 71 f8 f9 vpsubb %xmm1,%xmm1,%xmm15 + +[a-f0-9]+: c5 71 f8 f9 vpsubb %xmm1,%xmm1,%xmm15 + +[a-f0-9]+: 62 e1 75 08 f8 c1 vpsubb %xmm1,%xmm1,%xmm16 + +[a-f0-9]+: 62 e1 75 08 f8 c1 vpsubb %xmm1,%xmm1,%xmm16 + +[a-f0-9]+: 62 b1 75 00 f8 c9 vpsubb %xmm17,%xmm17,%xmm1 + +[a-f0-9]+: 62 b1 75 00 f8 c9 vpsubb %xmm17,%xmm17,%xmm1 + +[a-f0-9]+: 62 71 75 4f f9 f9 vpsubw %zmm1,%zmm1,%zmm15\{%k7\} + +[a-f0-9]+: 62 71 75 af f9 f9 vpsubw %ymm1,%ymm1,%ymm15\{%k7\}\{z\} + +[a-f0-9]+: c5 71 f9 f9 vpsubw %xmm1,%xmm1,%xmm15 + +[a-f0-9]+: c5 71 f9 f9 vpsubw %xmm1,%xmm1,%xmm15 + +[a-f0-9]+: 62 e1 75 08 f9 c1 vpsubw %xmm1,%xmm1,%xmm16 + +[a-f0-9]+: 62 e1 75 08 f9 c1 vpsubw %xmm1,%xmm1,%xmm16 + +[a-f0-9]+: 62 b1 75 00 f9 c9 vpsubw %xmm17,%xmm17,%xmm1 + +[a-f0-9]+: 62 b1 75 00 f9 c9 vpsubw %xmm17,%xmm17,%xmm1 + +[a-f0-9]+: 62 71 75 4f fa f9 vpsubd %zmm1,%zmm1,%zmm15\{%k7\} + +[a-f0-9]+: 62 71 75 af fa f9 vpsubd %ymm1,%ymm1,%ymm15\{%k7\}\{z\} + +[a-f0-9]+: c5 71 fa f9 vpsubd %xmm1,%xmm1,%xmm15 + +[a-f0-9]+: c5 71 fa f9 vpsubd %xmm1,%xmm1,%xmm15 + +[a-f0-9]+: 62 e1 75 08 fa c1 vpsubd %xmm1,%xmm1,%xmm16 + +[a-f0-9]+: 62 e1 75 08 fa c1 vpsubd %xmm1,%xmm1,%xmm16 + +[a-f0-9]+: 62 b1 75 00 fa c9 vpsubd %xmm17,%xmm17,%xmm1 + +[a-f0-9]+: 62 b1 75 00 fa c9 vpsubd %xmm17,%xmm17,%xmm1 + +[a-f0-9]+: 62 71 f5 4f fb f9 vpsubq %zmm1,%zmm1,%zmm15\{%k7\} + +[a-f0-9]+: 62 71 f5 af fb f9 vpsubq %ymm1,%ymm1,%ymm15\{%k7\}\{z\} + +[a-f0-9]+: c5 71 fb f9 vpsubq %xmm1,%xmm1,%xmm15 + +[a-f0-9]+: c5 71 fb f9 vpsubq %xmm1,%xmm1,%xmm15 + +[a-f0-9]+: 62 e1 f5 08 fb c1 vpsubq %xmm1,%xmm1,%xmm16 + +[a-f0-9]+: 62 e1 f5 08 fb c1 vpsubq %xmm1,%xmm1,%xmm16 + +[a-f0-9]+: 62 b1 f5 00 fb c9 vpsubq %xmm17,%xmm17,%xmm1 + +[a-f0-9]+: 62 b1 f5 00 fb c9 vpsubq %xmm17,%xmm17,%xmm1 #pass diff --git 
a/gas/testsuite/gas/i386/x86-64-optimize-2.s b/gas/testsuite/gas/i386/x86-64-optimize-2.s index 6aa968b..4461c09 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-2.s +++ b/gas/testsuite/gas/i386/x86-64-optimize-2.s @@ -78,3 +78,39 @@ _start: vpxorq %ymm1, %ymm1, %ymm16 vpxorq %zmm17, %zmm17, %zmm1 vpxorq %ymm17, %ymm17, %ymm1 + + vpsubb %zmm1, %zmm1, %zmm15{%k7} + vpsubb %ymm1, %ymm1, %ymm15{z}{%k7} + vpsubb %zmm1, %zmm1, %zmm15 + vpsubb %ymm1, %ymm1, %ymm15 + vpsubb %zmm1, %zmm1, %zmm16 + vpsubb %ymm1, %ymm1, %ymm16 + vpsubb %zmm17, %zmm17, %zmm1 + vpsubb %ymm17, %ymm17, %ymm1 + + vpsubw %zmm1, %zmm1, %zmm15{%k7} + vpsubw %ymm1, %ymm1, %ymm15{z}{%k7} + vpsubw %zmm1, %zmm1, %zmm15 + vpsubw %ymm1, %ymm1, %ymm15 + vpsubw %zmm1, %zmm1, %zmm16 + vpsubw %ymm1, %ymm1, %ymm16 + vpsubw %zmm17, %zmm17, %zmm1 + vpsubw %ymm17, %ymm17, %ymm1 + + vpsubd %zmm1, %zmm1, %zmm15{%k7} + vpsubd %ymm1, %ymm1, %ymm15{z}{%k7} + vpsubd %zmm1, %zmm1, %zmm15 + vpsubd %ymm1, %ymm1, %ymm15 + vpsubd %zmm1, %zmm1, %zmm16 + vpsubd %ymm1, %ymm1, %ymm16 + vpsubd %zmm17, %zmm17, %zmm1 + vpsubd %ymm17, %ymm17, %ymm1 + + vpsubq %zmm1, %zmm1, %zmm15{%k7} + vpsubq %ymm1, %ymm1, %ymm15{z}{%k7} + vpsubq %zmm1, %zmm1, %zmm15 + vpsubq %ymm1, %ymm1, %ymm15 + vpsubq %zmm1, %zmm1, %zmm16 + vpsubq %ymm1, %ymm1, %ymm16 + vpsubq %zmm17, %zmm17, %zmm1 + vpsubq %ymm17, %ymm17, %ymm1 |