diff options
author | Jan Beulich <jbeulich@suse.com> | 2019-07-01 08:31:14 +0200 |
---|---|---|
committer | Jan Beulich <jbeulich@suse.com> | 2019-07-01 08:31:14 +0200 |
commit | a0a1771e895e6606a2a795c407e20aed73f69bd9 (patch) | |
tree | 9b75ffcdaa3d8b5f343c148821ddb4da987aa88d | |
parent | cd546e7bd2832c882e69809fdbeb7b376b62039e (diff) | |
download | gdb-a0a1771e895e6606a2a795c407e20aed73f69bd9.zip gdb-a0a1771e895e6606a2a795c407e20aed73f69bd9.tar.gz gdb-a0a1771e895e6606a2a795c407e20aed73f69bd9.tar.bz2 |
x86: optimize EVEX packed integer logical instructions
As long as there's no write mask as well as no broadcast, and as long
as the scaled Disp8 wouldn't result in a shorter EVEX encoding, encode
VPAND{D,Q}, VPANDN{D,Q}, VPOR{D,Q}, and VPXOR{D,Q} acting on only the
lower 16 XMM/YMM registers using their VEX equivalents with -O1.
Also take the opportunity to avoid looping twice over all operands
when dealing with memory-with-displacement ones.
27 files changed, 969 insertions, 25 deletions
diff --git a/gas/ChangeLog b/gas/ChangeLog index 0a7c2a0..b969177 100644 --- a/gas/ChangeLog +++ b/gas/ChangeLog @@ -1,5 +1,34 @@ 2019-07-01 Jan Beulich <jbeulich@suse.com> + * config/tc-i386.c (optimize_encoding): Make j unsigned. Handle + vpand{d,q}, vpandn{d,q}, vpor{d,q}, and vpxor{d,q}. Also check/ + clear broadcast. Eliminate a loop. + * doc/c-i386.texi: Update -O1 documentation. + * testsuite/gas/i386/optimize-1.s, + testsuite/gas/i386/optimize-2.s, + testsuite/gas/i386/optimize-3.s, + testsuite/gas/i386/optimize-5.s, + testsuite/gas/i386/x86-64-optimize-2.s, + testsuite/gas/i386/x86-64-optimize-3.s, + testsuite/gas/i386/x86-64-optimize-4.s, + testsuite/gas/i386/x86-64-optimize-6.s: Add vpand{d,q}, + vpandn{d,q}, vpor{d,q}, and vpxor{d,q} cases. + testsuite/gas/i386/optimize-1.d, + testsuite/gas/i386/optimize-1a.d, + testsuite/gas/i386/optimize-2.d, + testsuite/gas/i386/optimize-3.d, + testsuite/gas/i386/optimize-4.d, + testsuite/gas/i386/optimize-5.d, + testsuite/gas/i386/x86-64-optimize-2.d, + testsuite/gas/i386/x86-64-optimize-2a.d, + testsuite/gas/i386/x86-64-optimize-2b.d, + testsuite/gas/i386/x86-64-optimize-3.d, + testsuite/gas/i386/x86-64-optimize-4.d, + testsuite/gas/i386/x86-64-optimize-5.d, + testsuite/gas/i386/x86-64-optimize-6.d: Adjust expectations. 
+ +2019-07-01 Jan Beulich <jbeulich@suse.com> + * testsuite/gas/i386/avx512f_vpclmulqdq.s, testsuite/gas/i386/avx512vl_vpclmulqdq.s, testsuite/gas/i386/vpclmulqdq.s, diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index f6c14c9..bc7d556 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -3897,7 +3897,7 @@ check_hle (void) static void optimize_encoding (void) { - int j; + unsigned int j; if (optimize_for_space && i.reg_operands == 1 @@ -4095,10 +4095,13 @@ optimize_encoding (void) && !i.types[0].bitfield.zmmword && !i.types[1].bitfield.zmmword && !i.mask + && !i.broadcast && is_evex_encoding (&i.tm) && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x666f || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf36f - || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f) + || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f + || (i.tm.base_opcode & ~4) == 0x66db + || (i.tm.base_opcode & ~4) == 0x66eb) && i.tm.extension_opcode == None) { /* Optimize: -O1: @@ -4116,8 +4119,17 @@ optimize_encoding (void) -> VEX mvmovdqa|vmovdquem, %xmmN (N < 16) EVEX VOP mem, %ymmN -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16) + VOP, one of vpand, vpandn, vpor, vpxor: + EVEX VOP{d,q} %xmmL, %xmmM, %xmmN + -> VEX VOP %xmmL, %xmmM, %xmmN (L, M, and N < 16) + EVEX VOP{d,q} %ymmL, %ymmM, %ymmN + -> VEX VOP %ymmL, %ymmM, %ymmN (L, M, and N < 16) + EVEX VOP{d,q} mem, %xmmM, %xmmN + -> VEX VOP mem, %xmmM, %xmmN (M and N < 16) + EVEX VOP{d,q} mem, %ymmM, %ymmN + -> VEX VOP mem, %ymmM, %ymmN (M and N < 16) */ - for (j = 0; j < 2; j++) + for (j = 0; j < i.operands; j++) if (operand_type_check (i.types[j], disp) && i.op[j].disps->X_op == O_constant) { @@ -4147,16 +4159,12 @@ optimize_encoding (void) i.tm.opcode_modifier.vexw = VEXW0; i.tm.opcode_modifier.evex = 0; i.tm.opcode_modifier.masking = 0; + i.tm.opcode_modifier.broadcast = 0; i.tm.opcode_modifier.disp8memshift = 0; i.memshift = 0; - for (j = 0; j < 2; j++) - if (operand_type_check (i.types[j], disp) - && i.op[j].disps->X_op == 
O_constant) - { - i.types[j].bitfield.disp8 - = fits_in_disp8 (i.op[j].disps->X_add_number); - break; - } + if (j < i.operands) + i.types[j].bitfield.disp8 + = fits_in_disp8 (i.op[j].disps->X_add_number); } } diff --git a/gas/doc/c-i386.texi b/gas/doc/c-i386.texi index 80bbcbe..ba20067 100644 --- a/gas/doc/c-i386.texi +++ b/gas/doc/c-i386.texi @@ -465,13 +465,17 @@ Optimize instruction encoding with smaller instruction size. @samp{-O} and @samp{-O1} encode 64-bit register load instructions with 64-bit immediate as 32-bit register load instructions with 31-bit or 32-bits immediates, encode 64-bit register clearing instructions with 32-bit -register clearing instructions and encode 256-bit/512-bit VEX/EVEX -vector register clearing instructions with 128-bit VEX vector register -clearing instructions as well as encode 128-bit/256-bit EVEX vector +register clearing instructions, encode 256-bit/512-bit VEX/EVEX vector +register clearing instructions with 128-bit VEX vector register +clearing instructions, encode 128-bit/256-bit EVEX vector register load/store instructions with VEX vector register load/store -instructions. @samp{-O2} includes @samp{-O1} optimization plus -encodes 256-bit/512-bit EVEX vector register clearing instructions with -128-bit EVEX vector register clearing instructions. +instructions, and encode 128-bit/256-bit EVEX packed integer logical +instructions with 128-bit/256-bit VEX packed integer logical. + +@samp{-O2} includes @samp{-O1} optimization plus encodes +256-bit/512-bit EVEX vector register clearing instructions with 128-bit +EVEX vector register clearing instructions. + @samp{-Os} includes @samp{-O2} optimization plus encodes 16-bit, 32-bit and 64-bit register tests with immediate as 8-bit register test with immediate. @samp{-O0} turns off this optimization. 
diff --git a/gas/testsuite/gas/i386/optimize-1.d b/gas/testsuite/gas/i386/optimize-1.d index 2f40c72..9c5f423 100644 --- a/gas/testsuite/gas/i386/optimize-1.d +++ b/gas/testsuite/gas/i386/optimize-1.d @@ -99,4 +99,52 @@ Disassembly of section .text: +[a-f0-9]+: 62 f1 7e 28 7f 48 04 vmovdqu32 %ymm1,0x80\(%eax\) +[a-f0-9]+: 62 f1 fe 28 7f 48 04 vmovdqu64 %ymm1,0x80\(%eax\) +[a-f0-9]+: 62 f1 7d 48 6f 10 vmovdqa32 \(%eax\),%zmm2 + +[a-f0-9]+: c5 .* vpand %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpand %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpand %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpand %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpand 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 
0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%eax\),%ymm2,%ymm3 #pass diff --git a/gas/testsuite/gas/i386/optimize-1.s b/gas/testsuite/gas/i386/optimize-1.s index 4c15d16..b41a960 100644 --- a/gas/testsuite/gas/i386/optimize-1.s +++ b/gas/testsuite/gas/i386/optimize-1.s @@ -116,3 +116,57 @@ _start: vmovdqu64 %ymm1, 128(%eax) vmovdqa32 (%eax), %zmm2 + + vpandd %xmm2, %xmm3, %xmm4 + vpandq %xmm2, %xmm3, %xmm4 + vpandnd %xmm2, %xmm3, %xmm4 + vpandnq %xmm2, %xmm3, %xmm4 + vpord %xmm2, %xmm3, %xmm4 + vporq %xmm2, %xmm3, %xmm4 + vpxord %xmm2, %xmm3, %xmm4 + vpxorq %xmm2, %xmm3, %xmm4 + + vpandd %ymm2, %ymm3, %ymm4 + vpandq %ymm2, %ymm3, %ymm4 + vpandnd %ymm2, %ymm3, %ymm4 + vpandnq %ymm2, %ymm3, %ymm4 + vpord %ymm2, %ymm3, %ymm4 + vporq %ymm2, %ymm3, %ymm4 + vpxord %ymm2, %ymm3, %ymm4 + vpxorq %ymm2, %ymm3, %ymm4 + + vpandd 112(%eax), %xmm2, %xmm3 + vpandq 112(%eax), %xmm2, %xmm3 + vpandnd 112(%eax), %xmm2, %xmm3 + vpandnq 112(%eax), %xmm2, %xmm3 + vpord 112(%eax), %xmm2, %xmm3 + vporq 112(%eax), %xmm2, %xmm3 + vpxord 112(%eax), %xmm2, %xmm3 + vpxorq 112(%eax), %xmm2, %xmm3 + + vpandd 128(%eax), %xmm2, %xmm3 + vpandq 128(%eax), %xmm2, %xmm3 + vpandnd 128(%eax), %xmm2, %xmm3 + vpandnq 128(%eax), %xmm2, %xmm3 + vpord 128(%eax), %xmm2, %xmm3 + vporq 
128(%eax), %xmm2, %xmm3 + vpxord 128(%eax), %xmm2, %xmm3 + vpxorq 128(%eax), %xmm2, %xmm3 + + vpandd 96(%eax), %ymm2, %ymm3 + vpandq 96(%eax), %ymm2, %ymm3 + vpandnd 96(%eax), %ymm2, %ymm3 + vpandnq 96(%eax), %ymm2, %ymm3 + vpord 96(%eax), %ymm2, %ymm3 + vporq 96(%eax), %ymm2, %ymm3 + vpxord 96(%eax), %ymm2, %ymm3 + vpxorq 96(%eax), %ymm2, %ymm3 + + vpandd 128(%eax), %ymm2, %ymm3 + vpandq 128(%eax), %ymm2, %ymm3 + vpandnd 128(%eax), %ymm2, %ymm3 + vpandnq 128(%eax), %ymm2, %ymm3 + vpord 128(%eax), %ymm2, %ymm3 + vporq 128(%eax), %ymm2, %ymm3 + vpxord 128(%eax), %ymm2, %ymm3 + vpxorq 128(%eax), %ymm2, %ymm3 diff --git a/gas/testsuite/gas/i386/optimize-1a.d b/gas/testsuite/gas/i386/optimize-1a.d index d7c253a..bdac985 100644 --- a/gas/testsuite/gas/i386/optimize-1a.d +++ b/gas/testsuite/gas/i386/optimize-1a.d @@ -100,4 +100,52 @@ Disassembly of section .text: +[a-f0-9]+: 62 f1 7e 28 7f 48 04 vmovdqu32 %ymm1,0x80\(%eax\) +[a-f0-9]+: 62 f1 fe 28 7f 48 04 vmovdqu64 %ymm1,0x80\(%eax\) +[a-f0-9]+: 62 f1 7d 48 6f 10 vmovdqa32 \(%eax\),%zmm2 + +[a-f0-9]+: c5 .* vpand %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpand %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpand %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpand %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpand 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 
0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%eax\),%ymm2,%ymm3 #pass diff --git a/gas/testsuite/gas/i386/optimize-2.d b/gas/testsuite/gas/i386/optimize-2.d index ed61dec..ffc15f6 100644 --- a/gas/testsuite/gas/i386/optimize-2.d +++ b/gas/testsuite/gas/i386/optimize-2.d @@ -89,4 +89,68 @@ Disassembly of section .text: +[a-f0-9]+: 62 f1 ff 89 6f d1 vmovdqu16 %xmm1,%xmm2\{%k1\}\{z\} +[a-f0-9]+: 62 f1 7e 89 6f d1 vmovdqu32 %xmm1,%xmm2\{%k1\}\{z\} +[a-f0-9]+: 62 f1 fe 89 6f d1 vmovdqu64 %xmm1,%xmm2\{%k1\}\{z\} + +[a-f0-9]+: c5 .* vpand %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpand %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm3,%xmm4 + 
+[a-f0-9]+: c5 .* vpandn %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpand %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpand %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpand 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnq 
0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandd %xmm2,%xmm3,%xmm4\{%k5\} + +[a-f0-9]+: 62 .* vpandq %ymm2,%ymm3,%ymm4\{%k5\} + +[a-f0-9]+: 62 .* vpandnd %ymm2,%ymm3,%ymm4\{%k5\} + +[a-f0-9]+: 62 .* vpandnq %xmm2,%xmm3,%xmm4\{%k5\} + +[a-f0-9]+: 62 .* vpord %xmm2,%xmm3,%xmm4\{%k5\} + +[a-f0-9]+: 62 .* vporq %ymm2,%ymm3,%ymm4\{%k5\} + +[a-f0-9]+: 62 .* vpxord %ymm2,%ymm3,%ymm4\{%k5\} + +[a-f0-9]+: 62 .* vpxorq %xmm2,%xmm3,%xmm4\{%k5\} + +[a-f0-9]+: 62 .* vpandd \(%eax\)\{1to8\},%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandq \(%eax\)\{1to2\},%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnd \(%eax\)\{1to4\},%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnq \(%eax\)\{1to4\},%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpord \(%eax\)\{1to8\},%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vporq \(%eax\)\{1to2\},%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxord \(%eax\)\{1to4\},%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxorq \(%eax\)\{1to4\},%ymm2,%ymm3 #pass diff --git a/gas/testsuite/gas/i386/optimize-2.s b/gas/testsuite/gas/i386/optimize-2.s index 0a4fb23..c84840c 100644 --- a/gas/testsuite/gas/i386/optimize-2.s +++ b/gas/testsuite/gas/i386/optimize-2.s @@ -97,3 +97,75 @@ _start: vmovdqu16 %xmm1, %xmm2{%k1}{z} vmovdqu32 %xmm1, %xmm2{%k1}{z} vmovdqu64 %xmm1, %xmm2{%k1}{z} + + vpandd %xmm2, %xmm3, %xmm4 + vpandq %xmm2, %xmm3, %xmm4 + vpandnd %xmm2, %xmm3, %xmm4 + vpandnq %xmm2, %xmm3, %xmm4 + vpord %xmm2, %xmm3, %xmm4 + vporq %xmm2, %xmm3, %xmm4 + vpxord %xmm2, %xmm3, %xmm4 + vpxorq %xmm2, %xmm3, %xmm4 + + vpandd %ymm2, %ymm3, %ymm4 + vpandq %ymm2, %ymm3, %ymm4 + vpandnd %ymm2, %ymm3, %ymm4 + vpandnq %ymm2, %ymm3, %ymm4 + vpord %ymm2, %ymm3, %ymm4 + vporq %ymm2, %ymm3, %ymm4 + vpxord %ymm2, %ymm3, %ymm4 + vpxorq %ymm2, %ymm3, %ymm4 + + vpandd 112(%eax), %xmm2, %xmm3 + vpandq 112(%eax), %xmm2, %xmm3 + vpandnd 112(%eax), %xmm2, %xmm3 + 
vpandnq 112(%eax), %xmm2, %xmm3 + vpord 112(%eax), %xmm2, %xmm3 + vporq 112(%eax), %xmm2, %xmm3 + vpxord 112(%eax), %xmm2, %xmm3 + vpxorq 112(%eax), %xmm2, %xmm3 + + vpandd 128(%eax), %xmm2, %xmm3 + vpandq 128(%eax), %xmm2, %xmm3 + vpandnd 128(%eax), %xmm2, %xmm3 + vpandnq 128(%eax), %xmm2, %xmm3 + vpord 128(%eax), %xmm2, %xmm3 + vporq 128(%eax), %xmm2, %xmm3 + vpxord 128(%eax), %xmm2, %xmm3 + vpxorq 128(%eax), %xmm2, %xmm3 + + vpandd 96(%eax), %ymm2, %ymm3 + vpandq 96(%eax), %ymm2, %ymm3 + vpandnd 96(%eax), %ymm2, %ymm3 + vpandnq 96(%eax), %ymm2, %ymm3 + vpord 96(%eax), %ymm2, %ymm3 + vporq 96(%eax), %ymm2, %ymm3 + vpxord 96(%eax), %ymm2, %ymm3 + vpxorq 96(%eax), %ymm2, %ymm3 + + vpandd 128(%eax), %ymm2, %ymm3 + vpandq 128(%eax), %ymm2, %ymm3 + vpandnd 128(%eax), %ymm2, %ymm3 + vpandnq 128(%eax), %ymm2, %ymm3 + vpord 128(%eax), %ymm2, %ymm3 + vporq 128(%eax), %ymm2, %ymm3 + vpxord 128(%eax), %ymm2, %ymm3 + vpxorq 128(%eax), %ymm2, %ymm3 + + vpandd %xmm2, %xmm3, %xmm4{%k5} + vpandq %ymm2, %ymm3, %ymm4{%k5} + vpandnd %ymm2, %ymm3, %ymm4{%k5} + vpandnq %xmm2, %xmm3, %xmm4{%k5} + vpord %xmm2, %xmm3, %xmm4{%k5} + vporq %ymm2, %ymm3, %ymm4{%k5} + vpxord %ymm2, %ymm3, %ymm4{%k5} + vpxorq %xmm2, %xmm3, %xmm4{%k5} + + vpandd (%eax){1to8}, %ymm2, %ymm3 + vpandq (%eax){1to2}, %xmm2, %xmm3 + vpandnd (%eax){1to4}, %xmm2, %xmm3 + vpandnq (%eax){1to4}, %ymm2, %ymm3 + vpord (%eax){1to8}, %ymm2, %ymm3 + vporq (%eax){1to2}, %xmm2, %xmm3 + vpxord (%eax){1to4}, %xmm2, %xmm3 + vpxorq (%eax){1to4}, %ymm2, %ymm3 diff --git a/gas/testsuite/gas/i386/optimize-3.d b/gas/testsuite/gas/i386/optimize-3.d index cd43243..ea8a9b5 100644 --- a/gas/testsuite/gas/i386/optimize-3.d +++ b/gas/testsuite/gas/i386/optimize-3.d @@ -15,4 +15,12 @@ Disassembly of section .text: +[a-f0-9]+: 62 f1 ff 08 6f d1 vmovdqu16 %xmm1,%xmm2 +[a-f0-9]+: 62 f1 7e 08 6f d1 vmovdqu32 %xmm1,%xmm2 +[a-f0-9]+: 62 f1 fe 08 6f d1 vmovdqu64 %xmm1,%xmm2 + +[a-f0-9]+: 62 .* vpandd %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: 62 .* vpandq 
%ymm2,%ymm3,%ymm4 + +[a-f0-9]+: 62 .* vpandnd %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: 62 .* vpandnq %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: 62 .* vpord %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: 62 .* vporq %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: 62 .* vpxord %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: 62 .* vpxorq %xmm2,%xmm3,%xmm4 #pass diff --git a/gas/testsuite/gas/i386/optimize-3.s b/gas/testsuite/gas/i386/optimize-3.s index a70893c..ec2a5b9 100644 --- a/gas/testsuite/gas/i386/optimize-3.s +++ b/gas/testsuite/gas/i386/optimize-3.s @@ -11,3 +11,12 @@ _start: {nooptimize} vmovdqu16 %xmm1, %xmm2 {nooptimize} vmovdqu32 %xmm1, %xmm2 {nooptimize} vmovdqu64 %xmm1, %xmm2 + + {nooptimize} vpandd %xmm2, %xmm3, %xmm4 + {nooptimize} vpandq %ymm2, %ymm3, %ymm4 + {nooptimize} vpandnd %ymm2, %ymm3, %ymm4 + {nooptimize} vpandnq %xmm2, %xmm3, %xmm4 + {nooptimize} vpord %xmm2, %xmm3, %xmm4 + {nooptimize} vporq %ymm2, %ymm3, %ymm4 + {nooptimize} vpxord %ymm2, %ymm3, %ymm4 + {nooptimize} vpxorq %xmm2, %xmm3, %xmm4 diff --git a/gas/testsuite/gas/i386/optimize-4.d b/gas/testsuite/gas/i386/optimize-4.d index f062ad7..d97718f 100644 --- a/gas/testsuite/gas/i386/optimize-4.d +++ b/gas/testsuite/gas/i386/optimize-4.d @@ -99,6 +99,54 @@ Disassembly of section .text: +[a-f0-9]+: 62 f1 7e 28 7f 48 04 vmovdqu32 %ymm1,0x80\(%eax\) +[a-f0-9]+: 62 f1 fe 28 7f 48 04 vmovdqu64 %ymm1,0x80\(%eax\) +[a-f0-9]+: 62 f1 7d 48 6f 10 vmovdqa32 \(%eax\),%zmm2 + +[a-f0-9]+: c5 .* vpand %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpand %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpand %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpand %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpor 
%ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpand 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%eax\),%ymm2,%ymm3 +[a-f0-9]+: 62 f1 f5 08 55 e9 vandnpd %xmm1,%xmm1,%xmm5 +[a-f0-9]+: 62 f1 f5 08 55 e9 vandnpd %xmm1,%xmm1,%xmm5 #pass diff --git a/gas/testsuite/gas/i386/optimize-5.d b/gas/testsuite/gas/i386/optimize-5.d index fdf5561..ecab78c 100644 --- 
a/gas/testsuite/gas/i386/optimize-5.d +++ b/gas/testsuite/gas/i386/optimize-5.d @@ -99,6 +99,54 @@ Disassembly of section .text: +[a-f0-9]+: 62 f1 7e 28 7f 48 04 vmovdqu32 %ymm1,0x80\(%eax\) +[a-f0-9]+: 62 f1 fe 28 7f 48 04 vmovdqu64 %ymm1,0x80\(%eax\) +[a-f0-9]+: 62 f1 7d 48 6f 10 vmovdqa32 \(%eax\),%zmm2 + +[a-f0-9]+: c5 .* vpand %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpand %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpand %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpand %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpand 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%eax\),%ymm2,%ymm3 + 
+[a-f0-9]+: c5 .* vpandn 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%eax\),%ymm2,%ymm3 +[a-f0-9]+: 62 f1 f5 08 55 e9 vandnpd %xmm1,%xmm1,%xmm5 +[a-f0-9]+: 62 f1 f5 08 55 e9 vandnpd %xmm1,%xmm1,%xmm5 +[a-f0-9]+: 62 f1 7d 28 6f d1 vmovdqa32 %ymm1,%ymm2 @@ -107,4 +155,12 @@ Disassembly of section .text: +[a-f0-9]+: 62 f1 ff 08 6f d1 vmovdqu16 %xmm1,%xmm2 +[a-f0-9]+: 62 f1 7e 08 6f d1 vmovdqu32 %xmm1,%xmm2 +[a-f0-9]+: 62 f1 fe 08 6f d1 vmovdqu64 %xmm1,%xmm2 + +[a-f0-9]+: 62 .* vpandd %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: 62 .* vpandq %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: 62 .* vpandnd %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: 62 .* vpandnq %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: 62 .* vpord %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: 62 .* vporq %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: 62 .* vpxord %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: 62 .* vpxorq %xmm2,%xmm3,%xmm4 #pass diff --git a/gas/testsuite/gas/i386/optimize-5.s b/gas/testsuite/gas/i386/optimize-5.s index 77d60ed..e88fab1 100644 --- a/gas/testsuite/gas/i386/optimize-5.s +++ b/gas/testsuite/gas/i386/optimize-5.s @@ -13,3 +13,12 @@ {evex} vmovdqu16 %xmm1, %xmm2 {evex} vmovdqu32 %xmm1, %xmm2 {evex} vmovdqu64 %xmm1, %xmm2 + + {evex} vpandd %xmm2, %xmm3, %xmm4 + {evex} vpandq %ymm2, %ymm3, %ymm4 + {evex} vpandnd %ymm2, %ymm3, %ymm4 + {evex} vpandnq %xmm2, %xmm3, %xmm4 + {evex} vpord %xmm2, %xmm3, %xmm4 + {evex} vporq %ymm2, %ymm3, %ymm4 + {evex} vpxord %ymm2, %ymm3, %ymm4 + {evex} vpxorq %xmm2, %xmm3, 
%xmm4 diff --git a/gas/testsuite/gas/i386/x86-64-optimize-2.d b/gas/testsuite/gas/i386/x86-64-optimize-2.d index 45b98ae..0041b00 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-2.d +++ b/gas/testsuite/gas/i386/x86-64-optimize-2.d @@ -155,4 +155,52 @@ Disassembly of section .text: +[a-f0-9]+: 62 f1 7e 28 7f 48 04 vmovdqu32 %ymm1,0x80\(%rax\) +[a-f0-9]+: 62 f1 fe 28 7f 48 04 vmovdqu64 %ymm1,0x80\(%rax\) +[a-f0-9]+: 62 f1 7d 48 6f 10 vmovdqa32 \(%rax\),%zmm2 + +[a-f0-9]+: c5 .* vpand %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c4 .* vpand %xmm12,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm13,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm3,%xmm14 + +[a-f0-9]+: c5 .* vpor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c4 .* vpor %xmm12,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm13,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm3,%xmm14 + +[a-f0-9]+: c5 .* vpand %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c4 .* vpand %ymm12,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm13,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm3,%ymm14 + +[a-f0-9]+: c5 .* vpor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c4 .* vpor %ymm12,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm13,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm3,%ymm14 + +[a-f0-9]+: c5 .* vpand 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxorq 
0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%rax\),%ymm2,%ymm3 #pass diff --git a/gas/testsuite/gas/i386/x86-64-optimize-2.s b/gas/testsuite/gas/i386/x86-64-optimize-2.s index e5d2982..22dbd1c 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-2.s +++ b/gas/testsuite/gas/i386/x86-64-optimize-2.s @@ -172,3 +172,57 @@ _start: vmovdqu64 %ymm1, 128(%rax) vmovdqa32 (%rax), %zmm2 + + vpandd %xmm2, %xmm3, %xmm4 + vpandq %xmm12, %xmm3, %xmm4 + vpandnd %xmm2, %xmm13, %xmm4 + vpandnq %xmm2, %xmm3, %xmm14 + vpord %xmm2, %xmm3, %xmm4 + vporq %xmm12, %xmm3, %xmm4 + vpxord %xmm2, %xmm13, %xmm4 + vpxorq %xmm2, %xmm3, %xmm14 + + vpandd %ymm2, %ymm3, %ymm4 + vpandq %ymm12, %ymm3, %ymm4 + vpandnd %ymm2, %ymm13, %ymm4 + vpandnq %ymm2, %ymm3, %ymm14 + vpord %ymm2, %ymm3, %ymm4 + vporq %ymm12, %ymm3, %ymm4 + vpxord %ymm2, %ymm13, %ymm4 + vpxorq %ymm2, %ymm3, %ymm14 + + vpandd 112(%rax), %xmm2, %xmm3 + vpandq 112(%rax), %xmm2, %xmm3 + vpandnd 112(%rax), %xmm2, %xmm3 + vpandnq 112(%rax), %xmm2, %xmm3 + vpord 112(%rax), %xmm2, %xmm3 + vporq 112(%rax), %xmm2, %xmm3 + vpxord 112(%rax), %xmm2, %xmm3 + vpxorq 112(%rax), %xmm2, %xmm3 + + vpandd 128(%rax), %xmm2, %xmm3 + vpandq 128(%rax), %xmm2, %xmm3 + vpandnd 128(%rax), 
%xmm2, %xmm3 + vpandnq 128(%rax), %xmm2, %xmm3 + vpord 128(%rax), %xmm2, %xmm3 + vporq 128(%rax), %xmm2, %xmm3 + vpxord 128(%rax), %xmm2, %xmm3 + vpxorq 128(%rax), %xmm2, %xmm3 + + vpandd 96(%rax), %ymm2, %ymm3 + vpandq 96(%rax), %ymm2, %ymm3 + vpandnd 96(%rax), %ymm2, %ymm3 + vpandnq 96(%rax), %ymm2, %ymm3 + vpord 96(%rax), %ymm2, %ymm3 + vporq 96(%rax), %ymm2, %ymm3 + vpxord 96(%rax), %ymm2, %ymm3 + vpxorq 96(%rax), %ymm2, %ymm3 + + vpandd 128(%rax), %ymm2, %ymm3 + vpandq 128(%rax), %ymm2, %ymm3 + vpandnd 128(%rax), %ymm2, %ymm3 + vpandnq 128(%rax), %ymm2, %ymm3 + vpord 128(%rax), %ymm2, %ymm3 + vporq 128(%rax), %ymm2, %ymm3 + vpxord 128(%rax), %ymm2, %ymm3 + vpxorq 128(%rax), %ymm2, %ymm3 diff --git a/gas/testsuite/gas/i386/x86-64-optimize-2a.d b/gas/testsuite/gas/i386/x86-64-optimize-2a.d index 39385b9..70a8ff3 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-2a.d +++ b/gas/testsuite/gas/i386/x86-64-optimize-2a.d @@ -156,4 +156,52 @@ Disassembly of section .text: +[a-f0-9]+: 62 f1 7e 28 7f 48 04 vmovdqu32 %ymm1,0x80\(%rax\) +[a-f0-9]+: 62 f1 fe 28 7f 48 04 vmovdqu64 %ymm1,0x80\(%rax\) +[a-f0-9]+: 62 f1 7d 48 6f 10 vmovdqa32 \(%rax\),%zmm2 + +[a-f0-9]+: c5 .* vpand %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c4 .* vpand %xmm12,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm13,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm3,%xmm14 + +[a-f0-9]+: c5 .* vpor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c4 .* vpor %xmm12,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm13,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm3,%xmm14 + +[a-f0-9]+: c5 .* vpand %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c4 .* vpand %ymm12,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm13,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm3,%ymm14 + +[a-f0-9]+: c5 .* vpor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c4 .* vpor %ymm12,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm13,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm3,%ymm14 + +[a-f0-9]+: c5 .* vpand 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 
.* vpandn 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%rax\),%ymm2,%ymm3 #pass diff --git a/gas/testsuite/gas/i386/x86-64-optimize-2b.d b/gas/testsuite/gas/i386/x86-64-optimize-2b.d index 3eb3a59..b5c6cea 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-2b.d +++ b/gas/testsuite/gas/i386/x86-64-optimize-2b.d @@ -155,4 +155,52 @@ Disassembly of section .text: +[a-f0-9]+: 62 f1 7e 28 7f 48 04 vmovdqu32 %ymm1,0x80\(%rax\) +[a-f0-9]+: 62 f1 fe 28 7f 48 04 vmovdqu64 %ymm1,0x80\(%rax\) +[a-f0-9]+: 62 f1 7d 48 6f 10 vmovdqa32 \(%rax\),%zmm2 + 
+[a-f0-9]+: c5 .* vpand %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c4 .* vpand %xmm12,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm13,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm3,%xmm14 + +[a-f0-9]+: c5 .* vpor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c4 .* vpor %xmm12,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm13,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm3,%xmm14 + +[a-f0-9]+: c5 .* vpand %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c4 .* vpand %ymm12,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm13,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm3,%ymm14 + +[a-f0-9]+: c5 .* vpor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c4 .* vpor %ymm12,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm13,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm3,%ymm14 + +[a-f0-9]+: c5 .* vpand 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%rax\),%ymm2,%ymm3 + 
+[a-f0-9]+: 62 .* vpandq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%rax\),%ymm2,%ymm3 #pass diff --git a/gas/testsuite/gas/i386/x86-64-optimize-3.d b/gas/testsuite/gas/i386/x86-64-optimize-3.d index 5e2832d..fb73b1e 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-3.d +++ b/gas/testsuite/gas/i386/x86-64-optimize-3.d @@ -115,4 +115,76 @@ Disassembly of section .text: +[a-f0-9]+: 62 f1 ff 89 6f d1 vmovdqu16 %xmm1,%xmm2\{%k1\}\{z\} +[a-f0-9]+: 62 f1 7e 89 6f d1 vmovdqu32 %xmm1,%xmm2\{%k1\}\{z\} +[a-f0-9]+: 62 f1 fe 89 6f d1 vmovdqu64 %xmm1,%xmm2\{%k1\}\{z\} + +[a-f0-9]+: c5 .* vpand %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c4 .* vpand %xmm12,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm13,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm3,%xmm14 + +[a-f0-9]+: c5 .* vpor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c4 .* vpor %xmm12,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm13,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm3,%xmm14 + +[a-f0-9]+: c5 .* vpand %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c4 .* vpand %ymm12,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm13,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm3,%ymm14 + +[a-f0-9]+: c5 .* vpor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c4 .* vpor %ymm12,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm13,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm3,%ymm14 + +[a-f0-9]+: c5 .* vpand 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandd 
0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandd %xmm22,%xmm23,%xmm24 + +[a-f0-9]+: 62 .* vpandq %ymm22,%ymm3,%ymm4 + +[a-f0-9]+: 62 .* vpandnd %ymm2,%ymm23,%ymm4 + +[a-f0-9]+: 62 .* vpandnq %xmm2,%xmm3,%xmm24 + +[a-f0-9]+: 62 .* vpord %xmm22,%xmm23,%xmm24 + +[a-f0-9]+: 62 .* vporq %ymm22,%ymm3,%ymm4 + +[a-f0-9]+: 62 .* vpxord %ymm2,%ymm23,%ymm4 + +[a-f0-9]+: 62 .* vpxorq %xmm2,%xmm3,%xmm24 + +[a-f0-9]+: 62 .* vpandd %xmm2,%xmm3,%xmm4\{%k5\} + +[a-f0-9]+: 62 .* vpandq %ymm12,%ymm3,%ymm4\{%k5\} + +[a-f0-9]+: 62 .* vpandnd %ymm2,%ymm13,%ymm4\{%k5\} + +[a-f0-9]+: 62 .* vpandnq %xmm2,%xmm3,%xmm14\{%k5\} + +[a-f0-9]+: 62 .* vpord %xmm2,%xmm3,%xmm4\{%k5\} + +[a-f0-9]+: 62 .* vporq %ymm12,%ymm3,%ymm4\{%k5\} + +[a-f0-9]+: 62 .* vpxord %ymm2,%ymm13,%ymm4\{%k5\} + +[a-f0-9]+: 62 .* vpxorq 
%xmm2,%xmm3,%xmm14\{%k5\} + +[a-f0-9]+: 62 .* vpandd \(%rax\)\{1to8\},%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandq \(%rax\)\{1to2\},%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnd \(%rax\)\{1to4\},%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnq \(%rax\)\{1to4\},%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpord \(%rax\)\{1to8\},%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vporq \(%rax\)\{1to2\},%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxord \(%rax\)\{1to4\},%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxorq \(%rax\)\{1to4\},%ymm2,%ymm3 #pass diff --git a/gas/testsuite/gas/i386/x86-64-optimize-3.s b/gas/testsuite/gas/i386/x86-64-optimize-3.s index d9c2eb8..56bda5c 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-3.s +++ b/gas/testsuite/gas/i386/x86-64-optimize-3.s @@ -126,3 +126,84 @@ _start: vmovdqu16 %xmm1, %xmm2{%k1}{z} vmovdqu32 %xmm1, %xmm2{%k1}{z} vmovdqu64 %xmm1, %xmm2{%k1}{z} + + vpandd %xmm2, %xmm3, %xmm4 + vpandq %xmm12, %xmm3, %xmm4 + vpandnd %xmm2, %xmm13, %xmm4 + vpandnq %xmm2, %xmm3, %xmm14 + vpord %xmm2, %xmm3, %xmm4 + vporq %xmm12, %xmm3, %xmm4 + vpxord %xmm2, %xmm13, %xmm4 + vpxorq %xmm2, %xmm3, %xmm14 + + vpandd %ymm2, %ymm3, %ymm4 + vpandq %ymm12, %ymm3, %ymm4 + vpandnd %ymm2, %ymm13, %ymm4 + vpandnq %ymm2, %ymm3, %ymm14 + vpord %ymm2, %ymm3, %ymm4 + vporq %ymm12, %ymm3, %ymm4 + vpxord %ymm2, %ymm13, %ymm4 + vpxorq %ymm2, %ymm3, %ymm14 + + vpandd 112(%rax), %xmm2, %xmm3 + vpandq 112(%rax), %xmm2, %xmm3 + vpandnd 112(%rax), %xmm2, %xmm3 + vpandnq 112(%rax), %xmm2, %xmm3 + vpord 112(%rax), %xmm2, %xmm3 + vporq 112(%rax), %xmm2, %xmm3 + vpxord 112(%rax), %xmm2, %xmm3 + vpxorq 112(%rax), %xmm2, %xmm3 + + vpandd 128(%rax), %xmm2, %xmm3 + vpandq 128(%rax), %xmm2, %xmm3 + vpandnd 128(%rax), %xmm2, %xmm3 + vpandnq 128(%rax), %xmm2, %xmm3 + vpord 128(%rax), %xmm2, %xmm3 + vporq 128(%rax), %xmm2, %xmm3 + vpxord 128(%rax), %xmm2, %xmm3 + vpxorq 128(%rax), %xmm2, %xmm3 + + vpandd 96(%rax), %ymm2, %ymm3 + vpandq 96(%rax), %ymm2, %ymm3 + vpandnd 96(%rax), %ymm2, %ymm3 + vpandnq 96(%rax), %ymm2, %ymm3 + vpord 96(%rax), %ymm2, 
%ymm3 + vporq 96(%rax), %ymm2, %ymm3 + vpxord 96(%rax), %ymm2, %ymm3 + vpxorq 96(%rax), %ymm2, %ymm3 + + vpandd 128(%rax), %ymm2, %ymm3 + vpandq 128(%rax), %ymm2, %ymm3 + vpandnd 128(%rax), %ymm2, %ymm3 + vpandnq 128(%rax), %ymm2, %ymm3 + vpord 128(%rax), %ymm2, %ymm3 + vporq 128(%rax), %ymm2, %ymm3 + vpxord 128(%rax), %ymm2, %ymm3 + vpxorq 128(%rax), %ymm2, %ymm3 + + vpandd %xmm22, %xmm23, %xmm24 + vpandq %ymm22, %ymm3, %ymm4 + vpandnd %ymm2, %ymm23, %ymm4 + vpandnq %xmm2, %xmm3, %xmm24 + vpord %xmm22, %xmm23, %xmm24 + vporq %ymm22, %ymm3, %ymm4 + vpxord %ymm2, %ymm23, %ymm4 + vpxorq %xmm2, %xmm3, %xmm24 + + vpandd %xmm2, %xmm3, %xmm4{%k5} + vpandq %ymm12, %ymm3, %ymm4{%k5} + vpandnd %ymm2, %ymm13, %ymm4{%k5} + vpandnq %xmm2, %xmm3, %xmm14{%k5} + vpord %xmm2, %xmm3, %xmm4{%k5} + vporq %ymm12, %ymm3, %ymm4{%k5} + vpxord %ymm2, %ymm13, %ymm4{%k5} + vpxorq %xmm2, %xmm3, %xmm14{%k5} + + vpandd (%rax){1to8}, %ymm2, %ymm3 + vpandq (%rax){1to2}, %xmm2, %xmm3 + vpandnd (%rax){1to4}, %xmm2, %xmm3 + vpandnq (%rax){1to4}, %ymm2, %ymm3 + vpord (%rax){1to8}, %ymm2, %ymm3 + vporq (%rax){1to2}, %xmm2, %xmm3 + vpxord (%rax){1to4}, %xmm2, %xmm3 + vpxorq (%rax){1to4}, %ymm2, %ymm3 diff --git a/gas/testsuite/gas/i386/x86-64-optimize-4.d b/gas/testsuite/gas/i386/x86-64-optimize-4.d index 18fdeb1..d25d24d 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-4.d +++ b/gas/testsuite/gas/i386/x86-64-optimize-4.d @@ -15,4 +15,12 @@ Disassembly of section .text: +[a-f0-9]+: 62 f1 ff 08 6f d1 vmovdqu16 %xmm1,%xmm2 +[a-f0-9]+: 62 f1 7e 08 6f d1 vmovdqu32 %xmm1,%xmm2 +[a-f0-9]+: 62 f1 fe 08 6f d1 vmovdqu64 %xmm1,%xmm2 + +[a-f0-9]+: 62 .* vpandd %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: 62 .* vpandq %ymm12,%ymm3,%ymm4 + +[a-f0-9]+: 62 .* vpandnd %ymm2,%ymm13,%ymm4 + +[a-f0-9]+: 62 .* vpandnq %xmm2,%xmm3,%xmm14 + +[a-f0-9]+: 62 .* vpord %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: 62 .* vporq %ymm12,%ymm3,%ymm4 + +[a-f0-9]+: 62 .* vpxord %ymm2,%ymm13,%ymm4 + +[a-f0-9]+: 62 .* vpxorq %xmm2,%xmm3,%xmm14 #pass diff --git 
a/gas/testsuite/gas/i386/x86-64-optimize-4.s b/gas/testsuite/gas/i386/x86-64-optimize-4.s index b6d872d..a3f69a3 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-4.s +++ b/gas/testsuite/gas/i386/x86-64-optimize-4.s @@ -11,3 +11,12 @@ _start: {nooptimize} vmovdqu16 %xmm1, %xmm2 {nooptimize} vmovdqu32 %xmm1, %xmm2 {nooptimize} vmovdqu64 %xmm1, %xmm2 + + {nooptimize} vpandd %xmm2, %xmm3, %xmm4 + {nooptimize} vpandq %ymm12, %ymm3, %ymm4 + {nooptimize} vpandnd %ymm2, %ymm13, %ymm4 + {nooptimize} vpandnq %xmm2, %xmm3, %xmm14 + {nooptimize} vpord %xmm2, %xmm3, %xmm4 + {nooptimize} vporq %ymm12, %ymm3, %ymm4 + {nooptimize} vpxord %ymm2, %ymm13, %ymm4 + {nooptimize} vpxorq %xmm2, %xmm3, %xmm14 diff --git a/gas/testsuite/gas/i386/x86-64-optimize-5.d b/gas/testsuite/gas/i386/x86-64-optimize-5.d index 5065d65..0fb20b3 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-5.d +++ b/gas/testsuite/gas/i386/x86-64-optimize-5.d @@ -155,6 +155,54 @@ Disassembly of section .text: +[a-f0-9]+: 62 f1 7e 28 7f 48 04 vmovdqu32 %ymm1,0x80\(%rax\) +[a-f0-9]+: 62 f1 fe 28 7f 48 04 vmovdqu64 %ymm1,0x80\(%rax\) +[a-f0-9]+: 62 f1 7d 48 6f 10 vmovdqa32 \(%rax\),%zmm2 + +[a-f0-9]+: c5 .* vpand %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c4 .* vpand %xmm12,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm13,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm3,%xmm14 + +[a-f0-9]+: c5 .* vpor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c4 .* vpor %xmm12,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm13,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm3,%xmm14 + +[a-f0-9]+: c5 .* vpand %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c4 .* vpand %ymm12,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm13,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm3,%ymm14 + +[a-f0-9]+: c5 .* vpor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c4 .* vpor %ymm12,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm13,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm3,%ymm14 + +[a-f0-9]+: c5 .* vpand 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* 
vpandn 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%rax\),%ymm2,%ymm3 +[a-f0-9]+: 62 f1 f5 08 55 e9 vandnpd %xmm1,%xmm1,%xmm5 +[a-f0-9]+: 62 f1 f5 08 55 e9 vandnpd %xmm1,%xmm1,%xmm5 +[a-f0-9]+: 62 f1 7d 28 6f d1 vmovdqa32 %ymm1,%ymm2 diff --git a/gas/testsuite/gas/i386/x86-64-optimize-6.d b/gas/testsuite/gas/i386/x86-64-optimize-6.d index 8ebd9b2..c9f7da6 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-6.d +++ b/gas/testsuite/gas/i386/x86-64-optimize-6.d @@ -155,6 +155,54 @@ Disassembly of section .text: +[a-f0-9]+: 62 f1 7e 28 7f 48 
04 vmovdqu32 %ymm1,0x80\(%rax\) +[a-f0-9]+: 62 f1 fe 28 7f 48 04 vmovdqu64 %ymm1,0x80\(%rax\) +[a-f0-9]+: 62 f1 7d 48 6f 10 vmovdqa32 \(%rax\),%zmm2 + +[a-f0-9]+: c5 .* vpand %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c4 .* vpand %xmm12,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm13,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm3,%xmm14 + +[a-f0-9]+: c5 .* vpor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c4 .* vpor %xmm12,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm13,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm3,%xmm14 + +[a-f0-9]+: c5 .* vpand %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c4 .* vpand %ymm12,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm13,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm3,%ymm14 + +[a-f0-9]+: c5 .* vpor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c4 .* vpor %ymm12,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm13,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm3,%ymm14 + +[a-f0-9]+: c5 .* vpand 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%rax\),%ymm2,%ymm3 + 
+[a-f0-9]+: c5 .* vpxor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%rax\),%ymm2,%ymm3 +[a-f0-9]+: 62 f1 f5 08 55 e9 vandnpd %xmm1,%xmm1,%xmm5 +[a-f0-9]+: 62 f1 f5 08 55 e9 vandnpd %xmm1,%xmm1,%xmm5 +[a-f0-9]+: 62 f1 7d 28 6f d1 vmovdqa32 %ymm1,%ymm2 @@ -163,4 +211,12 @@ Disassembly of section .text: +[a-f0-9]+: 62 f1 ff 08 6f d1 vmovdqu16 %xmm1,%xmm2 +[a-f0-9]+: 62 f1 7e 08 6f d1 vmovdqu32 %xmm1,%xmm2 +[a-f0-9]+: 62 f1 fe 08 6f d1 vmovdqu64 %xmm1,%xmm2 + +[a-f0-9]+: 62 .* vpandd %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: 62 .* vpandq %ymm12,%ymm3,%ymm4 + +[a-f0-9]+: 62 .* vpandnd %ymm2,%ymm13,%ymm4 + +[a-f0-9]+: 62 .* vpandnq %xmm2,%xmm3,%xmm14 + +[a-f0-9]+: 62 .* vpord %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: 62 .* vporq %ymm12,%ymm3,%ymm4 + +[a-f0-9]+: 62 .* vpxord %ymm2,%ymm13,%ymm4 + +[a-f0-9]+: 62 .* vpxorq %xmm2,%xmm3,%xmm14 #pass diff --git a/gas/testsuite/gas/i386/x86-64-optimize-6.s b/gas/testsuite/gas/i386/x86-64-optimize-6.s index 7c403fc..8f775b0 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-6.s +++ b/gas/testsuite/gas/i386/x86-64-optimize-6.s @@ -13,3 +13,12 @@ {evex} vmovdqu16 %xmm1, %xmm2 {evex} vmovdqu32 %xmm1, %xmm2 {evex} vmovdqu64 %xmm1, %xmm2 + + {evex} vpandd %xmm2, %xmm3, %xmm4 + {evex} vpandq %ymm12, %ymm3, %ymm4 + {evex} vpandnd %ymm2, %ymm13, %ymm4 + {evex} vpandnq %xmm2, %xmm3, %xmm14 + {evex} vpord %xmm2, %xmm3, %xmm4 + {evex} vporq %ymm12, %ymm3, %ymm4 + {evex} vpxord %ymm2, %ymm13, %ymm4 + {evex} vpxorq %xmm2, %xmm3, %xmm14 diff --git a/opcodes/ChangeLog b/opcodes/ChangeLog index a7322ae..c6a713b 100644 --- a/opcodes/ChangeLog 
+++ b/opcodes/ChangeLog @@ -1,5 +1,11 @@ 2019-07-01 Jan Beulich <jbeulich@suse.com> + * i386-opc.tbl (and, or): Add Optimize to forms allowing two + register operands. + * i386-tbl.h: Re-generate. + +2019-07-01 Jan Beulich <jbeulich@suse.com> + * i386-dis-evex-prefix.h: Use PCLMUL for vpclmulqdq. * i386-opc.tbl (vpclmullqlqdq, vpclmulhqlqdq, vpclmullqhqdq, vpclmulhqhqdq): Add CpuVPCLMULQDQ flavors. diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl index b9c5e32..abc4155 100644 --- a/opcodes/i386-opc.tbl +++ b/opcodes/i386-opc.tbl @@ -3754,9 +3754,9 @@ vrcp14pd, 2, 0x664C, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=1|VexW=2|Bro vrsqrt14pd, 2, 0x664E, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=1|VexW=2|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM } vpaddd, 3, 0x66FE, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } -vpandd, 3, 0x66DB, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } +vpandd, 3, 0x66DB, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } vpandnd, 3, 0x66DF, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } -vpord, 3, 0x66EB, 
None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } +vpord, 3, 0x66EB, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } vpsubd, 3, 0x66FA, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } vpunpckhdq, 3, 0x666A, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } vpunpckldq, 3, 0x6662, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } @@ -3764,9 +3764,9 @@ vpxord, 3, 0x66EF, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|Ve vpaddq, 3, 0x66D4, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } vpandnq, 3, 0x66DF, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } 
-vpandq, 3, 0x66DB, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } +vpandq, 3, 0x66DB, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } vpmuludq, 3, 0x66F4, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } -vporq, 3, 0x66EB, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } +vporq, 3, 0x66EB, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } vpsubq, 3, 0x66FB, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } vpunpckhqdq, 3, 0x666D, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } vpunpcklqdq, 3, 0x666C, None, 1, CpuAVX512F, 
Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } diff --git a/opcodes/i386-tbl.h b/opcodes/i386-tbl.h index 3e874a6..e8c5eda 100644 --- a/opcodes/i386-tbl.h +++ b/opcodes/i386-tbl.h @@ -60419,7 +60419,7 @@ const insn_template i386_optab[] = 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 1, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0 }, + 1, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 7, 0, 0, 1, 0, 0, 0, 0, 0 }, { { { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0 } }, @@ -60457,7 +60457,7 @@ const insn_template i386_optab[] = 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 1, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0 }, + 1, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 7, 0, 0, 1, 0, 0, 0, 0, 0 }, { { { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0 } }, @@ -60514,7 +60514,7 @@ const insn_template i386_optab[] = 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 2, 0, 0, 0, 0, 0, 0, 3, 4, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0 }, + 2, 0, 0, 0, 0, 0, 0, 3, 4, 0, 0, 7, 0, 0, 1, 0, 0, 0, 0, 0 }, { { { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0 } }, @@ -60533,7 +60533,7 @@ const insn_template i386_optab[] = 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
1, - 2, 0, 0, 0, 0, 0, 0, 3, 4, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0 }, + 2, 0, 0, 0, 0, 0, 0, 3, 4, 0, 0, 7, 0, 0, 1, 0, 0, 0, 0, 0 }, { { { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0 } }, |