diff options
author | Jan Beulich <jbeulich@suse.com> | 2024-09-27 11:23:12 +0200 |
---|---|---|
committer | Jan Beulich <jbeulich@suse.com> | 2024-09-27 11:23:12 +0200 |
commit | ca6b6f9d6e65d27bc40c8f1e4c6d39fe04cfbdb2 (patch) | |
tree | 0eaf2affbf899f4c470debc6a457a95335cf0f84 | |
parent | f079b0c4b2923393c2ce58e72dfd5a1f2c7a339a (diff) | |
download | binutils-ca6b6f9d6e65d27bc40c8f1e4c6d39fe04cfbdb2.zip binutils-ca6b6f9d6e65d27bc40c8f1e4c6d39fe04cfbdb2.tar.gz binutils-ca6b6f9d6e65d27bc40c8f1e4c6d39fe04cfbdb2.tar.bz2 |
x86: optimize {,V}INSERTPS with certain immediates
They are equivalent to simple moves or xors, which are up to 3 bytes
shorter to encode (and maybe/likely also cheaper to execute).
-rw-r--r-- | gas/config/tc-i386.c | 96 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/optimize-1.d | 6 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/optimize-1.s | 8 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/optimize-1a.d | 6 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/optimize-4.d | 6 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/optimize-5.d | 6 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/x86-64-optimize-insertps.d | 26 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/x86-64-optimize-insertps.l | 26 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/x86-64-optimize-insertps.s | 20 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/x86-64.exp | 2 | ||||
-rw-r--r-- | opcodes/i386-opc.tbl | 6 | ||||
-rw-r--r-- | opcodes/i386-tbl.h | 8 |
12 files changed, 209 insertions, 7 deletions
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index 620dc9c..ed12c09 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -5661,6 +5661,102 @@ optimize_encoding (void) i.operands = 2; i.imm_operands = 0; } + else if (i.tm.base_opcode == 0x21 + && i.tm.opcode_space == SPACE_0F3A + && i.op[0].imms->X_op == O_constant + && (i.operands == i.reg_operands + 1 + ? i.op[0].imms->X_add_number == 0 + || (i.op[0].imms->X_add_number & 0xf) == 0xf + : (i.op[0].imms->X_add_number & 0x3f) == 0x0e + && (i.reg_operands == 1 || i.op[2].regs == i.op[3].regs))) + { + /* Optimize: -O: + insertps $0b....1111, %xmmN, %xmmM -> xorps %xmmM, %xmmM + insertps $0b00000000, %xmmN, %xmmM -> movss %xmmN, %xmmM + insertps $0b..001110, mem, %xmmN -> movss mem, %xmmN + vinsertps $0b....1111, %xmmN, %xmmM, %xmmK -> vxorps %xmm?, %xmm?, %xmmK + vinsertps $0b00000000, %xmmN, %xmmM, %xmmK -> vmovss %xmmN, %xmmM, %xmmK + vinsertps $0b..001110, mem, %xmmN, %xmmN -> vmovss mem, %xmmN + */ + i.tm.opcode_space = SPACE_0F; + if ((i.op[0].imms->X_add_number & 0xf) == 0xf) + { + i.tm.base_opcode = 0x57; + i.tm.opcode_modifier.opcodeprefix = PREFIX_NONE; + + --i.operands; + + i.op[i.operands - 1].regs = i.op[i.operands].regs; + i.types[i.operands - 1] = i.types[i.operands]; + i.flags[i.operands - 1] = i.flags[i.operands]; + i.tm.operand_types[i.operands - 1] = i.tm.operand_types[i.operands]; + + i.op[1].regs = i.op[i.operands - 1].regs; + i.types[1] = i.types[i.operands - 1]; + i.flags[1] = i.flags[i.operands - 1]; + i.tm.operand_types[1] = i.tm.operand_types[i.operands - 1]; + + i.op[0].regs = i.op[1].regs; + i.types[0] = i.types[1]; + i.flags[0] = i.flags[1]; + i.tm.operand_types[0] = i.tm.operand_types[1]; + + /* Switch from EVEX to VEX encoding if possible. Sadly we can't + (always) tell use of the {evex} pseudo-prefix (which otherwise + we'd like to respect) from use of %xmm16-%xmm31. 
*/ + if (pp.encoding == encoding_evex) + pp.encoding = encoding_default; + if (i.tm.opcode_modifier.evex + && pp.encoding <= encoding_vex3 + && !(i.op[0].regs->reg_flags & RegVRex)) + { + i.tm.opcode_modifier.evex = 0; + i.tm.opcode_modifier.vex = VEX128; + } + + /* Switch from VEX3 to VEX2 encoding if possible. */ + if (i.tm.opcode_modifier.vex + && pp.encoding <= encoding_vex + && (i.op[0].regs->reg_flags & RegRex)) + { + i.op[0].regs -= 8; + i.op[1].regs = i.op[0].regs; + } + } + else + { + i.tm.base_opcode = 0x10; + i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3; + + if (i.op[0].imms->X_add_number == 0) + { + i.op[0].regs = i.op[1].regs; + --i.operands; + } + else + { + i.op[0].disps = i.op[1].disps; + i.reloc[0] = i.reloc[1]; + i.operands = 2; + i.tm.opcode_modifier.vexvvvv = 0; + } + i.types[0] = i.types[1]; + i.flags[0] = i.flags[1]; + i.tm.operand_types[0] = i.tm.operand_types[1]; + + i.op[1].regs = i.op[2].regs; + i.types[1] = i.types[2]; + i.flags[1] = i.flags[2]; + i.tm.operand_types[1] = i.tm.operand_types[2]; + + i.op[2].regs = i.op[3].regs; + i.types[2] = i.types[3]; + i.flags[2] = i.flags[3]; + i.tm.operand_types[2] = i.tm.operand_types[3]; + } + + i.imm_operands = 0; + } } /* Check whether the promoted (to address size) register is usable as index diff --git a/gas/testsuite/gas/i386/optimize-1.d b/gas/testsuite/gas/i386/optimize-1.d index fffa655..d50814d 100644 --- a/gas/testsuite/gas/i386/optimize-1.d +++ b/gas/testsuite/gas/i386/optimize-1.d @@ -190,6 +190,12 @@ Disassembly of section .text: +[a-f0-9]+: c5 .* vmovupd %ymm1,\(%edx\) +[a-f0-9]+: c5 .* vmovdqa %ymm1,%ymm2 +[a-f0-9]+: c5 .* vmovdqu %ymm1,\(%edx\) + +[a-f0-9]+: f3 .* movss %xmm1,%xmm2 + +[a-f0-9]+: f3 .* movss \(%ecx\),%xmm2 + +[a-f0-9]+: 0f .* xorps %xmm2,%xmm2 + +[a-f0-9]+: c5 .* vmovss %xmm1,%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vmovss \(%ecx\),%xmm2 + +[a-f0-9]+: c5 .* vxorps %xmm3,%xmm3,%xmm3 +[a-f0-9]+: 0f ba e0 0f bt \$0xf,%eax +[a-f0-9]+: 66 0f ba e0 10 bt \$0x10,%ax +[a-f0-9]+: 0f 
ba f8 0f btc \$0xf,%eax diff --git a/gas/testsuite/gas/i386/optimize-1.s b/gas/testsuite/gas/i386/optimize-1.s index afdeed4..fad6393 100644 --- a/gas/testsuite/gas/i386/optimize-1.s +++ b/gas/testsuite/gas/i386/optimize-1.s @@ -224,6 +224,14 @@ _start: vextracti64x4 $0, %zmm1, %ymm2 vextracti64x4 $0, %zmm1, (%edx) + insertps $0, %xmm1, %xmm2 + insertps $0xce, (%ecx), %xmm2 + insertps $0xff, %xmm1, %xmm2 + + vinsertps $0, %xmm1, %xmm2, %xmm3 + vinsertps $0xce, (%ecx), %xmm2, %xmm2 + vinsertps $0xff, %xmm1, %xmm2, %xmm3 + bt $15, %ax bt $16, %ax btc $15, %ax diff --git a/gas/testsuite/gas/i386/optimize-1a.d b/gas/testsuite/gas/i386/optimize-1a.d index aff3768..a59c616 100644 --- a/gas/testsuite/gas/i386/optimize-1a.d +++ b/gas/testsuite/gas/i386/optimize-1a.d @@ -191,6 +191,12 @@ Disassembly of section .text: +[a-f0-9]+: c5 .* vmovupd %ymm1,\(%edx\) +[a-f0-9]+: c5 .* vmovdqa %ymm1,%ymm2 +[a-f0-9]+: c5 .* vmovdqu %ymm1,\(%edx\) + +[a-f0-9]+: f3 .* movss %xmm1,%xmm2 + +[a-f0-9]+: f3 .* movss \(%ecx\),%xmm2 + +[a-f0-9]+: 0f .* xorps %xmm2,%xmm2 + +[a-f0-9]+: c5 .* vmovss %xmm1,%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vmovss \(%ecx\),%xmm2 + +[a-f0-9]+: c5 .* vxorps %xmm3,%xmm3,%xmm3 +[a-f0-9]+: 0f ba e0 0f bt \$0xf,%eax +[a-f0-9]+: 66 0f ba e0 10 bt \$0x10,%ax +[a-f0-9]+: 0f ba f8 0f btc \$0xf,%eax diff --git a/gas/testsuite/gas/i386/optimize-4.d b/gas/testsuite/gas/i386/optimize-4.d index fbc142a..8d29aab 100644 --- a/gas/testsuite/gas/i386/optimize-4.d +++ b/gas/testsuite/gas/i386/optimize-4.d @@ -190,6 +190,12 @@ Disassembly of section .text: +[a-f0-9]+: c5 .* vmovupd %ymm1,\(%edx\) +[a-f0-9]+: c5 .* vmovdqa %ymm1,%ymm2 +[a-f0-9]+: c5 .* vmovdqu %ymm1,\(%edx\) + +[a-f0-9]+: f3 .* movss %xmm1,%xmm2 + +[a-f0-9]+: f3 .* movss \(%ecx\),%xmm2 + +[a-f0-9]+: 0f .* xorps %xmm2,%xmm2 + +[a-f0-9]+: c5 .* vmovss %xmm1,%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vmovss \(%ecx\),%xmm2 + +[a-f0-9]+: c5 .* vxorps %xmm3,%xmm3,%xmm3 +[a-f0-9]+: 0f ba e0 0f bt \$0xf,%eax +[a-f0-9]+: 66 0f ba e0 10 bt 
\$0x10,%ax +[a-f0-9]+: 0f ba f8 0f btc \$0xf,%eax diff --git a/gas/testsuite/gas/i386/optimize-5.d b/gas/testsuite/gas/i386/optimize-5.d index a40022a..162a97f 100644 --- a/gas/testsuite/gas/i386/optimize-5.d +++ b/gas/testsuite/gas/i386/optimize-5.d @@ -190,6 +190,12 @@ Disassembly of section .text: +[a-f0-9]+: c5 .* vmovupd %ymm1,\(%edx\) +[a-f0-9]+: c5 .* vmovdqa %ymm1,%ymm2 +[a-f0-9]+: c5 .* vmovdqu %ymm1,\(%edx\) + +[a-f0-9]+: f3 .* movss %xmm1,%xmm2 + +[a-f0-9]+: f3 .* movss \(%ecx\),%xmm2 + +[a-f0-9]+: 0f .* xorps %xmm2,%xmm2 + +[a-f0-9]+: c5 .* vmovss %xmm1,%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vmovss \(%ecx\),%xmm2 + +[a-f0-9]+: c5 .* vxorps %xmm3,%xmm3,%xmm3 +[a-f0-9]+: 0f ba e0 0f bt \$0xf,%eax +[a-f0-9]+: 66 0f ba e0 10 bt \$0x10,%ax +[a-f0-9]+: 0f ba f8 0f btc \$0xf,%eax diff --git a/gas/testsuite/gas/i386/x86-64-optimize-insertps.d b/gas/testsuite/gas/i386/x86-64-optimize-insertps.d new file mode 100644 index 0000000..97300d0 --- /dev/null +++ b/gas/testsuite/gas/i386/x86-64-optimize-insertps.d @@ -0,0 +1,26 @@ +#as: -O -msse2avx +#objdump: -drw +#name: x86-64 INSERTPS optimized encoding with -msse2avx + +.*: +file format .* + + +Disassembly of section .text: + +0+ <insertps>: + +[a-f0-9]+: c5 ea 10 d1 vmovss %xmm1,%xmm2,%xmm2 + +[a-f0-9]+: c5 fa 10 11 vmovss \(%rcx\),%xmm2 + +[a-f0-9]+: 62 f9 7e 08 10 11 vmovss \(%r17\),%xmm2 + +[a-f0-9]+: c5 e8 57 d2 vxorps %xmm2,%xmm2,%xmm2 + +[a-f0-9]+: c5 ea 10 d9 vmovss %xmm1,%xmm2,%xmm3 + +[a-f0-9]+: c5 fa 10 11 vmovss \(%rcx\),%xmm2 + +[a-f0-9]+: c5 e0 57 db vxorps %xmm3,%xmm3,%xmm3 + +[a-f0-9]+: c5 e0 57 db vxorps %xmm3,%xmm3,%xmm3 + +[a-f0-9]+: c5 e0 57 db vxorps %xmm3,%xmm3,%xmm3 + +[a-f0-9]+: c5 60 57 db vxorps %xmm3,%xmm3,%xmm11 + +[a-f0-9]+: 62 b1 6e 08 10 d9 vmovss %xmm17,%xmm2,%xmm3 + +[a-f0-9]+: 62 e1 7e 08 10 11 vmovss \(%rcx\),%xmm18 + +[a-f0-9]+: c5 e0 57 db vxorps %xmm3,%xmm3,%xmm3 + +[a-f0-9]+: c5 e0 57 db vxorps %xmm3,%xmm3,%xmm3 + +[a-f0-9]+: 62 a1 64 00 57 db vxorps %xmm19,%xmm19,%xmm19 +#pass diff 
--git a/gas/testsuite/gas/i386/x86-64-optimize-insertps.l b/gas/testsuite/gas/i386/x86-64-optimize-insertps.l new file mode 100644 index 0000000..08eb0c1 --- /dev/null +++ b/gas/testsuite/gas/i386/x86-64-optimize-insertps.l @@ -0,0 +1,26 @@ +.*: Assembler messages: +.*:5: Error: .* +[ ]*[0-9a-f]+[ ]+\.text +[ ]*[0-9a-f]+[ ]+insertps: +[ ]*[0-9a-f]+[ ]+\?\?\?\? F30F10D1[ ]+insertps \$0, %xmm1, %xmm2 +[ ]*[0-9a-f]+[ ]+\?\?\?\? F30F1011[ ]+insertps \$0xce, \(%rcx\), %xmm2 +[ ]*[0-9a-f]+[ ]+insertps \$0x0e, \(%r17\), %xmm2 +[ ]*[0-9a-f]+[ ]+\?\?\?\? 0F57D2[ ]+insertps \$0xff, %xmm1, %xmm2 +[ ]*[0-9a-f]+[ ]+ +[ ]*[0-9a-f]+[ ]+\?\?\?\? C5EA10D9[ ]+vinsertps \$0, %xmm1, %xmm2, %xmm3 +[ ]*[0-9a-f]+[ ]+\?\?\?\? C5FA1011[ ]+vinsertps \$0xce, \(%rcx\), %xmm2, %xmm2 +[ ]*[0-9a-f]+[ ]+\?\?\?\? C5E057DB[ ]+vinsertps \$0xff, %xmm1, %xmm2, %xmm3 +[ ]*[0-9a-f]+[ ]+ +[ ]*[0-9a-f]+[ ]+\?\?\?\? C5E057DB[ ]+vinsertps \$0xbf, %xmm9, %xmm2, %xmm3 +[ ]*[0-9a-f]+[ ]+\?\?\?\? C5E057DB[ ]+vinsertps \$0x7f, %xmm1, %xmm10, %xmm3 +[ ]*[0-9a-f]+[ ]+\?\?\?\? C56057DB[ ]+vinsertps \$0x3f, %xmm1, %xmm2, %xmm11 +[ ]*[0-9a-f]+[ ]+ +[ ]*[0-9a-f]+[ ]+\?\?\?\? 62B16E08[ ]+vinsertps \$0, %xmm17, %xmm2, %xmm3 +[ ]*[0-9a-f]+[ ]+10D9 +[ ]*[0-9a-f]+[ ]+\?\?\?\? 62E17E08[ ]+vinsertps \$0xce, \(%rcx\), %xmm18, %xmm18 +[ ]*[0-9a-f]+[ ]+1011 +[ ]*[0-9a-f]+[ ]+\?\?\?\? C5E057DB[ ]+vinsertps \$0xff, %xmm17, %xmm2, %xmm3 +[ ]*[0-9a-f]+[ ]+\?\?\?\? C5E057DB[ ]+vinsertps \$0xff, %xmm1, %xmm18, %xmm3 +[ ]*[0-9a-f]+[ ]+\?\?\?\? 
62A16400[ ]+vinsertps \$0xff, %xmm1, %xmm2, %xmm19 +[ ]*[0-9a-f]+[ ]+57DB +#pass diff --git a/gas/testsuite/gas/i386/x86-64-optimize-insertps.s b/gas/testsuite/gas/i386/x86-64-optimize-insertps.s new file mode 100644 index 0000000..98240ac --- /dev/null +++ b/gas/testsuite/gas/i386/x86-64-optimize-insertps.s @@ -0,0 +1,20 @@ + .text +insertps: + insertps $0, %xmm1, %xmm2 + insertps $0xce, (%rcx), %xmm2 + insertps $0x0e, (%r17), %xmm2 + insertps $0xff, %xmm1, %xmm2 + + vinsertps $0, %xmm1, %xmm2, %xmm3 + vinsertps $0xce, (%rcx), %xmm2, %xmm2 + vinsertps $0xff, %xmm1, %xmm2, %xmm3 + + vinsertps $0xbf, %xmm9, %xmm2, %xmm3 + vinsertps $0x7f, %xmm1, %xmm10, %xmm3 + vinsertps $0x3f, %xmm1, %xmm2, %xmm11 + + vinsertps $0, %xmm17, %xmm2, %xmm3 + vinsertps $0xce, (%rcx), %xmm18, %xmm18 + vinsertps $0xff, %xmm17, %xmm2, %xmm3 + vinsertps $0xff, %xmm1, %xmm18, %xmm3 + vinsertps $0xff, %xmm1, %xmm2, %xmm19 diff --git a/gas/testsuite/gas/i386/x86-64.exp b/gas/testsuite/gas/i386/x86-64.exp index ad713dc..3ffeaaf 100644 --- a/gas/testsuite/gas/i386/x86-64.exp +++ b/gas/testsuite/gas/i386/x86-64.exp @@ -597,6 +597,8 @@ run_dump_test "x86-64-optimize-pextr" run_list_test "x86-64-optimize-extractps" "-O -aln" run_dump_test "x86-64-optimize-extractps" run_dump_test "x86-64-optimize-vextractNN" +run_list_test "x86-64-optimize-insertps" "-O -aln" +run_dump_test "x86-64-optimize-insertps" run_dump_test "x86-64-apx-ndd-optimize" run_dump_test "x86-64-align-branch-1a" run_dump_test "x86-64-align-branch-1b" diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl index 29289ca..0cd8538 100644 --- a/opcodes/i386-opc.tbl +++ b/opcodes/i386-opc.tbl @@ -1495,7 +1495,7 @@ extractps, 0x6617, AVX|AVX512F, Modrm|Vex128|EVex128|Space0F3A|VexW0|Disp8MemShi extractps, 0x6617, x64&(AVX|AVX512F), RegMem|Vex128|EVex128|Space0F3A|VexW1|NoSuf|SSE2AVX|Optimize, { Imm8, RegXMM, Reg64 } extractps, 0x660f3a17, SSE4_1, Modrm|IgnoreSize|NoSuf|Optimize, { Imm8, RegXMM, Reg32|Unspecified|BaseIndex } extractps, 
0x660f3a17, SSE4_1&x64, RegMem|NoSuf|Optimize|NoRex64, { Imm8, RegXMM, Reg64 } -insertps<SSE41D>, 0x660f3a21, <SSE41D:cpu>, Modrm|<SSE41D:attr>|<SSE41D:vvvv>|Disp8MemShift|NoSuf, { Imm8, Dword|Unspecified|BaseIndex|RegXMM, RegXMM } +insertps<SSE41D>, 0x660f3a21, <SSE41D:cpu>, Modrm|<SSE41D:attr>|<SSE41D:vvvv>|Disp8MemShift|NoSuf|Optimize, { Imm8, Dword|Unspecified|BaseIndex|RegXMM, RegXMM } movntdqa<SSE41D>, 0x660f382a, <SSE41D:cpu>, Modrm|<SSE41D:attr>|NoSuf, { Xmmword|Unspecified|BaseIndex, RegXMM } mpsadbw<sse41>, 0x660f3a42, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM } packusdw<SSE41BW>, 0x660f382b, <SSE41BW:cpu>, Modrm|<SSE41BW:attr>|<SSE41BW:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM } @@ -1672,7 +1672,7 @@ vhaddps, 0xf27c, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, vhsubpd, 0x667d, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM } vhsubps, 0xf27d, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM } vinsertf128, 0x6618, AVX, Modrm|Vex256|Space0F3A|Src1VVVV|VexW0|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM, RegYMM, RegYMM } -vinsertps, 0x6621, AVX, Modrm|Vex|Space0F3A|Src1VVVV|VexWIG|NoSuf, { Imm8, Dword|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM } +vinsertps, 0x6621, AVX, Modrm|Vex|Space0F3A|Src1VVVV|VexWIG|NoSuf|Optimize, { Imm8, Dword|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM } vlddqu, 0xf2f0, AVX, Modrm|Vex|Space0F|VexWIG|CheckOperandSize|NoSuf, { Xmmword|Ymmword|Unspecified|BaseIndex, RegXMM|RegYMM } vldmxcsr, 0xae/2, AVX, Modrm|Vex128|Space0F|VexWIG|NoSuf, { Dword|Unspecified|BaseIndex } vmaskmovdqu, 0x66f7, AVX, Modrm|Vex|Space0F|VexWIG|NoSuf, { RegXMM, RegXMM } @@ -2394,7 +2394,7 @@ vinserti32x4, 0x6638, AVX512F, Modrm|Masking|Space0F3A|Src1VVVV|VexW0|Disp8MemSh vinsertf64x4, 0x661A, AVX512F, 
Modrm|EVex=1|Masking|Space0F3A|Src1VVVV|VexW1|Disp8MemShift=5|NoSuf, { Imm8, RegYMM|Unspecified|BaseIndex, RegZMM, RegZMM } vinserti64x4, 0x663A, AVX512F, Modrm|EVex=1|Masking|Space0F3A|Src1VVVV|VexW1|Disp8MemShift=5|NoSuf, { Imm8, RegYMM|Unspecified|BaseIndex, RegZMM, RegZMM } -vinsertps, 0x6621, AVX512F, Modrm|EVex128|Space0F3A|Src1VVVV|VexW0|Disp8MemShift=2|NoSuf, { Imm8, RegXMM|Dword|Unspecified|BaseIndex, RegXMM, RegXMM } +vinsertps, 0x6621, AVX512F, Modrm|EVex128|Space0F3A|Src1VVVV|VexW0|Disp8MemShift=2|NoSuf|Optimize, { Imm8, RegXMM|Dword|Unspecified|BaseIndex, RegXMM, RegXMM } vmovap<sd>, 0x<sd:ppfx>28, AVX512F, D|Modrm|Masking|Space0F|<sd:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM } vmovntp<sd>, 0x<sd:ppfx>2B, AVX512F, Modrm|Space0F|<sd:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM, XMMword|YMMword|ZMMword|Unspecified|BaseIndex } diff --git a/opcodes/i386-tbl.h b/opcodes/i386-tbl.h index 5e04c5d..4de9578 100644 --- a/opcodes/i386-tbl.h +++ b/opcodes/i386-tbl.h @@ -18062,7 +18062,7 @@ static const insn_template i386_optab[] = 0, 0, 0, 0, 0, 0 } } } }, { MN_insertps, 0x21, 3, SPACE_0F3A, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 1, 1, 1, 0, 1, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 1, 1, 1, 0, 1, 2, 0, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0 }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0 } }, @@ -18074,7 +18074,7 @@ static const insn_template i386_optab[] = 1, 0, 0, 0, 0, 0 } } } }, { MN_insertps, 0x21, 3, SPACE_0F3A, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0 }, { { 28, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -26050,7 +26050,7 @@ static const insn_template i386_optab[] = 0, 1, 0, 0, 0, 0 
} } } }, { MN_vinsertps, 0x21, 4, SPACE_0F3A, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 1, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 1, 1, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0 }, { { 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -26064,7 +26064,7 @@ static const insn_template i386_optab[] = 1, 0, 0, 0, 0, 0 } } } }, { MN_vinsertps, 0x21, 4, SPACE_0F3A, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 1, 1, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 0, 0, 2, 0, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0 }, { { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, |