aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jan Beulich <jbeulich@suse.com> 2024-09-27 11:23:12 +0200
committer Jan Beulich <jbeulich@suse.com> 2024-09-27 11:23:12 +0200
commit ca6b6f9d6e65d27bc40c8f1e4c6d39fe04cfbdb2 (patch)
tree 0eaf2affbf899f4c470debc6a457a95335cf0f84
parent f079b0c4b2923393c2ce58e72dfd5a1f2c7a339a (diff)
download binutils-ca6b6f9d6e65d27bc40c8f1e4c6d39fe04cfbdb2.zip
binutils-ca6b6f9d6e65d27bc40c8f1e4c6d39fe04cfbdb2.tar.gz
binutils-ca6b6f9d6e65d27bc40c8f1e4c6d39fe04cfbdb2.tar.bz2
x86: optimize {,V}INSERTPS with certain immediates
They are equivalent to simple moves or xors, which are up to 3 bytes shorter to encode (and maybe/likely also cheaper to execute).
-rw-r--r-- gas/config/tc-i386.c 96
-rw-r--r-- gas/testsuite/gas/i386/optimize-1.d 6
-rw-r--r-- gas/testsuite/gas/i386/optimize-1.s 8
-rw-r--r-- gas/testsuite/gas/i386/optimize-1a.d 6
-rw-r--r-- gas/testsuite/gas/i386/optimize-4.d 6
-rw-r--r-- gas/testsuite/gas/i386/optimize-5.d 6
-rw-r--r-- gas/testsuite/gas/i386/x86-64-optimize-insertps.d 26
-rw-r--r-- gas/testsuite/gas/i386/x86-64-optimize-insertps.l 26
-rw-r--r-- gas/testsuite/gas/i386/x86-64-optimize-insertps.s 20
-rw-r--r-- gas/testsuite/gas/i386/x86-64.exp 2
-rw-r--r-- opcodes/i386-opc.tbl 6
-rw-r--r-- opcodes/i386-tbl.h 8
12 files changed, 209 insertions, 7 deletions
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index 620dc9c..ed12c09 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -5661,6 +5661,102 @@ optimize_encoding (void)
i.operands = 2;
i.imm_operands = 0;
}
+ else if (i.tm.base_opcode == 0x21
+ && i.tm.opcode_space == SPACE_0F3A
+ && i.op[0].imms->X_op == O_constant
+ && (i.operands == i.reg_operands + 1
+ ? i.op[0].imms->X_add_number == 0
+ || (i.op[0].imms->X_add_number & 0xf) == 0xf
+ : (i.op[0].imms->X_add_number & 0x3f) == 0x0e
+ && (i.reg_operands == 1 || i.op[2].regs == i.op[3].regs)))
+ {
+ /* Optimize: -O:
+ insertps $0b....1111, %xmmN, %xmmM -> xorps %xmmM, %xmmM
+ insertps $0b00000000, %xmmN, %xmmM -> movss %xmmN, %xmmM
+ insertps $0b..001110, mem, %xmmN -> movss mem, %xmmN
+ vinsertps $0b....1111, %xmmN, %xmmM, %xmmK -> vxorps %xmm?, %xmm?, %xmmK
+ vinsertps $0b00000000, %xmmN, %xmmM, %xmmK -> vmovss %xmmN, %xmmM, %xmmK
+ vinsertps $0b..001110, mem, %xmmN, %xmmN -> vmovss mem, %xmmN
+ */
+ i.tm.opcode_space = SPACE_0F;
+ if ((i.op[0].imms->X_add_number & 0xf) == 0xf)
+ {
+ i.tm.base_opcode = 0x57;
+ i.tm.opcode_modifier.opcodeprefix = PREFIX_NONE;
+
+ --i.operands;
+
+ i.op[i.operands - 1].regs = i.op[i.operands].regs;
+ i.types[i.operands - 1] = i.types[i.operands];
+ i.flags[i.operands - 1] = i.flags[i.operands];
+ i.tm.operand_types[i.operands - 1] = i.tm.operand_types[i.operands];
+
+ i.op[1].regs = i.op[i.operands - 1].regs;
+ i.types[1] = i.types[i.operands - 1];
+ i.flags[1] = i.flags[i.operands - 1];
+ i.tm.operand_types[1] = i.tm.operand_types[i.operands - 1];
+
+ i.op[0].regs = i.op[1].regs;
+ i.types[0] = i.types[1];
+ i.flags[0] = i.flags[1];
+ i.tm.operand_types[0] = i.tm.operand_types[1];
+
+ /* Switch from EVEX to VEX encoding if possible. Sadly we can't
+ (always) tell use of the {evex} pseudo-prefix (which otherwise
+ we'd like to respect) from use of %xmm16-%xmm31. */
+ if (pp.encoding == encoding_evex)
+ pp.encoding = encoding_default;
+ if (i.tm.opcode_modifier.evex
+ && pp.encoding <= encoding_vex3
+ && !(i.op[0].regs->reg_flags & RegVRex))
+ {
+ i.tm.opcode_modifier.evex = 0;
+ i.tm.opcode_modifier.vex = VEX128;
+ }
+
+ /* Switch from VEX3 to VEX2 encoding if possible. */
+ if (i.tm.opcode_modifier.vex
+ && pp.encoding <= encoding_vex
+ && (i.op[0].regs->reg_flags & RegRex))
+ {
+ i.op[0].regs -= 8;
+ i.op[1].regs = i.op[0].regs;
+ }
+ }
+ else
+ {
+ i.tm.base_opcode = 0x10;
+ i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
+
+ if (i.op[0].imms->X_add_number == 0)
+ {
+ i.op[0].regs = i.op[1].regs;
+ --i.operands;
+ }
+ else
+ {
+ i.op[0].disps = i.op[1].disps;
+ i.reloc[0] = i.reloc[1];
+ i.operands = 2;
+ i.tm.opcode_modifier.vexvvvv = 0;
+ }
+ i.types[0] = i.types[1];
+ i.flags[0] = i.flags[1];
+ i.tm.operand_types[0] = i.tm.operand_types[1];
+
+ i.op[1].regs = i.op[2].regs;
+ i.types[1] = i.types[2];
+ i.flags[1] = i.flags[2];
+ i.tm.operand_types[1] = i.tm.operand_types[2];
+
+ i.op[2].regs = i.op[3].regs;
+ i.types[2] = i.types[3];
+ i.flags[2] = i.flags[3];
+ i.tm.operand_types[2] = i.tm.operand_types[3];
+ }
+
+ i.imm_operands = 0;
+ }
}
/* Check whether the promoted (to address size) register is usable as index
diff --git a/gas/testsuite/gas/i386/optimize-1.d b/gas/testsuite/gas/i386/optimize-1.d
index fffa655..d50814d 100644
--- a/gas/testsuite/gas/i386/optimize-1.d
+++ b/gas/testsuite/gas/i386/optimize-1.d
@@ -190,6 +190,12 @@ Disassembly of section .text:
+[a-f0-9]+: c5 .* vmovupd %ymm1,\(%edx\)
+[a-f0-9]+: c5 .* vmovdqa %ymm1,%ymm2
+[a-f0-9]+: c5 .* vmovdqu %ymm1,\(%edx\)
+ +[a-f0-9]+: f3 .* movss %xmm1,%xmm2
+ +[a-f0-9]+: f3 .* movss \(%ecx\),%xmm2
+ +[a-f0-9]+: 0f .* xorps %xmm2,%xmm2
+ +[a-f0-9]+: c5 .* vmovss %xmm1,%xmm2,%xmm3
+ +[a-f0-9]+: c5 .* vmovss \(%ecx\),%xmm2
+ +[a-f0-9]+: c5 .* vxorps %xmm3,%xmm3,%xmm3
+[a-f0-9]+: 0f ba e0 0f bt \$0xf,%eax
+[a-f0-9]+: 66 0f ba e0 10 bt \$0x10,%ax
+[a-f0-9]+: 0f ba f8 0f btc \$0xf,%eax
diff --git a/gas/testsuite/gas/i386/optimize-1.s b/gas/testsuite/gas/i386/optimize-1.s
index afdeed4..fad6393 100644
--- a/gas/testsuite/gas/i386/optimize-1.s
+++ b/gas/testsuite/gas/i386/optimize-1.s
@@ -224,6 +224,14 @@ _start:
vextracti64x4 $0, %zmm1, %ymm2
vextracti64x4 $0, %zmm1, (%edx)
+ insertps $0, %xmm1, %xmm2
+ insertps $0xce, (%ecx), %xmm2
+ insertps $0xff, %xmm1, %xmm2
+
+ vinsertps $0, %xmm1, %xmm2, %xmm3
+ vinsertps $0xce, (%ecx), %xmm2, %xmm2
+ vinsertps $0xff, %xmm1, %xmm2, %xmm3
+
bt $15, %ax
bt $16, %ax
btc $15, %ax
diff --git a/gas/testsuite/gas/i386/optimize-1a.d b/gas/testsuite/gas/i386/optimize-1a.d
index aff3768..a59c616 100644
--- a/gas/testsuite/gas/i386/optimize-1a.d
+++ b/gas/testsuite/gas/i386/optimize-1a.d
@@ -191,6 +191,12 @@ Disassembly of section .text:
+[a-f0-9]+: c5 .* vmovupd %ymm1,\(%edx\)
+[a-f0-9]+: c5 .* vmovdqa %ymm1,%ymm2
+[a-f0-9]+: c5 .* vmovdqu %ymm1,\(%edx\)
+ +[a-f0-9]+: f3 .* movss %xmm1,%xmm2
+ +[a-f0-9]+: f3 .* movss \(%ecx\),%xmm2
+ +[a-f0-9]+: 0f .* xorps %xmm2,%xmm2
+ +[a-f0-9]+: c5 .* vmovss %xmm1,%xmm2,%xmm3
+ +[a-f0-9]+: c5 .* vmovss \(%ecx\),%xmm2
+ +[a-f0-9]+: c5 .* vxorps %xmm3,%xmm3,%xmm3
+[a-f0-9]+: 0f ba e0 0f bt \$0xf,%eax
+[a-f0-9]+: 66 0f ba e0 10 bt \$0x10,%ax
+[a-f0-9]+: 0f ba f8 0f btc \$0xf,%eax
diff --git a/gas/testsuite/gas/i386/optimize-4.d b/gas/testsuite/gas/i386/optimize-4.d
index fbc142a..8d29aab 100644
--- a/gas/testsuite/gas/i386/optimize-4.d
+++ b/gas/testsuite/gas/i386/optimize-4.d
@@ -190,6 +190,12 @@ Disassembly of section .text:
+[a-f0-9]+: c5 .* vmovupd %ymm1,\(%edx\)
+[a-f0-9]+: c5 .* vmovdqa %ymm1,%ymm2
+[a-f0-9]+: c5 .* vmovdqu %ymm1,\(%edx\)
+ +[a-f0-9]+: f3 .* movss %xmm1,%xmm2
+ +[a-f0-9]+: f3 .* movss \(%ecx\),%xmm2
+ +[a-f0-9]+: 0f .* xorps %xmm2,%xmm2
+ +[a-f0-9]+: c5 .* vmovss %xmm1,%xmm2,%xmm3
+ +[a-f0-9]+: c5 .* vmovss \(%ecx\),%xmm2
+ +[a-f0-9]+: c5 .* vxorps %xmm3,%xmm3,%xmm3
+[a-f0-9]+: 0f ba e0 0f bt \$0xf,%eax
+[a-f0-9]+: 66 0f ba e0 10 bt \$0x10,%ax
+[a-f0-9]+: 0f ba f8 0f btc \$0xf,%eax
diff --git a/gas/testsuite/gas/i386/optimize-5.d b/gas/testsuite/gas/i386/optimize-5.d
index a40022a..162a97f 100644
--- a/gas/testsuite/gas/i386/optimize-5.d
+++ b/gas/testsuite/gas/i386/optimize-5.d
@@ -190,6 +190,12 @@ Disassembly of section .text:
+[a-f0-9]+: c5 .* vmovupd %ymm1,\(%edx\)
+[a-f0-9]+: c5 .* vmovdqa %ymm1,%ymm2
+[a-f0-9]+: c5 .* vmovdqu %ymm1,\(%edx\)
+ +[a-f0-9]+: f3 .* movss %xmm1,%xmm2
+ +[a-f0-9]+: f3 .* movss \(%ecx\),%xmm2
+ +[a-f0-9]+: 0f .* xorps %xmm2,%xmm2
+ +[a-f0-9]+: c5 .* vmovss %xmm1,%xmm2,%xmm3
+ +[a-f0-9]+: c5 .* vmovss \(%ecx\),%xmm2
+ +[a-f0-9]+: c5 .* vxorps %xmm3,%xmm3,%xmm3
+[a-f0-9]+: 0f ba e0 0f bt \$0xf,%eax
+[a-f0-9]+: 66 0f ba e0 10 bt \$0x10,%ax
+[a-f0-9]+: 0f ba f8 0f btc \$0xf,%eax
diff --git a/gas/testsuite/gas/i386/x86-64-optimize-insertps.d b/gas/testsuite/gas/i386/x86-64-optimize-insertps.d
new file mode 100644
index 0000000..97300d0
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-optimize-insertps.d
@@ -0,0 +1,26 @@
+#as: -O -msse2avx
+#objdump: -drw
+#name: x86-64 INSERTPS optimized encoding with -msse2avx
+
+.*: +file format .*
+
+
+Disassembly of section .text:
+
+0+ <insertps>:
+ +[a-f0-9]+: c5 ea 10 d1 vmovss %xmm1,%xmm2,%xmm2
+ +[a-f0-9]+: c5 fa 10 11 vmovss \(%rcx\),%xmm2
+ +[a-f0-9]+: 62 f9 7e 08 10 11 vmovss \(%r17\),%xmm2
+ +[a-f0-9]+: c5 e8 57 d2 vxorps %xmm2,%xmm2,%xmm2
+ +[a-f0-9]+: c5 ea 10 d9 vmovss %xmm1,%xmm2,%xmm3
+ +[a-f0-9]+: c5 fa 10 11 vmovss \(%rcx\),%xmm2
+ +[a-f0-9]+: c5 e0 57 db vxorps %xmm3,%xmm3,%xmm3
+ +[a-f0-9]+: c5 e0 57 db vxorps %xmm3,%xmm3,%xmm3
+ +[a-f0-9]+: c5 e0 57 db vxorps %xmm3,%xmm3,%xmm3
+ +[a-f0-9]+: c5 60 57 db vxorps %xmm3,%xmm3,%xmm11
+ +[a-f0-9]+: 62 b1 6e 08 10 d9 vmovss %xmm17,%xmm2,%xmm3
+ +[a-f0-9]+: 62 e1 7e 08 10 11 vmovss \(%rcx\),%xmm18
+ +[a-f0-9]+: c5 e0 57 db vxorps %xmm3,%xmm3,%xmm3
+ +[a-f0-9]+: c5 e0 57 db vxorps %xmm3,%xmm3,%xmm3
+ +[a-f0-9]+: 62 a1 64 00 57 db vxorps %xmm19,%xmm19,%xmm19
+#pass
diff --git a/gas/testsuite/gas/i386/x86-64-optimize-insertps.l b/gas/testsuite/gas/i386/x86-64-optimize-insertps.l
new file mode 100644
index 0000000..08eb0c1
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-optimize-insertps.l
@@ -0,0 +1,26 @@
+.*: Assembler messages:
+.*:5: Error: .*
+[ ]*[0-9a-f]+[ ]+\.text
+[ ]*[0-9a-f]+[ ]+insertps:
+[ ]*[0-9a-f]+[ ]+\?\?\?\? F30F10D1[ ]+insertps \$0, %xmm1, %xmm2
+[ ]*[0-9a-f]+[ ]+\?\?\?\? F30F1011[ ]+insertps \$0xce, \(%rcx\), %xmm2
+[ ]*[0-9a-f]+[ ]+insertps \$0x0e, \(%r17\), %xmm2
+[ ]*[0-9a-f]+[ ]+\?\?\?\? 0F57D2[ ]+insertps \$0xff, %xmm1, %xmm2
+[ ]*[0-9a-f]+[ ]+
+[ ]*[0-9a-f]+[ ]+\?\?\?\? C5EA10D9[ ]+vinsertps \$0, %xmm1, %xmm2, %xmm3
+[ ]*[0-9a-f]+[ ]+\?\?\?\? C5FA1011[ ]+vinsertps \$0xce, \(%rcx\), %xmm2, %xmm2
+[ ]*[0-9a-f]+[ ]+\?\?\?\? C5E057DB[ ]+vinsertps \$0xff, %xmm1, %xmm2, %xmm3
+[ ]*[0-9a-f]+[ ]+
+[ ]*[0-9a-f]+[ ]+\?\?\?\? C5E057DB[ ]+vinsertps \$0xbf, %xmm9, %xmm2, %xmm3
+[ ]*[0-9a-f]+[ ]+\?\?\?\? C5E057DB[ ]+vinsertps \$0x7f, %xmm1, %xmm10, %xmm3
+[ ]*[0-9a-f]+[ ]+\?\?\?\? C56057DB[ ]+vinsertps \$0x3f, %xmm1, %xmm2, %xmm11
+[ ]*[0-9a-f]+[ ]+
+[ ]*[0-9a-f]+[ ]+\?\?\?\? 62B16E08[ ]+vinsertps \$0, %xmm17, %xmm2, %xmm3
+[ ]*[0-9a-f]+[ ]+10D9
+[ ]*[0-9a-f]+[ ]+\?\?\?\? 62E17E08[ ]+vinsertps \$0xce, \(%rcx\), %xmm18, %xmm18
+[ ]*[0-9a-f]+[ ]+1011
+[ ]*[0-9a-f]+[ ]+\?\?\?\? C5E057DB[ ]+vinsertps \$0xff, %xmm17, %xmm2, %xmm3
+[ ]*[0-9a-f]+[ ]+\?\?\?\? C5E057DB[ ]+vinsertps \$0xff, %xmm1, %xmm18, %xmm3
+[ ]*[0-9a-f]+[ ]+\?\?\?\? 62A16400[ ]+vinsertps \$0xff, %xmm1, %xmm2, %xmm19
+[ ]*[0-9a-f]+[ ]+57DB
+#pass
diff --git a/gas/testsuite/gas/i386/x86-64-optimize-insertps.s b/gas/testsuite/gas/i386/x86-64-optimize-insertps.s
new file mode 100644
index 0000000..98240ac
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-optimize-insertps.s
@@ -0,0 +1,20 @@
+ .text
+insertps:
+ insertps $0, %xmm1, %xmm2
+ insertps $0xce, (%rcx), %xmm2
+ insertps $0x0e, (%r17), %xmm2
+ insertps $0xff, %xmm1, %xmm2
+
+ vinsertps $0, %xmm1, %xmm2, %xmm3
+ vinsertps $0xce, (%rcx), %xmm2, %xmm2
+ vinsertps $0xff, %xmm1, %xmm2, %xmm3
+
+ vinsertps $0xbf, %xmm9, %xmm2, %xmm3
+ vinsertps $0x7f, %xmm1, %xmm10, %xmm3
+ vinsertps $0x3f, %xmm1, %xmm2, %xmm11
+
+ vinsertps $0, %xmm17, %xmm2, %xmm3
+ vinsertps $0xce, (%rcx), %xmm18, %xmm18
+ vinsertps $0xff, %xmm17, %xmm2, %xmm3
+ vinsertps $0xff, %xmm1, %xmm18, %xmm3
+ vinsertps $0xff, %xmm1, %xmm2, %xmm19
diff --git a/gas/testsuite/gas/i386/x86-64.exp b/gas/testsuite/gas/i386/x86-64.exp
index ad713dc..3ffeaaf 100644
--- a/gas/testsuite/gas/i386/x86-64.exp
+++ b/gas/testsuite/gas/i386/x86-64.exp
@@ -597,6 +597,8 @@ run_dump_test "x86-64-optimize-pextr"
run_list_test "x86-64-optimize-extractps" "-O -aln"
run_dump_test "x86-64-optimize-extractps"
run_dump_test "x86-64-optimize-vextractNN"
+run_list_test "x86-64-optimize-insertps" "-O -aln"
+run_dump_test "x86-64-optimize-insertps"
run_dump_test "x86-64-apx-ndd-optimize"
run_dump_test "x86-64-align-branch-1a"
run_dump_test "x86-64-align-branch-1b"
diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl
index 29289ca..0cd8538 100644
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -1495,7 +1495,7 @@ extractps, 0x6617, AVX|AVX512F, Modrm|Vex128|EVex128|Space0F3A|VexW0|Disp8MemShi
extractps, 0x6617, x64&(AVX|AVX512F), RegMem|Vex128|EVex128|Space0F3A|VexW1|NoSuf|SSE2AVX|Optimize, { Imm8, RegXMM, Reg64 }
extractps, 0x660f3a17, SSE4_1, Modrm|IgnoreSize|NoSuf|Optimize, { Imm8, RegXMM, Reg32|Unspecified|BaseIndex }
extractps, 0x660f3a17, SSE4_1&x64, RegMem|NoSuf|Optimize|NoRex64, { Imm8, RegXMM, Reg64 }
-insertps<SSE41D>, 0x660f3a21, <SSE41D:cpu>, Modrm|<SSE41D:attr>|<SSE41D:vvvv>|Disp8MemShift|NoSuf, { Imm8, Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+insertps<SSE41D>, 0x660f3a21, <SSE41D:cpu>, Modrm|<SSE41D:attr>|<SSE41D:vvvv>|Disp8MemShift|NoSuf|Optimize, { Imm8, Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
movntdqa<SSE41D>, 0x660f382a, <SSE41D:cpu>, Modrm|<SSE41D:attr>|NoSuf, { Xmmword|Unspecified|BaseIndex, RegXMM }
mpsadbw<sse41>, 0x660f3a42, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
packusdw<SSE41BW>, 0x660f382b, <SSE41BW:cpu>, Modrm|<SSE41BW:attr>|<SSE41BW:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
@@ -1672,7 +1672,7 @@ vhaddps, 0xf27c, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf,
vhsubpd, 0x667d, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
vhsubps, 0xf27d, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
vinsertf128, 0x6618, AVX, Modrm|Vex256|Space0F3A|Src1VVVV|VexW0|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM, RegYMM, RegYMM }
-vinsertps, 0x6621, AVX, Modrm|Vex|Space0F3A|Src1VVVV|VexWIG|NoSuf, { Imm8, Dword|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
+vinsertps, 0x6621, AVX, Modrm|Vex|Space0F3A|Src1VVVV|VexWIG|NoSuf|Optimize, { Imm8, Dword|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
vlddqu, 0xf2f0, AVX, Modrm|Vex|Space0F|VexWIG|CheckOperandSize|NoSuf, { Xmmword|Ymmword|Unspecified|BaseIndex, RegXMM|RegYMM }
vldmxcsr, 0xae/2, AVX, Modrm|Vex128|Space0F|VexWIG|NoSuf, { Dword|Unspecified|BaseIndex }
vmaskmovdqu, 0x66f7, AVX, Modrm|Vex|Space0F|VexWIG|NoSuf, { RegXMM, RegXMM }
@@ -2394,7 +2394,7 @@ vinserti32x4, 0x6638, AVX512F, Modrm|Masking|Space0F3A|Src1VVVV|VexW0|Disp8MemSh
vinsertf64x4, 0x661A, AVX512F, Modrm|EVex=1|Masking|Space0F3A|Src1VVVV|VexW1|Disp8MemShift=5|NoSuf, { Imm8, RegYMM|Unspecified|BaseIndex, RegZMM, RegZMM }
vinserti64x4, 0x663A, AVX512F, Modrm|EVex=1|Masking|Space0F3A|Src1VVVV|VexW1|Disp8MemShift=5|NoSuf, { Imm8, RegYMM|Unspecified|BaseIndex, RegZMM, RegZMM }
-vinsertps, 0x6621, AVX512F, Modrm|EVex128|Space0F3A|Src1VVVV|VexW0|Disp8MemShift=2|NoSuf, { Imm8, RegXMM|Dword|Unspecified|BaseIndex, RegXMM, RegXMM }
+vinsertps, 0x6621, AVX512F, Modrm|EVex128|Space0F3A|Src1VVVV|VexW0|Disp8MemShift=2|NoSuf|Optimize, { Imm8, RegXMM|Dword|Unspecified|BaseIndex, RegXMM, RegXMM }
vmovap<sd>, 0x<sd:ppfx>28, AVX512F, D|Modrm|Masking|Space0F|<sd:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
vmovntp<sd>, 0x<sd:ppfx>2B, AVX512F, Modrm|Space0F|<sd:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM, XMMword|YMMword|ZMMword|Unspecified|BaseIndex }
diff --git a/opcodes/i386-tbl.h b/opcodes/i386-tbl.h
index 5e04c5d..4de9578 100644
--- a/opcodes/i386-tbl.h
+++ b/opcodes/i386-tbl.h
@@ -18062,7 +18062,7 @@ static const insn_template i386_optab[] =
0, 0, 0, 0, 0, 0 } } } },
{ MN_insertps, 0x21, 3, SPACE_0F3A, None,
{ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
- 0, 0, 0, 1, 1, 1, 1, 0, 1, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0,
+ 0, 0, 0, 1, 1, 1, 1, 0, 1, 2, 0, 0, 0, 0, 2, 1, 0, 0, 0, 0,
0, 0 },
{ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ { 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0 } },
@@ -18074,7 +18074,7 @@ static const insn_template i386_optab[] =
1, 0, 0, 0, 0, 0 } } } },
{ MN_insertps, 0x21, 3, SPACE_0F3A, None,
{ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
+ 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
0, 0 },
{ { 28, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
@@ -26050,7 +26050,7 @@ static const insn_template i386_optab[] =
0, 1, 0, 0, 0, 0 } } } },
{ MN_vinsertps, 0x21, 4, SPACE_0F3A, None,
{ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
- 0, 0, 0, 1, 1, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
+ 0, 0, 0, 1, 1, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
0, 0 },
{ { 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 } },
{ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
@@ -26064,7 +26064,7 @@ static const insn_template i386_optab[] =
1, 0, 0, 0, 0, 0 } } } },
{ MN_vinsertps, 0x21, 4, SPACE_0F3A, None,
{ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 1, 1, 1, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 1, 1, 1, 0, 0, 2, 0, 0, 0, 0, 2, 1, 0, 0, 0, 0,
0, 0 },
{ { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 } },
{ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },