diff options
-rw-r--r-- | gas/ChangeLog | 7 | ||||
-rw-r--r-- | gas/config/tc-i386.c | 62 | ||||
-rw-r--r-- | gas/testsuite/ChangeLog | 9 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/i386.exp | 2 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/sse4_1.d | 102 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/sse4_1.s | 99 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/x86-64-sse4_1.d | 110 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/x86-64-sse4_1.s | 107 | ||||
-rw-r--r-- | opcodes/ChangeLog | 24 | ||||
-rw-r--r-- | opcodes/i386-dis.c | 560 | ||||
-rw-r--r-- | opcodes/i386-opc.c | 55 | ||||
-rw-r--r-- | opcodes/i386-opc.h | 8 |
12 files changed, 1074 insertions, 71 deletions
diff --git a/gas/ChangeLog b/gas/ChangeLog index 110f35b..05ccfc0 100644 --- a/gas/ChangeLog +++ b/gas/ChangeLog @@ -1,3 +1,10 @@ +2007-04-18 H.J. Lu <hongjiu.lu@intel.com> + + * config/tc-i386.c (cpu_arch): Add .sse4.1. + (process_operands): Adjust implicit operand for blendvpd, + blendvps and pblendvb in SSE4.1. + (output_insn): Support SSE4.1. + 2007-04-18 Paul Brook <paul@codesourcery.com> * config/tc-arm.c (do_t_rsb): Use 16-bit encoding when possible. diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index b70396d..6ee4010 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -498,6 +498,8 @@ static const arch_entry cpu_arch[] = CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3}, {".ssse3", PROCESSOR_UNKNOWN, CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3|CpuSSSE3}, + {".sse4.1", PROCESSOR_UNKNOWN, + CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3|CpuSSSE3|CpuSSE4_1}, {".3dnow", PROCESSOR_UNKNOWN, CpuMMX|Cpu3dnow}, {".3dnowa", PROCESSOR_UNKNOWN, @@ -3277,14 +3279,47 @@ process_operands (void) is converted into xor %reg, %reg. */ if (i.tm.opcode_modifier & regKludge) { - unsigned int first_reg_op = (i.types[0] & Reg) ? 0 : 1; - /* Pretend we saw the extra register operand. */ - assert (i.reg_operands == 1 - && i.op[first_reg_op + 1].regs == 0); - i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs; - i.types[first_reg_op + 1] = i.types[first_reg_op]; - i.operands++; - i.reg_operands++; + if ((i.tm.cpu_flags & CpuSSE4_1)) + { + /* The first operand in instruction blendvpd, blendvps and + pblendvb in SSE4.1 is implicit and must be xmm0. */ + assert (i.operands == 3 + && i.reg_operands >= 2 + && i.types[0] == RegXMM); + if (i.op[0].regs->reg_num != 0) + { + if (intel_syntax) + as_bad (_("the last operand of `%s' must be `%sxmm0'"), + i.tm.name, register_prefix); + else + as_bad (_("the first operand of `%s' must be `%sxmm0'"), + i.tm.name, register_prefix); + return 0; + } + i.op[0] = i.op[1]; + i.op[1] = i.op[2]; + i.types[0] = i.types[1]; + i.types[1] = i.types[2]; + i.operands--; + i.reg_operands--; + + /* We need to adjust fields in i.tm since they are used by + build_modrm_byte. */ + i.tm.operand_types [0] = i.tm.operand_types [1]; + i.tm.operand_types [1] = i.tm.operand_types [2]; + i.tm.operands--; + } + else + { + unsigned int first_reg_op = (i.types[0] & Reg) ? 0 : 1; + /* Pretend we saw the extra register operand. */ + assert (i.reg_operands == 1 + && i.op[first_reg_op + 1].regs == 0); + i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs; + i.types[first_reg_op + 1] = i.types[first_reg_op]; + i.operands++; + i.reg_operands++; + } } if (i.tm.opcode_modifier & ShortForm) @@ -3893,11 +3928,10 @@ output_insn (void) unsigned char *q; unsigned int prefix; - /* All opcodes on i386 have either 1 or 2 bytes. Supplemental - Streaming SIMD extensions 3 Instructions have 3 bytes. We may - use one more higher byte to specify a prefix the instruction - requires. */ - if ((i.tm.cpu_flags & CpuSSSE3) != 0) + /* All opcodes on i386 have either 1 or 2 bytes. SSSE3 and + SSE4.1 instructions have 3 bytes. We may use one more higher + byte to specify a prefix the instruction requires. */ + if ((i.tm.cpu_flags & (CpuSSSE3 | CpuSSE4_1)) != 0) { if (i.tm.base_opcode & 0xff000000) { @@ -3938,7 +3972,7 @@ output_insn (void) } else { - if ((i.tm.cpu_flags & CpuSSSE3) != 0) + if ((i.tm.cpu_flags & (CpuSSSE3 | CpuSSE4_1)) != 0) { p = frag_more (3); *p++ = (i.tm.base_opcode >> 16) & 0xff; diff --git a/gas/testsuite/ChangeLog b/gas/testsuite/ChangeLog index 8f6a390..0679212 100644 --- a/gas/testsuite/ChangeLog +++ b/gas/testsuite/ChangeLog @@ -1,3 +1,12 @@ +2007-04-18 H.J. Lu <hongjiu.lu@intel.com> + + * gas/i386/i386.exp: Add sse4.1 and x86-64-sse4.1. + + * gas/i386/sse4_1.d: New file. + * gas/i386/sse4_1.s: Likewise. + * gas/i386/x86-64-sse4_1.d: Likewise. + * gas/i386/x86-64-sse4_1.s: Likewise. + 2007-04-18 Paul Brook <paul@codesourcery.com> * gas/arm/thumb2_add.s: Add rsb #0 test. diff --git a/gas/testsuite/gas/i386/i386.exp b/gas/testsuite/gas/i386/i386.exp index cfef275..30053e5 100644 --- a/gas/testsuite/gas/i386/i386.exp +++ b/gas/testsuite/gas/i386/i386.exp @@ -86,6 +86,7 @@ if [expr ([istarget "i*86-*-*"] || [istarget "x86_64-*-*"]) && [gas_32_check]] run_dump_test "nops-3" run_dump_test "addr16" run_dump_test "addr32" + run_dump_test "sse4_1" # These tests require support for 8 and 16 bit relocs, # so we only run them for ELF and COFF targets. @@ -171,6 +172,7 @@ if [expr ([istarget "i*86-*-*"] || [istarget "x86_64-*-*"]) && [gas_64_check]] t run_dump_test "x86-64-nops-1-k8" run_dump_test "x86-64-nops-1-nocona" run_dump_test "x86-64-nops-1-merom" + run_dump_test "x86-64-sse4_1" if { ![istarget "*-*-aix*"] && ![istarget "*-*-beos*"] diff --git a/gas/testsuite/gas/i386/sse4_1.d b/gas/testsuite/gas/i386/sse4_1.d new file mode 100644 index 0000000..6797228 --- /dev/null +++ b/gas/testsuite/gas/i386/sse4_1.d @@ -0,0 +1,102 @@ +#objdump: -dw +#name: i386 SSE4.1 + +.*: file format .* + +Disassembly of section .text: + +0+000 <foo>: +[ ]*[0-9a-f]+: 66 0f 3a 0d 01 00 blendpd \$0x0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0d c1 00 blendpd \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0c 01 00 blendps \$0x0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0c c1 00 blendps \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 15 01 blendvpd %xmm0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 15 c1 blendvpd %xmm0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 14 01 blendvps %xmm0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 14 c1 blendvps %xmm0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 41 01 00 dppd \$0x0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 41 c1 00 dppd \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 40 01 00 dpps \$0x0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 40 c1 00 dpps \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 17 c1 00 extractps \$0x0,%xmm0,%ecx +[ ]*[0-9a-f]+: 66 0f 3a 17 01 00 extractps \$0x0,%xmm0,\(%ecx\) +[ ]*[0-9a-f]+: 66 0f 3a 21 c1 00 insertps \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 21 01 00 insertps \$0x0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 2a 01 movntdqa \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 42 01 00 mpsadbw \$0x0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 42 c1 00 mpsadbw \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 2b 01 packusdw \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 2b c1 packusdw %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 10 01 pblendvb %xmm0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 10 c1 pblendvb %xmm0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0e 01 00 pblendw \$0x0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0e c1 00 pblendw \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 29 c1 pcmpeqq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 29 01 pcmpeqq \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 14 c1 00 pextrb \$0x0,%xmm0,%ecx +[ ]*[0-9a-f]+: 66 0f 3a 14 01 00 pextrb \$0x0,%xmm0,\(%ecx\) +[ ]*[0-9a-f]+: 66 0f 3a 16 c1 00 pextrd \$0x0,%xmm0,%ecx +[ ]*[0-9a-f]+: 66 0f 3a 16 01 00 pextrd \$0x0,%xmm0,\(%ecx\) +[ ]*[0-9a-f]+: 66 0f c5 c8 00 pextrw \$0x0,%xmm0,%ecx +[ ]*[0-9a-f]+: 66 0f 3a 15 01 00 pextrw \$0x0,%xmm0,\(%ecx\) +[ ]*[0-9a-f]+: 66 0f 38 41 c1 phminposuw %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 41 01 phminposuw \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 20 01 00 pinsrb \$0x0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 20 c1 00 pinsrb \$0x0,%ecx,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 22 01 00 pinsrd \$0x0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 22 c1 00 pinsrd \$0x0,%ecx,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3c c1 pmaxsb %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3c 01 pmaxsb \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3d c1 pmaxsd %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3d 01 pmaxsd \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3f c1 pmaxud %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3f 01 pmaxud \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3e c1 pmaxuw %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3e 01 pmaxuw \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 38 c1 pminsb %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 38 01 pminsb \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 39 c1 pminsd %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 39 01 pminsd \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3b c1 pminud %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3b 01 pminud \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3a c1 pminuw %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3a 01 pminuw \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 20 c1 pmovsxbw %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 20 01 pmovsxbw \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 21 c1 pmovsxbd %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 21 01 pmovsxbd \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 22 c1 pmovsxbq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 22 01 pmovsxbq \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 23 c1 pmovsxwd %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 23 01 pmovsxwd \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 24 c1 pmovsxwq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 24 01 pmovsxwq \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 25 c1 pmovsxdq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 25 01 pmovsxdq \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 30 c1 pmovzxbw %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 30 01 pmovzxbw \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 31 c1 pmovzxbd %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 31 01 pmovzxbd \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 32 c1 pmovzxbq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 32 01 pmovzxbq \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 33 c1 pmovzxwd %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 33 01 pmovzxwd \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 34 c1 pmovzxwq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 34 01 pmovzxwq \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 35 c1 pmovzxdq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 35 01 pmovzxdq \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 28 c1 pmuldq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 28 01 pmuldq \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 40 c1 pmulld %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 40 01 pmulld \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 17 c1 ptest %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 17 01 ptest \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 09 01 00 roundpd \$0x0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 09 c1 00 roundpd \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 08 01 00 roundps \$0x0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 08 c1 00 roundps \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0b 01 00 roundsd \$0x0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0b c1 00 roundsd \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0a 01 00 roundss \$0x0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0a c1 00 roundss \$0x0,%xmm1,%xmm0 +#pass diff --git a/gas/testsuite/gas/i386/sse4_1.s b/gas/testsuite/gas/i386/sse4_1.s new file mode 100644 index 0000000..258dde8 --- /dev/null +++ b/gas/testsuite/gas/i386/sse4_1.s @@ -0,0 +1,99 @@ +# Streaming SIMD extensions 4.1 Instructions + + .text +foo: + blendpd $0,(%ecx),%xmm0 + blendpd $0,%xmm1,%xmm0 + blendps $0,(%ecx),%xmm0 + blendps $0,%xmm1,%xmm0 + blendvpd %xmm0,(%ecx),%xmm0 + blendvpd %xmm0,%xmm1,%xmm0 + blendvps %xmm0,(%ecx),%xmm0 + blendvps %xmm0,%xmm1,%xmm0 + dppd $0,(%ecx),%xmm0 + dppd $0,%xmm1,%xmm0 + dpps $0,(%ecx),%xmm0 + dpps $0,%xmm1,%xmm0 + extractps $0,%xmm0,%ecx + extractps $0,%xmm0,(%ecx) + insertps $0,%xmm1,%xmm0 + insertps $0,(%ecx),%xmm0 + movntdqa (%ecx),%xmm0 + mpsadbw $0,(%ecx),%xmm0 + mpsadbw $0,%xmm1,%xmm0 + packusdw (%ecx),%xmm0 + packusdw %xmm1,%xmm0 + pblendvb %xmm0,(%ecx),%xmm0 + pblendvb %xmm0,%xmm1,%xmm0 + pblendw $0,(%ecx),%xmm0 + pblendw $0,%xmm1,%xmm0 + pcmpeqq %xmm1,%xmm0 + pcmpeqq (%ecx),%xmm0 + pextrb $0,%xmm0,%ecx + pextrb $0,%xmm0,(%ecx) + pextrd $0,%xmm0,%ecx + pextrd $0,%xmm0,(%ecx) + pextrw $0,%xmm0,%ecx + pextrw $0,%xmm0,(%ecx) + phminposuw %xmm1,%xmm0 + phminposuw (%ecx),%xmm0 + pinsrb $0,(%ecx),%xmm0 + pinsrb $0,%ecx,%xmm0 + pinsrd $0,(%ecx),%xmm0 + pinsrd $0,%ecx,%xmm0 + pmaxsb %xmm1,%xmm0 + pmaxsb (%ecx),%xmm0 + pmaxsd %xmm1,%xmm0 + pmaxsd (%ecx),%xmm0 + pmaxud %xmm1,%xmm0 + pmaxud (%ecx),%xmm0 + pmaxuw %xmm1,%xmm0 + pmaxuw (%ecx),%xmm0 + pminsb %xmm1,%xmm0 + pminsb (%ecx),%xmm0 + pminsd %xmm1,%xmm0 + pminsd (%ecx),%xmm0 + pminud %xmm1,%xmm0 + pminud (%ecx),%xmm0 + pminuw %xmm1,%xmm0 + pminuw (%ecx),%xmm0 + pmovsxbw %xmm1,%xmm0 + pmovsxbw (%ecx),%xmm0 + pmovsxbd %xmm1,%xmm0 + pmovsxbd (%ecx),%xmm0 + pmovsxbq %xmm1,%xmm0 + pmovsxbq (%ecx),%xmm0 + pmovsxwd %xmm1,%xmm0 + pmovsxwd (%ecx),%xmm0 + pmovsxwq %xmm1,%xmm0 + pmovsxwq (%ecx),%xmm0 + pmovsxdq %xmm1,%xmm0 + pmovsxdq (%ecx),%xmm0 + pmovzxbw %xmm1,%xmm0 + pmovzxbw (%ecx),%xmm0 + pmovzxbd %xmm1,%xmm0 + pmovzxbd (%ecx),%xmm0 + pmovzxbq %xmm1,%xmm0 + pmovzxbq (%ecx),%xmm0 + pmovzxwd %xmm1,%xmm0 + pmovzxwd (%ecx),%xmm0 + pmovzxwq %xmm1,%xmm0 + pmovzxwq (%ecx),%xmm0 + pmovzxdq %xmm1,%xmm0 + pmovzxdq (%ecx),%xmm0 + pmuldq %xmm1,%xmm0 + pmuldq (%ecx),%xmm0 + pmulld %xmm1,%xmm0 + pmulld (%ecx),%xmm0 + ptest %xmm1,%xmm0 + ptest (%ecx),%xmm0 + roundpd $0,(%ecx),%xmm0 + roundpd $0,%xmm1,%xmm0 + roundps $0,(%ecx),%xmm0 + roundps $0,%xmm1,%xmm0 + roundsd $0,(%ecx),%xmm0 + roundsd $0,%xmm1,%xmm0 + roundss $0,(%ecx),%xmm0 + roundss $0,%xmm1,%xmm0 + + .p2align 4,0 diff --git a/gas/testsuite/gas/i386/x86-64-sse4_1.d b/gas/testsuite/gas/i386/x86-64-sse4_1.d new file mode 100644 index 0000000..6ed99e2 --- /dev/null +++ b/gas/testsuite/gas/i386/x86-64-sse4_1.d @@ -0,0 +1,110 @@ +#objdump: -dw +#name: x86-64 SSE4.1 + +.*: file format .* + +Disassembly of section .text: + +0+000 <foo>: +[ ]*[0-9a-f]+: 66 0f 3a 0d 01 00 blendpd \$0x0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0d c1 00 blendpd \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0c 01 00 blendps \$0x0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0c c1 00 blendps \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 15 01 blendvpd %xmm0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 15 c1 blendvpd %xmm0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 14 01 blendvps %xmm0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 14 c1 blendvps %xmm0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 41 01 00 dppd \$0x0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 41 c1 00 dppd \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 40 01 00 dpps \$0x0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 40 c1 00 dpps \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 48 0f 3a 17 c1 00 extractps \$0x0,%xmm0,%rcx +[ ]*[0-9a-f]+: 66 0f 3a 17 c1 00 extractps \$0x0,%xmm0,%ecx +[ ]*[0-9a-f]+: 66 0f 3a 17 01 00 extractps \$0x0,%xmm0,\(%rcx\) +[ ]*[0-9a-f]+: 66 0f 3a 21 c1 00 insertps \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 21 01 00 insertps \$0x0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 2a 01 movntdqa \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 42 01 00 mpsadbw \$0x0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 42 c1 00 mpsadbw \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 2b 01 packusdw \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 2b c1 packusdw %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 10 01 pblendvb %xmm0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 10 c1 pblendvb %xmm0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0e 01 00 pblendw \$0x0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0e c1 00 pblendw \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 29 c1 pcmpeqq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 29 01 pcmpeqq \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 48 0f 3a 14 c1 00 pextrb \$0x0,%xmm0,%rcx +[ ]*[0-9a-f]+: 66 0f 3a 14 c1 00 pextrb \$0x0,%xmm0,%ecx +[ ]*[0-9a-f]+: 66 0f 3a 14 01 00 pextrb \$0x0,%xmm0,\(%rcx\) +[ ]*[0-9a-f]+: 66 0f 3a 16 c1 00 pextrd \$0x0,%xmm0,%ecx +[ ]*[0-9a-f]+: 66 0f 3a 16 01 00 pextrd \$0x0,%xmm0,\(%rcx\) +[ ]*[0-9a-f]+: 66 48 0f 3a 16 c1 00 pextrq \$0x0,%xmm0,%rcx +[ ]*[0-9a-f]+: 66 48 0f 3a 16 01 00 pextrq \$0x0,%xmm0,\(%rcx\) +[ ]*[0-9a-f]+: 66 48 0f c5 c8 00 pextrw \$0x0,%xmm0,%rcx +[ ]*[0-9a-f]+: 66 0f c5 c8 00 pextrw \$0x0,%xmm0,%ecx +[ ]*[0-9a-f]+: 66 0f 3a 15 01 00 pextrw \$0x0,%xmm0,\(%rcx\) +[ ]*[0-9a-f]+: 66 0f 38 41 c1 phminposuw %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 41 01 phminposuw \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 20 01 00 pinsrb \$0x0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 20 c1 00 pinsrb \$0x0,%ecx,%xmm0 +[ ]*[0-9a-f]+: 66 48 0f 3a 20 c1 00 pinsrb \$0x0,%rcx,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 22 01 00 pinsrd \$0x0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 22 c1 00 pinsrd \$0x0,%ecx,%xmm0 +[ ]*[0-9a-f]+: 66 48 0f 3a 22 01 00 pinsrq \$0x0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 48 0f 3a 22 c1 00 pinsrq \$0x0,%rcx,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3c c1 pmaxsb %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3c 01 pmaxsb \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3d c1 pmaxsd %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3d 01 pmaxsd \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3f c1 pmaxud %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3f 01 pmaxud \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3e c1 pmaxuw %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3e 01 pmaxuw \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 38 c1 pminsb %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 38 01 pminsb \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 39 c1 pminsd %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 39 01 pminsd \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3b c1 pminud %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3b 01 pminud \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3a c1 pminuw %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3a 01 pminuw \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 20 c1 pmovsxbw %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 20 01 pmovsxbw \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 21 c1 pmovsxbd %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 21 01 pmovsxbd \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 22 c1 pmovsxbq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 22 01 pmovsxbq \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 23 c1 pmovsxwd %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 23 01 pmovsxwd \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 24 c1 pmovsxwq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 24 01 pmovsxwq \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 25 c1 pmovsxdq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 25 01 pmovsxdq \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 30 c1 pmovzxbw %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 30 01 pmovzxbw \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 31 c1 pmovzxbd %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 31 01 pmovzxbd \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 32 c1 pmovzxbq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 32 01 pmovzxbq \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 33 c1 pmovzxwd %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 33 01 pmovzxwd \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 34 c1 pmovzxwq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 34 01 pmovzxwq \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 35 c1 pmovzxdq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 35 01 pmovzxdq \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 28 c1 pmuldq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 28 01 pmuldq \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 40 c1 pmulld %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 40 01 pmulld \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 17 c1 ptest %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 17 01 ptest \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 09 01 00 roundpd \$0x0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 09 c1 00 roundpd \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 08 01 00 roundps \$0x0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 08 c1 00 roundps \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0b 01 00 roundsd \$0x0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0b c1 00 roundsd \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0a 01 00 roundss \$0x0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0a c1 00 roundss \$0x0,%xmm1,%xmm0 +#pass diff --git a/gas/testsuite/gas/i386/x86-64-sse4_1.s b/gas/testsuite/gas/i386/x86-64-sse4_1.s new file mode 100644 index 0000000..70c2394 --- /dev/null +++ b/gas/testsuite/gas/i386/x86-64-sse4_1.s @@ -0,0 +1,107 @@ +# Streaming SIMD extensions 4.1 Instructions + + .text +foo: + blendpd $0x0,(%rcx),%xmm0 + blendpd $0x0,%xmm1,%xmm0 + blendps $0x0,(%rcx),%xmm0 + blendps $0x0,%xmm1,%xmm0 + blendvpd %xmm0,(%rcx),%xmm0 + blendvpd %xmm0,%xmm1,%xmm0 + blendvps %xmm0,(%rcx),%xmm0 + blendvps %xmm0,%xmm1,%xmm0 + dppd $0x0,(%rcx),%xmm0 + dppd $0x0,%xmm1,%xmm0 + dpps $0x0,(%rcx),%xmm0 + dpps $0x0,%xmm1,%xmm0 + extractps $0x0,%xmm0,%rcx + extractps $0x0,%xmm0,%ecx + extractps $0x0,%xmm0,(%rcx) + insertps $0x0,%xmm1,%xmm0 + insertps $0x0,(%rcx),%xmm0 + movntdqa (%rcx),%xmm0 + mpsadbw $0x0,(%rcx),%xmm0 + mpsadbw $0x0,%xmm1,%xmm0 + packusdw (%rcx),%xmm0 + packusdw %xmm1,%xmm0 + pblendvb %xmm0,(%rcx),%xmm0 + pblendvb %xmm0,%xmm1,%xmm0 + pblendw $0x0,(%rcx),%xmm0 + pblendw $0x0,%xmm1,%xmm0 + pcmpeqq %xmm1,%xmm0 + pcmpeqq (%rcx),%xmm0 + pextrb $0x0,%xmm0,%rcx + pextrb $0x0,%xmm0,%ecx + pextrb $0x0,%xmm0,(%rcx) + pextrd $0x0,%xmm0,%ecx + pextrd $0x0,%xmm0,(%rcx) + pextrq $0x0,%xmm0,%rcx + pextrq $0x0,%xmm0,(%rcx) + pextrw $0x0,%xmm0,%rcx + pextrw $0x0,%xmm0,%ecx + pextrw $0x0,%xmm0,(%rcx) + phminposuw %xmm1,%xmm0 + phminposuw (%rcx),%xmm0 + pinsrb $0x0,(%rcx),%xmm0 + pinsrb $0x0,%ecx,%xmm0 + pinsrb $0x0,%rcx,%xmm0 + pinsrd $0x0,(%rcx),%xmm0 + pinsrd $0x0,%ecx,%xmm0 + pinsrq $0x0,(%rcx),%xmm0 + pinsrq $0x0,%rcx,%xmm0 + pmaxsb %xmm1,%xmm0 + pmaxsb (%rcx),%xmm0 + pmaxsd %xmm1,%xmm0 + pmaxsd (%rcx),%xmm0 + pmaxud %xmm1,%xmm0 + pmaxud (%rcx),%xmm0 + pmaxuw %xmm1,%xmm0 + pmaxuw (%rcx),%xmm0 + pminsb %xmm1,%xmm0 + pminsb (%rcx),%xmm0 + pminsd %xmm1,%xmm0 + pminsd (%rcx),%xmm0 + pminud %xmm1,%xmm0 + pminud (%rcx),%xmm0 + pminuw %xmm1,%xmm0 + pminuw (%rcx),%xmm0 + pmovsxbw %xmm1,%xmm0 + pmovsxbw (%rcx),%xmm0 + pmovsxbd %xmm1,%xmm0 + pmovsxbd (%rcx),%xmm0 + pmovsxbq %xmm1,%xmm0 + pmovsxbq (%rcx),%xmm0 + pmovsxwd %xmm1,%xmm0 + pmovsxwd (%rcx),%xmm0 + pmovsxwq %xmm1,%xmm0 + pmovsxwq (%rcx),%xmm0 + pmovsxdq %xmm1,%xmm0 + pmovsxdq (%rcx),%xmm0 + pmovzxbw %xmm1,%xmm0 + pmovzxbw (%rcx),%xmm0 + pmovzxbd %xmm1,%xmm0 + pmovzxbd (%rcx),%xmm0 + pmovzxbq %xmm1,%xmm0 + pmovzxbq (%rcx),%xmm0 + pmovzxwd %xmm1,%xmm0 + pmovzxwd (%rcx),%xmm0 + pmovzxwq %xmm1,%xmm0 + pmovzxwq (%rcx),%xmm0 + pmovzxdq %xmm1,%xmm0 + pmovzxdq (%rcx),%xmm0 + pmuldq %xmm1,%xmm0 + pmuldq (%rcx),%xmm0 + pmulld %xmm1,%xmm0 + pmulld (%rcx),%xmm0 + ptest %xmm1,%xmm0 + ptest (%rcx),%xmm0 + roundpd $0x0,(%rcx),%xmm0 + roundpd $0x0,%xmm1,%xmm0 + roundps $0x0,(%rcx),%xmm0 + roundps $0x0,%xmm1,%xmm0 + roundsd $0x0,(%rcx),%xmm0 + roundsd $0x0,%xmm1,%xmm0 + roundss $0x0,(%rcx),%xmm0 + roundss $0x0,%xmm1,%xmm0 + + .p2align 4,0 diff --git a/opcodes/ChangeLog b/opcodes/ChangeLog index e7acaca..e13bfd0 100644 --- a/opcodes/ChangeLog +++ b/opcodes/ChangeLog @@ -1,3 +1,27 @@ +2007-04-18 H.J. Lu <hongjiu.lu@intel.com> + + * i386-dis.c (XMM_Fixup): New. + (Edqb): New. + (Edqd): New. + (XMM0): New. + (dqb_mode): New. + (dqd_mode): New. + (PREGRP39 ... PREGRP85): New. + (threebyte_0x38_uses_DATA_prefix): Updated for SSE4. + (threebyte_0x3a_uses_DATA_prefix): Likewise. + (prefix_user_table): Add PREGRP39 ... PREGRP85. + (three_byte_table): Likewise. + (putop): Handle 'K'. + (intel_operand_size): Handle dqb_mode, dqd_mode): + (OP_E): Likewise. + (OP_G): Likewise. + + * i386-opc.c (i386_optab): Add SSE4.1 opcodes. + + * i386-opc.h (CpuSSE4_1): New. + (CpuUnknownFlags): Add CpuSSE4_1. + (regKludge): Update comment. + 2007-04-18 Matthias Klose <doko@ubuntu.com> * Makefile.am (libopcodes_la_LDFLAGS): Use bfd soversion. diff --git a/opcodes/i386-dis.c b/opcodes/i386-dis.c index 75a7403..aae7865 100644 --- a/opcodes/i386-dis.c +++ b/opcodes/i386-dis.c @@ -98,6 +98,7 @@ static void BadOp (void); static void VMX_Fixup (int, int); static void REP_Fixup (int, int); static void CMPXCHG8B_Fixup (int, int); +static void XMM_Fixup (int, int); struct dis_private { /* Points to first byte not fetched. */ @@ -200,6 +201,8 @@ fetch_data (struct disassemble_info *info, bfd_byte *addr) #define Ed { OP_E, d_mode } #define Edq { OP_E, dq_mode } #define Edqw { OP_E, dqw_mode } +#define Edqb { OP_E, dqb_mode } +#define Edqd { OP_E, dqd_mode } #define indirEv { OP_indirE, stack_v_mode } #define indirEp { OP_indirE, f_mode } #define stackEv { OP_E, stack_v_mode } @@ -309,6 +312,7 @@ fetch_data (struct disassemble_info *info, bfd_byte *addr) #define VM { OP_VMX, q_mode } #define OPSUF { OP_3DNowSuffix, 0 } #define OPSIMD { OP_SIMD_Suffix, 0 } +#define XMM0 { XMM_Fixup, 0 } /* Used handle "rep" prefix for string instructions. */ #define Xbr { REP_Fixup, eSI_reg } @@ -345,6 +349,8 @@ fetch_data (struct disassemble_info *info, bfd_byte *addr) #define stack_v_mode 15 /* v_mode for stack-related opcodes. */ #define z_mode 16 /* non-quad operand size depends on prefixes */ #define o_mode 17 /* 16-byte operand */ +#define dqb_mode 18 /* registers like dq_mode, memory like b_mode. */ +#define dqd_mode 19 /* registers like dq_mode, memory like d_mode. */ #define es_reg 100 #define cs_reg 101 @@ -468,6 +474,53 @@ fetch_data (struct disassemble_info *info, bfd_byte *addr) #define PREGRP36 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 36 } } #define PREGRP37 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 37 } } #define PREGRP38 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 38 } } +#define PREGRP39 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 39 } } +#define PREGRP40 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 40 } } +#define PREGRP41 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 41 } } +#define PREGRP42 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 42 } } +#define PREGRP43 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 43 } } +#define PREGRP44 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 44 } } +#define PREGRP45 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 45 } } +#define PREGRP46 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 46 } } +#define PREGRP47 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 47 } } +#define PREGRP48 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 48 } } +#define PREGRP49 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 49 } } +#define PREGRP50 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 50 } } +#define PREGRP51 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 51 } } +#define PREGRP52 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 52 } } +#define PREGRP53 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 53 } } +#define PREGRP54 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 54 } } +#define PREGRP55 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 55 } } +#define PREGRP56 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 56 } } +#define PREGRP57 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 57 } } +#define PREGRP58 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 58 } } +#define PREGRP59 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 59 } } +#define PREGRP60 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 60 } } +#define PREGRP61 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 61 } } +#define PREGRP62 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 62 } } +#define PREGRP63 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 63 } } +#define PREGRP64 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 64 } } +#define PREGRP65 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 65 } } +#define PREGRP66 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 66 } } +#define PREGRP67 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 67 } } +#define PREGRP68 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 68 } } +#define PREGRP69 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 69 } } +#define PREGRP70 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 70 } } +#define PREGRP71 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 71 } } +#define PREGRP72 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 72 } } +#define PREGRP73 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 73 } } +#define PREGRP74 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 74 } } +#define PREGRP75 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 75 } } +#define PREGRP76 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 76 } } +#define PREGRP77 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 77 } } +#define PREGRP78 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 78 } } +#define PREGRP79 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 79 } } +#define PREGRP80 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 80 } } +#define PREGRP81 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 81 } } +#define PREGRP82 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 82 } } +#define PREGRP83 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 83 } } +#define PREGRP84 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 84 } } +#define PREGRP85 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 85 } } #define X86_64_0 NULL, { { NULL, X86_64_SPECIAL }, { NULL, 0 } } @@ -503,6 +556,7 @@ struct dis386 { 'I' => honor following macro letter even in Intel mode (implemented only . for some of the macro letters) 'J' => print 'l' + 'K' => print 'd' or 'q' if rex prefix is present. 'L' => print 'l' if suffix_always is true 'N' => print 'n' if instruction has no wait "prefix" 'O' => print 'd' or 'o' (or 'q' in Intel mode) @@ -1231,10 +1285,10 @@ static const unsigned char threebyte_0x38_uses_DATA_prefix[256] = { /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ /* ------------------------------- */ /* 00 */ 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0, /* 0f */ - /* 10 */ 0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0, /* 1f */ - /* 20 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 2f */ - /* 30 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 3f */ - /* 40 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 4f */ + /* 10 */ 0,0,0,0,1,1,0,1,0,0,0,0,1,1,1,0, /* 1f */ + /* 20 */ 1,1,1,1,1,1,0,0,1,1,1,1,0,0,0,0, /* 2f */ + /* 30 */ 1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1, /* 3f */ + /* 40 */ 1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 4f */ /* 50 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 5f */ /* 60 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 6f */ /* 70 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 7f */ @@ -1302,11 +1356,11 @@ static const unsigned char threebyte_0x38_uses_REPZ_prefix[256] = { static const unsigned char threebyte_0x3a_uses_DATA_prefix[256] = { /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ /* ------------------------------- */ - /* 00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, /* 0f */ - /* 10 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 1f */ - /* 20 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 2f */ + /* 00 */ 0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1, /* 0f */ + /* 10 */ 0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0, /* 1f */ + /* 20 */ 1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 2f */ /* 30 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 3f */ - /* 40 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 4f */ + /* 40 */ 1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 4f */ /* 50 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 5f */ /* 60 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 6f */ /* 70 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 7f */ @@ -2041,6 +2095,382 @@ static const struct dis386 prefix_user_table[][4] = { { "xchgS", { { NOP_Fixup1, eAX_reg }, { NOP_Fixup2, eAX_reg } } }, { "(bad)", { XX } }, }, + + /* PREGRP39 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pblendvb", {XM, EX, XMM0 } }, + { "(bad)", { XX } }, + }, + + /* PREGRP40 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "blendvps", {XM, EX, XMM0 } }, + { "(bad)", { XX } }, + }, + + /* PREGRP41 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "blendvpd", { XM, EX, XMM0 } }, + { "(bad)", { XX } }, + }, + + /* PREGRP42 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "ptest", { XM, EX } }, + { "(bad)", { XX } }, + }, + + /* PREGRP43 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pmovsxbw", { XM, EX } }, + { "(bad)", { XX } }, + }, + + /* PREGRP44 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pmovsxbd", { XM, EX } }, + { "(bad)", { XX } }, + }, + + /* PREGRP45 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pmovsxbq", { XM, EX } }, + { "(bad)", { XX } }, + }, + + /* PREGRP46 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pmovsxwd", { XM, EX } }, + { "(bad)", { XX } }, + }, + + /* PREGRP47 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pmovsxwq", { XM, EX } }, + { "(bad)", { XX } }, + }, + + /* PREGRP48 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pmovsxdq", { XM, EX } }, + { "(bad)", { XX } }, + }, + + /* PREGRP49 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pmuldq", { XM, EX } }, + { "(bad)", { XX } }, + }, + + /* PREGRP50 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pcmpeqq", { XM, EX } }, + { "(bad)", { XX } }, + }, + + /* PREGRP51 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "movntdqa", { XM, EM } }, + { "(bad)", { XX } }, + }, + + /* PREGRP52 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "packusdw", { XM, EX } }, + { "(bad)", { XX } }, + }, + + /* PREGRP53 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pmovzxbw", { XM, EX } }, + { "(bad)", { XX } }, + }, + + /* PREGRP54 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pmovzxbd", { XM, EX } }, + { "(bad)", { XX } }, + }, + + /* PREGRP55 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pmovzxbq", { XM, EX } }, + { "(bad)", { XX } }, + }, + + /* PREGRP56 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pmovzxwd", { XM, EX } }, + { "(bad)", { XX } }, + }, + + /* PREGRP57 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pmovzxwq", { XM, EX } }, + { "(bad)", { XX } }, + }, + + /* PREGRP58 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pmovzxdq", { XM, EX } }, + { "(bad)", { XX } }, + }, + + /* PREGRP59 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pminsb", { XM, EX } }, + { "(bad)", { XX } }, + }, + + /* PREGRP60 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pminsd", { XM, EX } }, + { "(bad)", { XX } }, + }, + + /* PREGRP61 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pminuw", { XM, EX } }, + { "(bad)", { XX } }, + }, + + /* PREGRP62 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pminud", { XM, EX } }, + { "(bad)", { XX } }, + }, + + /* PREGRP63 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pmaxsb", { XM, EX } }, + { "(bad)", { XX } }, + }, + + /* PREGRP64 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pmaxsd", { XM, EX } }, + { "(bad)", { XX } }, + }, + + /* PREGRP65 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pmaxuw", { XM, EX } }, + { "(bad)", { XX } }, + }, + + /* PREGRP66 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pmaxud", { XM, EX } }, + { "(bad)", { XX } }, + }, + + /* PREGRP67 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pmulld", { XM, EX } }, + { "(bad)", { XX } }, + }, + + /* PREGRP68 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "phminposuw", { XM, EX } }, + { "(bad)", { XX } }, + }, + + /* PREGRP69 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "roundps", { XM, EX, Ib } }, + { "(bad)", { XX } }, + }, + + /* PREGRP70 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "roundpd", { XM, EX, Ib } }, + { "(bad)", { XX } }, + }, + + /* PREGRP71 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "roundss", { XM, EX, Ib } }, + { "(bad)", { XX } }, + }, + + /* PREGRP72 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "roundsd", { XM, EX, Ib } }, + { "(bad)", { XX } }, + }, + + /* PREGRP73 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "blendps", { XM, EX, Ib } }, + { "(bad)", { XX } }, + }, + + /* PREGRP74 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "blendpd", { XM, EX, Ib } }, + { "(bad)", { XX } }, + }, + + /* PREGRP75 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pblendw", { XM, EX, Ib } }, + { "(bad)", { XX } }, + }, + + /* PREGRP76 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pextrb", { Edqb, XM, Ib } }, + { "(bad)", { XX } }, + }, + + /* PREGRP77 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pextrw", { Edqw, XM, Ib } }, + { "(bad)", { XX } }, + }, + + /* PREGRP78 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pextrK", { Edq, XM, Ib } }, + { "(bad)", { XX } }, + }, + + /* PREGRP79 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "extractps", { Edqd, XM, Ib } }, + { "(bad)", { XX } }, + }, + + /* PREGRP80 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pinsrb", { XM, Edqb, Ib } }, + { "(bad)", { XX } }, + }, + + /* PREGRP81 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "insertps", { XM, EX, Ib } }, + { "(bad)", { XX } }, + }, + + /* PREGRP82 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "pinsrK", { XM, Edq, Ib } }, + { "(bad)", { XX } }, + }, + + /* PREGRP83 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "dpps", { XM, EX, Ib } }, + { "(bad)", { XX } }, + }, + + /* PREGRP84 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "dppd", { XM, EX, Ib } }, + { "(bad)", { XX } }, + }, + + /* PREGRP85 */ + { + { "(bad)", { XX } }, + { "(bad)", { XX } }, + { "mpsadbw", { XM, EX, Ib } }, + { "(bad)", { XX } }, + }, }; static const struct dis386 x86_64_table[][2] = { @@ -2084,14 +2514,14 @@ static const struct dis386 three_byte_table[][256] = { { "(bad)", { XX } }, { "(bad)", { XX } }, /* 10 */ + { PREGRP39 }, { "(bad)", { XX } }, { "(bad)", { XX } }, { "(bad)", { XX } }, + { PREGRP40 }, + { PREGRP41 }, { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, + { PREGRP42 }, /* 18 */ { "(bad)", { XX } }, { "(bad)", { XX } }, @@ -2102,44 +2532,44 @@ static const struct dis386 three_byte_table[][256] = { { "pabsd", { MX, EM } }, { "(bad)", { XX } }, /* 20 */ - { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, + { PREGRP43 }, + { PREGRP44 }, + { PREGRP45 }, + { PREGRP46 }, + { PREGRP47 }, + { PREGRP48 }, { "(bad)", { XX } }, { "(bad)", { XX } }, /* 28 */ - { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, + { PREGRP49 }, + { PREGRP50 }, + { PREGRP51 }, + { PREGRP52 }, { "(bad)", { XX } }, { "(bad)", { XX } }, { "(bad)", { XX } }, { "(bad)", { XX } }, /* 30 */ - { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, + { PREGRP53 }, + { PREGRP54 }, + { PREGRP55 }, + { PREGRP56 }, + { PREGRP57 }, + { PREGRP58 }, { "(bad)", { XX } }, { "(bad)", { XX } }, /* 38 */ - { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, + { PREGRP59 }, + { PREGRP60 }, + { PREGRP61 }, + { PREGRP62 }, + { PREGRP63 }, + { PREGRP64 }, + { PREGRP65 }, + { PREGRP66 }, /* 40 */ - { "(bad)", { XX } }, - { "(bad)", { XX } }, + { PREGRP67 }, + { PREGRP68 }, { "(bad)", { XX } }, { "(bad)", { XX } }, { "(bad)", { XX } }, @@ -2366,23 +2796,23 @@ static const struct dis386 three_byte_table[][256] = { { "(bad)", { XX } }, { "(bad)", { XX } }, /* 08 */ - { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, + { PREGRP69 }, + { PREGRP70 }, + { PREGRP71 }, + { PREGRP72 }, + { PREGRP73 }, + { PREGRP74 }, + { PREGRP75 }, { "palignr", { MX, EM, Ib } }, /* 10 */ { "(bad)", { XX } }, { "(bad)", { XX } }, { "(bad)", { XX } }, { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, + { PREGRP76 }, + { PREGRP77 }, + { PREGRP78 }, + { PREGRP79 }, /* 18 */ { "(bad)", { XX } }, { "(bad)", { XX } }, @@ -2393,9 +2823,9 @@ static const struct dis386 three_byte_table[][256] = { { "(bad)", { XX } }, { "(bad)", { XX } }, /* 20 */ - { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, + { PREGRP80 }, + { PREGRP81 }, + { PREGRP82 }, { "(bad)", { XX } }, { "(bad)", { XX } }, { "(bad)", { XX } }, @@ -2429,9 +2859,9 @@ static const struct dis386 three_byte_table[][256] = { { "(bad)", { XX } }, { "(bad)", { XX } }, /* 40 */ - { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, + { PREGRP83 }, + { PREGRP84 }, + { PREGRP85 }, { "(bad)", { XX } }, { "(bad)", { XX } }, { "(bad)", { XX } }, @@ -3839,6 +4269,13 @@ putop (const char *template, int sizeflag) break; *obufp++ = 'l'; break; + case 'K': + USED_REX (REX_W); + if (rex & REX_W) + *obufp++ = 'q'; + else + *obufp++ = 'd'; + break; case 'Z': if (intel_syntax) break; @@ -4128,6 +4565,7 @@ intel_operand_size (int bytemode, int sizeflag) switch (bytemode) { case b_mode: + case dqb_mode: oappend ("BYTE PTR "); break; case w_mode: @@ -4161,6 +4599,7 @@ intel_operand_size (int bytemode, int sizeflag) used_prefixes |= (prefixes & PREFIX_DATA); break; case d_mode: + case dqd_mode: oappend ("DWORD PTR "); break; case q_mode: @@ -4244,6 +4683,8 @@ OP_E (int bytemode, int sizeflag) /* FALLTHRU */ case v_mode: case dq_mode: + case dqb_mode: + case dqd_mode: case dqw_mode: USED_REX (REX_W); if (rex & REX_W) @@ -4498,6 +4939,8 @@ OP_G (int bytemode, int sizeflag) break; case v_mode: case dq_mode: + case dqb_mode: + case dqd_mode: case dqw_mode: USED_REX (REX_W); if (rex & REX_W) @@ -5797,3 +6240,10 @@ CMPXCHG8B_Fixup (int bytemode, int sizeflag) } OP_M (bytemode, sizeflag); } + +static void +XMM_Fixup (int reg, int sizeflag ATTRIBUTE_UNUSED) +{ + sprintf (scratchbuf, "%%xmm%d", reg); + oappend (scratchbuf + intel_syntax); +} diff --git a/opcodes/i386-opc.c b/opcodes/i386-opc.c index 7d9d44f..b1b62a6 100644 --- a/opcodes/i386-opc.c +++ b/opcodes/i386-opc.c @@ -1167,6 +1167,10 @@ const template i386_optab[] = {"pavgw", 2, 0x660fe3, X, CpuSSE2,NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } }, {"pextrw", 3, 0x0fc5, X, CpuMMX2,lq_Suf|IgnoreSize|Modrm, { Imm8, RegMMX, Reg32|Reg64 } }, {"pextrw", 3, 0x660fc5, X, CpuSSE2,lq_Suf|IgnoreSize|Modrm, { Imm8, RegXMM, Reg32|Reg64 } }, + +/* Streaming SIMD extensions 4.1 Instructions. */ +{"pextrw", 3, 0x660f3a15,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { Imm8, RegXMM, Reg32|Reg64|ShortMem } }, + {"pinsrw", 3, 0x0fc4, X, CpuMMX2,lq_Suf|IgnoreSize|Modrm, { Imm8, Reg32|Reg64|ShortMem, RegMMX } }, {"pinsrw", 3, 0x660fc4, X, CpuSSE2,lq_Suf|IgnoreSize|Modrm, { Imm8, Reg32|Reg64|ShortMem, RegXMM } }, {"pmaxsw", 2, 0x0fee, X, CpuMMX2,NoSuf|IgnoreSize|Modrm, { RegMMX|LLongMem, RegMMX, 0 } }, @@ -1380,6 +1384,57 @@ const template i386_optab[] = {"pabsd", 2, 0x0f381e,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { RegMMX|LongMem, RegMMX, 0 } }, {"pabsd", 2, 0x660f381e,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } }, +/* Streaming SIMD extensions 4.1 Instructions. */ + +{"blendpd", 3, 0x660f3a0d,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { Imm8, RegXMM|LLongMem, RegXMM } }, +{"blendps", 3, 0x660f3a0c,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { Imm8, RegXMM|LLongMem, RegXMM } }, +{"blendvpd", 3, 0x660f3815,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm|regKludge, { RegXMM, RegXMM|LLongMem, RegXMM } }, +{"blendvps", 3, 0x660f3814,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm|regKludge, { RegXMM, RegXMM|LLongMem, RegXMM } }, +{"dppd", 3, 0x660f3a41,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { Imm8, RegXMM|LLongMem, RegXMM } }, +{"dpps", 3, 0x660f3a40,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { Imm8, RegXMM|LLongMem, RegXMM } }, +{"extractps",3, 0x660f3a17,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { Imm8, RegXMM, Reg32|Reg64|LongMem } }, +{"insertps", 3, 0x660f3a21,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { Imm8, RegXMM|LongMem, RegXMM } }, +{"movntdqa", 2, 0x660f382a,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { LLongMem, RegXMM, 0 } }, +{"mpsadbw", 3, 0x660f3a42,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { Imm8, RegXMM|LLongMem, RegXMM } }, +{"packusdw", 2, 0x660f382b,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } }, +{"pblendvb", 3, 0x660f3810,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm|regKludge, { RegXMM, RegXMM|LLongMem, RegXMM } }, +{"pblendw", 3, 0x660f3a0e,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { Imm8, RegXMM|LLongMem, RegXMM } }, +{"pcmpeqq", 2, 0x660f3829,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } }, +{"pextrb", 3, 0x660f3a14,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { Imm8, RegXMM, Reg32|Reg64|ByteMem } }, +{"pextrd", 3, 0x660f3a16,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { Imm8, RegXMM, Reg32|LongMem } }, +{"pextrq", 3, 0x660f3a16,X, CpuSSE4_1|Cpu64, NoSuf|IgnoreSize|Modrm|Size64, { Imm8, RegXMM, Reg64|LLongMem } }, +{"phminposuw",2, 0x660f3841,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } }, +{"pinsrb", 3, 0x660f3a20,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { Imm8, Reg32|Reg64|ByteMem, RegXMM } }, +{"pinsrd", 3, 0x660f3a22,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { Imm8, Reg32|LongMem, RegXMM } }, +{"pinsrq", 3, 0x660f3a22,X, CpuSSE4_1|Cpu64, NoSuf|IgnoreSize|Modrm|Size64, { Imm8, Reg64|LLongMem, RegXMM } }, +{"pmaxsb", 2, 0x660f383c,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } }, +{"pmaxsd", 2, 0x660f383d,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } }, +{"pmaxud", 2, 0x660f383f,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } }, +{"pmaxuw", 2, 0x660f383e,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } }, +{"pminsb", 2, 0x660f3838,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } }, +{"pminsd", 2, 0x660f3839,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } }, +{"pminud", 2, 0x660f383b,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } }, +{"pminuw", 2, 0x660f383a,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } }, +{"pmovsxbw", 2, 0x660f3820,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } }, +{"pmovsxbd", 2, 0x660f3821,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { RegXMM|LongMem, RegXMM, 0 } }, +{"pmovsxbq", 2, 0x660f3822,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { RegXMM|ShortMem, RegXMM, 0 } }, +{"pmovsxwd", 2, 0x660f3823,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } }, +{"pmovsxwq", 2, 0x660f3824,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { RegXMM|LongMem, RegXMM, 0 } }, +{"pmovsxdq", 2, 0x660f3825,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } }, +{"pmovzxbw", 2, 0x660f3830,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } }, +{"pmovzxbd", 2, 0x660f3831,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { RegXMM|LongMem, RegXMM, 0 } }, +{"pmovzxbq", 2, 0x660f3832,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { RegXMM|ShortMem, RegXMM, 0 } }, +{"pmovzxwd", 2, 0x660f3833,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } }, +{"pmovzxwq", 2, 0x660f3834,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { RegXMM|LongMem, RegXMM, 0 } }, +{"pmovzxdq", 2, 0x660f3835,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } }, +{"pmuldq", 2, 0x660f3828,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } }, +{"pmulld", 2, 0x660f3840,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } }, +{"ptest", 2, 0x660f3817,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } }, +{"roundpd", 3, 0x660f3a09,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { Imm8, RegXMM|LLongMem, RegXMM } }, +{"roundps", 3, 0x660f3a08,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { Imm8, RegXMM|LLongMem, RegXMM } }, +{"roundsd", 3, 0x660f3a0b,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { Imm8, RegXMM|LLongMem, RegXMM } }, +{"roundss", 3, 0x660f3a0a,X, CpuSSE4_1, NoSuf|IgnoreSize|Modrm, { Imm8, RegXMM|LongMem, RegXMM } }, + /* AMD 3DNow! instructions. */ {"prefetch", 1, 0x0f0d, 0, Cpu3dnow, NoSuf|IgnoreSize|Modrm, { ByteMem, 0, 0 } }, diff --git a/opcodes/i386-opc.h b/opcodes/i386-opc.h index 8e0a842..94ef037 100644 --- a/opcodes/i386-opc.h +++ b/opcodes/i386-opc.h @@ -69,6 +69,7 @@ typedef struct template #define CpuSSSE3 0x80000 /* Supplemental Streaming SIMD extensions 3 required */ #define CpuSSE4a 0x100000 /* SSE4a New Instuctions required */ #define CpuABM 0x200000 /* ABM New Instructions required */ +#define CpuSSE4_1 0x400000 /* SSE4.1 Instructions required */ /* These flags are set by gas depending on the flag_code. */ #define Cpu64 0x4000000 /* 64bit support required */ @@ -77,7 +78,8 @@ typedef struct template /* The default value for unknown CPUs - enable all features to avoid problems. */ #define CpuUnknownFlags (Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686 \ |CpuP4|CpuSledgehammer|CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3|CpuVMX \ - |Cpu3dnow|Cpu3dnowA|CpuK6|CpuPadLock|CpuSVME|CpuSSSE3|CpuABM|CpuSSE4a) + |Cpu3dnow|Cpu3dnowA|CpuK6|CpuPadLock|CpuSVME|CpuSSSE3|CpuSSE4_1 \ + |CpuABM|CpuSSE4a) /* the bits in opcode_modifier are used to generate the final opcode from the base_opcode. These bits also are used to detect alternate forms of @@ -110,7 +112,9 @@ typedef struct template #define No_xSuf 0x800000 /* x suffix on instruction illegal */ #define FWait 0x1000000 /* instruction needs FWAIT */ #define IsString 0x2000000 /* quick test for string instructions */ -#define regKludge 0x4000000 /* fake an extra reg operand for clr, imul */ +#define regKludge 0x4000000 /* fake an extra reg operand for clr, imul + and special register processing for + some instructions. */ #define IsPrefix 0x8000000 /* opcode is a prefix */ #define ImmExt 0x10000000 /* instruction has extension in 8 bit imm */ #define NoRex64 0x20000000 /* instruction don't need Rex64 prefix. */ |