diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2007-04-18 16:13:15 +0000 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2007-04-18 16:13:15 +0000 |
commit | 42903f7f5903fc4a27294aab1e23708c59a86b17 (patch) | |
tree | 5e5e5c86c160605d02c3bd5d9663b0faf1776893 /gas | |
parent | 026d3abbb22d4ac067a57d0a9aa2b2482266c9e7 (diff) | |
download | gdb-42903f7f5903fc4a27294aab1e23708c59a86b17.zip gdb-42903f7f5903fc4a27294aab1e23708c59a86b17.tar.gz gdb-42903f7f5903fc4a27294aab1e23708c59a86b17.tar.bz2 |
gas/
2007-04-18 H.J. Lu <hongjiu.lu@intel.com>
* config/tc-i386.c (cpu_arch): Add .sse4.1.
(process_operands): Adjust implicit operand for blendvpd,
blendvps and pblendvb in SSE4.1.
(output_insn): Support SSE4.1.
gas/testsuite/
2007-04-18 H.J. Lu <hongjiu.lu@intel.com>
* gas/i386/i386.exp: Add sse4.1 and x86-64-sse4.1.
* gas/i386/sse4_1.d: New file.
* gas/i386/sse4_1.s: Likewise.
* gas/i386/x86-64-sse4_1.d: Likewise.
* gas/i386/x86-64-sse4_1.s: Likewise.
opcodes/
2007-04-18 H.J. Lu <hongjiu.lu@intel.com>
* i386-dis.c (XMM_Fixup): New.
(Edqb): New.
(Edqd): New.
(XMM0): New.
(dqb_mode): New.
(dqd_mode): New.
(PREGRP39 ... PREGRP85): New.
(threebyte_0x38_uses_DATA_prefix): Updated for SSE4.
(threebyte_0x3a_uses_DATA_prefix): Likewise.
(prefix_user_table): Add PREGRP39 ... PREGRP85.
(three_byte_table): Likewise.
(putop): Handle 'K'.
(intel_operand_size): Handle dqb_mode, dqd_mode):
(OP_E): Likewise.
(OP_G): Likewise.
* i386-opc.c (i386_optab): Add SSE4.1 opcodes.
* i386-opc.h (CpuSSE4_1): New.
(CpuUnknownFlags): Add CpuSSE4_1.
(regKludge): Update comment.
Diffstat (limited to 'gas')
-rw-r--r-- | gas/ChangeLog | 7 | ||||
-rw-r--r-- | gas/config/tc-i386.c | 62 | ||||
-rw-r--r-- | gas/testsuite/ChangeLog | 9 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/i386.exp | 2 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/sse4_1.d | 102 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/sse4_1.s | 99 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/x86-64-sse4_1.d | 110 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/x86-64-sse4_1.s | 107 |
8 files changed, 484 insertions, 14 deletions
diff --git a/gas/ChangeLog b/gas/ChangeLog index 110f35b..05ccfc0 100644 --- a/gas/ChangeLog +++ b/gas/ChangeLog @@ -1,3 +1,10 @@ +2007-04-18 H.J. Lu <hongjiu.lu@intel.com> + + * config/tc-i386.c (cpu_arch): Add .sse4.1. + (process_operands): Adjust implicit operand for blendvpd, + blendvps and pblendvb in SSE4.1. + (output_insn): Support SSE4.1. + 2007-04-18 Paul Brook <paul@codesourcery.com> * config/tc-arm.c (do_t_rsb): Use 16-bit encoding when possible. diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index b70396d..6ee4010 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -498,6 +498,8 @@ static const arch_entry cpu_arch[] = CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3}, {".ssse3", PROCESSOR_UNKNOWN, CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3|CpuSSSE3}, + {".sse4.1", PROCESSOR_UNKNOWN, + CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3|CpuSSSE3|CpuSSE4_1}, {".3dnow", PROCESSOR_UNKNOWN, CpuMMX|Cpu3dnow}, {".3dnowa", PROCESSOR_UNKNOWN, @@ -3277,14 +3279,47 @@ process_operands (void) is converted into xor %reg, %reg. */ if (i.tm.opcode_modifier & regKludge) { - unsigned int first_reg_op = (i.types[0] & Reg) ? 0 : 1; - /* Pretend we saw the extra register operand. */ - assert (i.reg_operands == 1 - && i.op[first_reg_op + 1].regs == 0); - i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs; - i.types[first_reg_op + 1] = i.types[first_reg_op]; - i.operands++; - i.reg_operands++; + if ((i.tm.cpu_flags & CpuSSE4_1)) + { + /* The first operand in instruction blendvpd, blendvps and + pblendvb in SSE4.1 is implicit and must be xmm0. */ + assert (i.operands == 3 + && i.reg_operands >= 2 + && i.types[0] == RegXMM); + if (i.op[0].regs->reg_num != 0) + { + if (intel_syntax) + as_bad (_("the last operand of `%s' must be `%sxmm0'"), + i.tm.name, register_prefix); + else + as_bad (_("the first operand of `%s' must be `%sxmm0'"), + i.tm.name, register_prefix); + return 0; + } + i.op[0] = i.op[1]; + i.op[1] = i.op[2]; + i.types[0] = i.types[1]; + i.types[1] = i.types[2]; + i.operands--; + i.reg_operands--; + + /* We need to adjust fields in i.tm since they are used by + build_modrm_byte. */ + i.tm.operand_types [0] = i.tm.operand_types [1]; + i.tm.operand_types [1] = i.tm.operand_types [2]; + i.tm.operands--; + } + else + { + unsigned int first_reg_op = (i.types[0] & Reg) ? 0 : 1; + /* Pretend we saw the extra register operand. */ + assert (i.reg_operands == 1 + && i.op[first_reg_op + 1].regs == 0); + i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs; + i.types[first_reg_op + 1] = i.types[first_reg_op]; + i.operands++; + i.reg_operands++; + } } if (i.tm.opcode_modifier & ShortForm) @@ -3893,11 +3928,10 @@ output_insn (void) unsigned char *q; unsigned int prefix; - /* All opcodes on i386 have either 1 or 2 bytes. Supplemental - Streaming SIMD extensions 3 Instructions have 3 bytes. We may - use one more higher byte to specify a prefix the instruction - requires. */ - if ((i.tm.cpu_flags & CpuSSSE3) != 0) + /* All opcodes on i386 have either 1 or 2 bytes. SSSE3 and + SSE4.1 instructions have 3 bytes. We may use one more higher + byte to specify a prefix the instruction requires. */ + if ((i.tm.cpu_flags & (CpuSSSE3 | CpuSSE4_1)) != 0) { if (i.tm.base_opcode & 0xff000000) { @@ -3938,7 +3972,7 @@ output_insn (void) } else { - if ((i.tm.cpu_flags & CpuSSSE3) != 0) + if ((i.tm.cpu_flags & (CpuSSSE3 | CpuSSE4_1)) != 0) { p = frag_more (3); *p++ = (i.tm.base_opcode >> 16) & 0xff; diff --git a/gas/testsuite/ChangeLog b/gas/testsuite/ChangeLog index 8f6a390..0679212 100644 --- a/gas/testsuite/ChangeLog +++ b/gas/testsuite/ChangeLog @@ -1,3 +1,12 @@ +2007-04-18 H.J. Lu <hongjiu.lu@intel.com> + + * gas/i386/i386.exp: Add sse4.1 and x86-64-sse4.1. + + * gas/i386/sse4_1.d: New file. + * gas/i386/sse4_1.s: Likewise. + * gas/i386/x86-64-sse4_1.d: Likewise. + * gas/i386/x86-64-sse4_1.s: Likewise. + 2007-04-18 Paul Brook <paul@codesourcery.com> * gas/arm/thumb2_add.s: Add rsb #0 test. diff --git a/gas/testsuite/gas/i386/i386.exp b/gas/testsuite/gas/i386/i386.exp index cfef275..30053e5 100644 --- a/gas/testsuite/gas/i386/i386.exp +++ b/gas/testsuite/gas/i386/i386.exp @@ -86,6 +86,7 @@ if [expr ([istarget "i*86-*-*"] || [istarget "x86_64-*-*"]) && [gas_32_check]] run_dump_test "nops-3" run_dump_test "addr16" run_dump_test "addr32" + run_dump_test "sse4_1" # These tests require support for 8 and 16 bit relocs, # so we only run them for ELF and COFF targets. @@ -171,6 +172,7 @@ if [expr ([istarget "i*86-*-*"] || [istarget "x86_64-*-*"]) && [gas_64_check]] t run_dump_test "x86-64-nops-1-k8" run_dump_test "x86-64-nops-1-nocona" run_dump_test "x86-64-nops-1-merom" + run_dump_test "x86-64-sse4_1" if { ![istarget "*-*-aix*"] && ![istarget "*-*-beos*"] diff --git a/gas/testsuite/gas/i386/sse4_1.d b/gas/testsuite/gas/i386/sse4_1.d new file mode 100644 index 0000000..6797228 --- /dev/null +++ b/gas/testsuite/gas/i386/sse4_1.d @@ -0,0 +1,102 @@ +#objdump: -dw +#name: i386 SSE4.1 + +.*: file format .* + +Disassembly of section .text: + +0+000 <foo>: +[ ]*[0-9a-f]+: 66 0f 3a 0d 01 00 blendpd \$0x0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0d c1 00 blendpd \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0c 01 00 blendps \$0x0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0c c1 00 blendps \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 15 01 blendvpd %xmm0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 15 c1 blendvpd %xmm0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 14 01 blendvps %xmm0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 14 c1 blendvps %xmm0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 41 01 00 dppd \$0x0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 41 c1 00 dppd \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 40 01 00 dpps \$0x0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 40 c1 00 dpps \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 17 c1 00 extractps \$0x0,%xmm0,%ecx +[ ]*[0-9a-f]+: 66 0f 3a 17 01 00 extractps \$0x0,%xmm0,\(%ecx\) +[ ]*[0-9a-f]+: 66 0f 3a 21 c1 00 insertps \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 21 01 00 insertps \$0x0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 2a 01 movntdqa \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 42 01 00 mpsadbw \$0x0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 42 c1 00 mpsadbw \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 2b 01 packusdw \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 2b c1 packusdw %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 10 01 pblendvb %xmm0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 10 c1 pblendvb %xmm0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0e 01 00 pblendw \$0x0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0e c1 00 pblendw \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 29 c1 pcmpeqq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 29 01 pcmpeqq \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 14 c1 00 pextrb \$0x0,%xmm0,%ecx +[ ]*[0-9a-f]+: 66 0f 3a 14 01 00 pextrb \$0x0,%xmm0,\(%ecx\) +[ ]*[0-9a-f]+: 66 0f 3a 16 c1 00 pextrd \$0x0,%xmm0,%ecx +[ ]*[0-9a-f]+: 66 0f 3a 16 01 00 pextrd \$0x0,%xmm0,\(%ecx\) +[ ]*[0-9a-f]+: 66 0f c5 c8 00 pextrw \$0x0,%xmm0,%ecx +[ ]*[0-9a-f]+: 66 0f 3a 15 01 00 pextrw \$0x0,%xmm0,\(%ecx\) +[ ]*[0-9a-f]+: 66 0f 38 41 c1 phminposuw %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 41 01 phminposuw \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 20 01 00 pinsrb \$0x0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 20 c1 00 pinsrb \$0x0,%ecx,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 22 01 00 pinsrd \$0x0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 22 c1 00 pinsrd \$0x0,%ecx,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3c c1 pmaxsb %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3c 01 pmaxsb \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3d c1 pmaxsd %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3d 01 pmaxsd \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3f c1 pmaxud %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3f 01 pmaxud \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3e c1 pmaxuw %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3e 01 pmaxuw \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 38 c1 pminsb %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 38 01 pminsb \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 39 c1 pminsd %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 39 01 pminsd \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3b c1 pminud %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3b 01 pminud \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3a c1 pminuw %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3a 01 pminuw \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 20 c1 pmovsxbw %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 20 01 pmovsxbw \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 21 c1 pmovsxbd %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 21 01 pmovsxbd \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 22 c1 pmovsxbq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 22 01 pmovsxbq \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 23 c1 pmovsxwd %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 23 01 pmovsxwd \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 24 c1 pmovsxwq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 24 01 pmovsxwq \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 25 c1 pmovsxdq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 25 01 pmovsxdq \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 30 c1 pmovzxbw %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 30 01 pmovzxbw \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 31 c1 pmovzxbd %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 31 01 pmovzxbd \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 32 c1 pmovzxbq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 32 01 pmovzxbq \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 33 c1 pmovzxwd %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 33 01 pmovzxwd \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 34 c1 pmovzxwq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 34 01 pmovzxwq \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 35 c1 pmovzxdq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 35 01 pmovzxdq \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 28 c1 pmuldq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 28 01 pmuldq \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 40 c1 pmulld %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 40 01 pmulld \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 17 c1 ptest %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 17 01 ptest \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 09 01 00 roundpd \$0x0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 09 c1 00 roundpd \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 08 01 00 roundps \$0x0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 08 c1 00 roundps \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0b 01 00 roundsd \$0x0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0b c1 00 roundsd \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0a 01 00 roundss \$0x0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0a c1 00 roundss \$0x0,%xmm1,%xmm0 +#pass diff --git a/gas/testsuite/gas/i386/sse4_1.s b/gas/testsuite/gas/i386/sse4_1.s new file mode 100644 index 0000000..258dde8 --- /dev/null +++ b/gas/testsuite/gas/i386/sse4_1.s @@ -0,0 +1,99 @@ +# Streaming SIMD extensions 4.1 Instructions + + .text +foo: + blendpd $0,(%ecx),%xmm0 + blendpd $0,%xmm1,%xmm0 + blendps $0,(%ecx),%xmm0 + blendps $0,%xmm1,%xmm0 + blendvpd %xmm0,(%ecx),%xmm0 + blendvpd %xmm0,%xmm1,%xmm0 + blendvps %xmm0,(%ecx),%xmm0 + blendvps %xmm0,%xmm1,%xmm0 + dppd $0,(%ecx),%xmm0 + dppd $0,%xmm1,%xmm0 + dpps $0,(%ecx),%xmm0 + dpps $0,%xmm1,%xmm0 + extractps $0,%xmm0,%ecx + extractps $0,%xmm0,(%ecx) + insertps $0,%xmm1,%xmm0 + insertps $0,(%ecx),%xmm0 + movntdqa (%ecx),%xmm0 + mpsadbw $0,(%ecx),%xmm0 + mpsadbw $0,%xmm1,%xmm0 + packusdw (%ecx),%xmm0 + packusdw %xmm1,%xmm0 + pblendvb %xmm0,(%ecx),%xmm0 + pblendvb %xmm0,%xmm1,%xmm0 + pblendw $0,(%ecx),%xmm0 + pblendw $0,%xmm1,%xmm0 + pcmpeqq %xmm1,%xmm0 + pcmpeqq (%ecx),%xmm0 + pextrb $0,%xmm0,%ecx + pextrb $0,%xmm0,(%ecx) + pextrd $0,%xmm0,%ecx + pextrd $0,%xmm0,(%ecx) + pextrw $0,%xmm0,%ecx + pextrw $0,%xmm0,(%ecx) + phminposuw %xmm1,%xmm0 + phminposuw (%ecx),%xmm0 + pinsrb $0,(%ecx),%xmm0 + pinsrb $0,%ecx,%xmm0 + pinsrd $0,(%ecx),%xmm0 + pinsrd $0,%ecx,%xmm0 + pmaxsb %xmm1,%xmm0 + pmaxsb (%ecx),%xmm0 + pmaxsd %xmm1,%xmm0 + pmaxsd (%ecx),%xmm0 + pmaxud %xmm1,%xmm0 + pmaxud (%ecx),%xmm0 + pmaxuw %xmm1,%xmm0 + pmaxuw (%ecx),%xmm0 + pminsb %xmm1,%xmm0 + pminsb (%ecx),%xmm0 + pminsd %xmm1,%xmm0 + pminsd (%ecx),%xmm0 + pminud %xmm1,%xmm0 + pminud (%ecx),%xmm0 + pminuw %xmm1,%xmm0 + pminuw (%ecx),%xmm0 + pmovsxbw %xmm1,%xmm0 + pmovsxbw (%ecx),%xmm0 + pmovsxbd %xmm1,%xmm0 + pmovsxbd (%ecx),%xmm0 + pmovsxbq %xmm1,%xmm0 + pmovsxbq (%ecx),%xmm0 + pmovsxwd %xmm1,%xmm0 + pmovsxwd (%ecx),%xmm0 + pmovsxwq %xmm1,%xmm0 + pmovsxwq (%ecx),%xmm0 + pmovsxdq %xmm1,%xmm0 + pmovsxdq (%ecx),%xmm0 + pmovzxbw %xmm1,%xmm0 + pmovzxbw (%ecx),%xmm0 + pmovzxbd %xmm1,%xmm0 + pmovzxbd (%ecx),%xmm0 + pmovzxbq %xmm1,%xmm0 + pmovzxbq (%ecx),%xmm0 + pmovzxwd %xmm1,%xmm0 + pmovzxwd (%ecx),%xmm0 + pmovzxwq %xmm1,%xmm0 + pmovzxwq (%ecx),%xmm0 + pmovzxdq %xmm1,%xmm0 + pmovzxdq (%ecx),%xmm0 + pmuldq %xmm1,%xmm0 + pmuldq (%ecx),%xmm0 + pmulld %xmm1,%xmm0 + pmulld (%ecx),%xmm0 + ptest %xmm1,%xmm0 + ptest (%ecx),%xmm0 + roundpd $0,(%ecx),%xmm0 + roundpd $0,%xmm1,%xmm0 + roundps $0,(%ecx),%xmm0 + roundps $0,%xmm1,%xmm0 + roundsd $0,(%ecx),%xmm0 + roundsd $0,%xmm1,%xmm0 + roundss $0,(%ecx),%xmm0 + roundss $0,%xmm1,%xmm0 + + .p2align 4,0 diff --git a/gas/testsuite/gas/i386/x86-64-sse4_1.d b/gas/testsuite/gas/i386/x86-64-sse4_1.d new file mode 100644 index 0000000..6ed99e2 --- /dev/null +++ b/gas/testsuite/gas/i386/x86-64-sse4_1.d @@ -0,0 +1,110 @@ +#objdump: -dw +#name: x86-64 SSE4.1 + +.*: file format .* + +Disassembly of section .text: + +0+000 <foo>: +[ ]*[0-9a-f]+: 66 0f 3a 0d 01 00 blendpd \$0x0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0d c1 00 blendpd \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0c 01 00 blendps \$0x0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0c c1 00 blendps \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 15 01 blendvpd %xmm0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 15 c1 blendvpd %xmm0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 14 01 blendvps %xmm0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 14 c1 blendvps %xmm0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 41 01 00 dppd \$0x0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 41 c1 00 dppd \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 40 01 00 dpps \$0x0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 40 c1 00 dpps \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 48 0f 3a 17 c1 00 extractps \$0x0,%xmm0,%rcx +[ ]*[0-9a-f]+: 66 0f 3a 17 c1 00 extractps \$0x0,%xmm0,%ecx +[ ]*[0-9a-f]+: 66 0f 3a 17 01 00 extractps \$0x0,%xmm0,\(%rcx\) +[ ]*[0-9a-f]+: 66 0f 3a 21 c1 00 insertps \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 21 01 00 insertps \$0x0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 2a 01 movntdqa \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 42 01 00 mpsadbw \$0x0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 42 c1 00 mpsadbw \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 2b 01 packusdw \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 2b c1 packusdw %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 10 01 pblendvb %xmm0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 10 c1 pblendvb %xmm0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0e 01 00 pblendw \$0x0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0e c1 00 pblendw \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 29 c1 pcmpeqq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 29 01 pcmpeqq \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 48 0f 3a 14 c1 00 pextrb \$0x0,%xmm0,%rcx +[ ]*[0-9a-f]+: 66 0f 3a 14 c1 00 pextrb \$0x0,%xmm0,%ecx +[ ]*[0-9a-f]+: 66 0f 3a 14 01 00 pextrb \$0x0,%xmm0,\(%rcx\) +[ ]*[0-9a-f]+: 66 0f 3a 16 c1 00 pextrd \$0x0,%xmm0,%ecx +[ ]*[0-9a-f]+: 66 0f 3a 16 01 00 pextrd \$0x0,%xmm0,\(%rcx\) +[ ]*[0-9a-f]+: 66 48 0f 3a 16 c1 00 pextrq \$0x0,%xmm0,%rcx +[ ]*[0-9a-f]+: 66 48 0f 3a 16 01 00 pextrq \$0x0,%xmm0,\(%rcx\) +[ ]*[0-9a-f]+: 66 48 0f c5 c8 00 pextrw \$0x0,%xmm0,%rcx +[ ]*[0-9a-f]+: 66 0f c5 c8 00 pextrw \$0x0,%xmm0,%ecx +[ ]*[0-9a-f]+: 66 0f 3a 15 01 00 pextrw \$0x0,%xmm0,\(%rcx\) +[ ]*[0-9a-f]+: 66 0f 38 41 c1 phminposuw %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 41 01 phminposuw \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 20 01 00 pinsrb \$0x0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 20 c1 00 pinsrb \$0x0,%ecx,%xmm0 +[ ]*[0-9a-f]+: 66 48 0f 3a 20 c1 00 pinsrb \$0x0,%rcx,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 22 01 00 pinsrd \$0x0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 22 c1 00 pinsrd \$0x0,%ecx,%xmm0 +[ ]*[0-9a-f]+: 66 48 0f 3a 22 01 00 pinsrq \$0x0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 48 0f 3a 22 c1 00 pinsrq \$0x0,%rcx,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3c c1 pmaxsb %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3c 01 pmaxsb \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3d c1 pmaxsd %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3d 01 pmaxsd \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3f c1 pmaxud %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3f 01 pmaxud \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3e c1 pmaxuw %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3e 01 pmaxuw \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 38 c1 pminsb %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 38 01 pminsb \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 39 c1 pminsd %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 39 01 pminsd \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3b c1 pminud %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3b 01 pminud \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3a c1 pminuw %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 3a 01 pminuw \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 20 c1 pmovsxbw %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 20 01 pmovsxbw \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 21 c1 pmovsxbd %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 21 01 pmovsxbd \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 22 c1 pmovsxbq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 22 01 pmovsxbq \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 23 c1 pmovsxwd %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 23 01 pmovsxwd \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 24 c1 pmovsxwq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 24 01 pmovsxwq \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 25 c1 pmovsxdq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 25 01 pmovsxdq \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 30 c1 pmovzxbw %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 30 01 pmovzxbw \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 31 c1 pmovzxbd %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 31 01 pmovzxbd \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 32 c1 pmovzxbq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 32 01 pmovzxbq \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 33 c1 pmovzxwd %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 33 01 pmovzxwd \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 34 c1 pmovzxwq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 34 01 pmovzxwq \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 35 c1 pmovzxdq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 35 01 pmovzxdq \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 28 c1 pmuldq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 28 01 pmuldq \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 40 c1 pmulld %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 40 01 pmulld \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 17 c1 ptest %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 17 01 ptest \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 09 01 00 roundpd \$0x0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 09 c1 00 roundpd \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 08 01 00 roundps \$0x0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 08 c1 00 roundps \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0b 01 00 roundsd \$0x0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0b c1 00 roundsd \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0a 01 00 roundss \$0x0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 0a c1 00 roundss \$0x0,%xmm1,%xmm0 +#pass diff --git a/gas/testsuite/gas/i386/x86-64-sse4_1.s b/gas/testsuite/gas/i386/x86-64-sse4_1.s new file mode 100644 index 0000000..70c2394 --- /dev/null +++ b/gas/testsuite/gas/i386/x86-64-sse4_1.s @@ -0,0 +1,107 @@ +# Streaming SIMD extensions 4.1 Instructions + + .text +foo: + blendpd $0x0,(%rcx),%xmm0 + blendpd $0x0,%xmm1,%xmm0 + blendps $0x0,(%rcx),%xmm0 + blendps $0x0,%xmm1,%xmm0 + blendvpd %xmm0,(%rcx),%xmm0 + blendvpd %xmm0,%xmm1,%xmm0 + blendvps %xmm0,(%rcx),%xmm0 + blendvps %xmm0,%xmm1,%xmm0 + dppd $0x0,(%rcx),%xmm0 + dppd $0x0,%xmm1,%xmm0 + dpps $0x0,(%rcx),%xmm0 + dpps $0x0,%xmm1,%xmm0 + extractps $0x0,%xmm0,%rcx + extractps $0x0,%xmm0,%ecx + extractps $0x0,%xmm0,(%rcx) + insertps $0x0,%xmm1,%xmm0 + insertps $0x0,(%rcx),%xmm0 + movntdqa (%rcx),%xmm0 + mpsadbw $0x0,(%rcx),%xmm0 + mpsadbw $0x0,%xmm1,%xmm0 + packusdw (%rcx),%xmm0 + packusdw %xmm1,%xmm0 + pblendvb %xmm0,(%rcx),%xmm0 + pblendvb %xmm0,%xmm1,%xmm0 + pblendw $0x0,(%rcx),%xmm0 + pblendw $0x0,%xmm1,%xmm0 + pcmpeqq %xmm1,%xmm0 + pcmpeqq (%rcx),%xmm0 + pextrb $0x0,%xmm0,%rcx + pextrb $0x0,%xmm0,%ecx + pextrb $0x0,%xmm0,(%rcx) + pextrd $0x0,%xmm0,%ecx + pextrd $0x0,%xmm0,(%rcx) + pextrq $0x0,%xmm0,%rcx + pextrq $0x0,%xmm0,(%rcx) + pextrw $0x0,%xmm0,%rcx + pextrw $0x0,%xmm0,%ecx + pextrw $0x0,%xmm0,(%rcx) + phminposuw %xmm1,%xmm0 + phminposuw (%rcx),%xmm0 + pinsrb $0x0,(%rcx),%xmm0 + pinsrb $0x0,%ecx,%xmm0 + pinsrb $0x0,%rcx,%xmm0 + pinsrd $0x0,(%rcx),%xmm0 + pinsrd $0x0,%ecx,%xmm0 + pinsrq $0x0,(%rcx),%xmm0 + pinsrq $0x0,%rcx,%xmm0 + pmaxsb %xmm1,%xmm0 + pmaxsb (%rcx),%xmm0 + pmaxsd %xmm1,%xmm0 + pmaxsd (%rcx),%xmm0 + pmaxud %xmm1,%xmm0 + pmaxud (%rcx),%xmm0 + pmaxuw %xmm1,%xmm0 + pmaxuw (%rcx),%xmm0 + pminsb %xmm1,%xmm0 + pminsb (%rcx),%xmm0 + pminsd %xmm1,%xmm0 + pminsd (%rcx),%xmm0 + pminud %xmm1,%xmm0 + pminud (%rcx),%xmm0 + pminuw %xmm1,%xmm0 + pminuw (%rcx),%xmm0 + pmovsxbw %xmm1,%xmm0 + pmovsxbw (%rcx),%xmm0 + pmovsxbd %xmm1,%xmm0 + pmovsxbd (%rcx),%xmm0 + pmovsxbq %xmm1,%xmm0 + pmovsxbq (%rcx),%xmm0 + pmovsxwd %xmm1,%xmm0 + pmovsxwd (%rcx),%xmm0 + pmovsxwq %xmm1,%xmm0 + pmovsxwq (%rcx),%xmm0 + pmovsxdq %xmm1,%xmm0 + pmovsxdq (%rcx),%xmm0 + pmovzxbw %xmm1,%xmm0 + pmovzxbw (%rcx),%xmm0 + pmovzxbd %xmm1,%xmm0 + pmovzxbd (%rcx),%xmm0 + pmovzxbq %xmm1,%xmm0 + pmovzxbq (%rcx),%xmm0 + pmovzxwd %xmm1,%xmm0 + pmovzxwd (%rcx),%xmm0 + pmovzxwq %xmm1,%xmm0 + pmovzxwq (%rcx),%xmm0 + pmovzxdq %xmm1,%xmm0 + pmovzxdq (%rcx),%xmm0 + pmuldq %xmm1,%xmm0 + pmuldq (%rcx),%xmm0 + pmulld %xmm1,%xmm0 + pmulld (%rcx),%xmm0 + ptest %xmm1,%xmm0 + ptest (%rcx),%xmm0 + roundpd $0x0,(%rcx),%xmm0 + roundpd $0x0,%xmm1,%xmm0 + roundps $0x0,(%rcx),%xmm0 + roundps $0x0,%xmm1,%xmm0 + roundsd $0x0,(%rcx),%xmm0 + roundsd $0x0,%xmm1,%xmm0 + roundss $0x0,(%rcx),%xmm0 + roundss $0x0,%xmm1,%xmm0 + + .p2align 4,0 |