From 381d071fc5599e06d72f6f75395fb0ffe5bd531c Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Wed, 18 Apr 2007 16:15:55 +0000 Subject: gas/ 2007-04-18 H.J. Lu * config/tc-i386.c (cpu_arch): Add .sse4.2 and .sse4. (match_template): Handle operand size for crc32 in SSE4.2. (process_suffix): Handle operand type for crc32 in SSE4.2. (output_insn): Support SSE4.2. gas/testsuite/ 2007-04-18 H.J. Lu * gas/i386/i386.exp: Add sse4.2 and x86-64-sse4.2. * gas/i386/sse4_2.d: New file. * gas/i386/sse4_2.s: Likewise. * gas/i386/x86-64-sse4_2.d: Likewise. * gas/i386/x86-64-sse4_2.s: Likewise. opcodes/ 2007-04-18 H.J. Lu * i386-dis.c (CRC32_Fixup): New. (PREGRP85, PREGRP86, PREGRP87, PREGRP88, PREGRP89, PREGRP90, PREGRP91): New. (threebyte_0x38_uses_DATA_prefix): Updated for SSE4.2. (threebyte_0x3a_uses_DATA_prefix): Likewise. (prefix_user_table): Add PREGRP85, PREGRP86, PREGRP87, PREGRP88, PREGRP89, PREGRP90 and PREGRP91. (three_byte_table): Likewise. * i386-opc.c (i386_optab): Add SSE4.2 opcodes. * gas/config/tc-i386.h (CpuSSE4_2): New. (CpuSSE4): Likewise. (CpuUnknownFlags): Add CpuSSE4_2. --- gas/ChangeLog | 7 +++++ gas/config/tc-i386.c | 53 +++++++++++++++++++++++----------- gas/testsuite/ChangeLog | 9 ++++++ gas/testsuite/gas/i386/i386.exp | 2 ++ gas/testsuite/gas/i386/sse4_2.d | 37 ++++++++++++++++++++++++ gas/testsuite/gas/i386/sse4_2.s | 34 ++++++++++++++++++++++ gas/testsuite/gas/i386/x86-64-sse4_2.d | 47 ++++++++++++++++++++++++++++++ gas/testsuite/gas/i386/x86-64-sse4_2.s | 44 ++++++++++++++++++++++++++++ 8 files changed, 216 insertions(+), 17 deletions(-) create mode 100644 gas/testsuite/gas/i386/sse4_2.d create mode 100644 gas/testsuite/gas/i386/sse4_2.s create mode 100644 gas/testsuite/gas/i386/x86-64-sse4_2.d create mode 100644 gas/testsuite/gas/i386/x86-64-sse4_2.s (limited to 'gas') diff --git a/gas/ChangeLog b/gas/ChangeLog index 05ccfc0..8d5a08f 100644 --- a/gas/ChangeLog +++ b/gas/ChangeLog @@ -1,5 +1,12 @@ 2007-04-18 H.J. 
Lu + * config/tc-i386.c (cpu_arch): Add .sse4.2 and .sse4. + (match_template): Handle operand size for crc32 in SSE4.2. + (process_suffix): Handle operand type for crc32 in SSE4.2. + (output_insn): Support SSE4.2. + +2007-04-18 H.J. Lu + * config/tc-i386.c (cpu_arch): Add .sse4.1. (process_operands): Adjust implicit operand for blendvpd, blendvps and pblendvb in SSE4.1. diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index 6ee4010..0b60387 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -500,6 +500,10 @@ static const arch_entry cpu_arch[] = CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3|CpuSSSE3}, {".sse4.1", PROCESSOR_UNKNOWN, CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3|CpuSSSE3|CpuSSE4_1}, + {".sse4.2", PROCESSOR_UNKNOWN, + CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3|CpuSSSE3|CpuSSE4}, + {".sse4", PROCESSOR_UNKNOWN, + CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3|CpuSSSE3|CpuSSE4}, {".3dnow", PROCESSOR_UNKNOWN, CpuMMX|Cpu3dnow}, {".3dnowa", PROCESSOR_UNKNOWN, @@ -2640,9 +2644,10 @@ match_template (void) || !MATCH (overlap1, i.types[1], operand_types[1]) /* monitor in SSE3 is a very special case. The first register and the second register may have different - sizes. */ + sizes. The same applies to crc32 in SSE4.2. */ || !((t->base_opcode == 0x0f01 && t->extension_opcode == 0xc8) + || t->base_opcode == 0xf20f38f1 || CONSISTENT_REGISTER_MATCH (overlap0, i.types[0], operand_types[0], overlap1, i.types[1], @@ -2829,19 +2834,30 @@ process_suffix (void) { /* We take i.suffix from the last register operand specified, Destination register type is more significant than source - register type. */ - int op; - - for (op = i.operands; --op >= 0;) - if ((i.types[op] & Reg) - && !(i.tm.operand_types[op] & InOutPortReg)) - { - i.suffix = ((i.types[op] & Reg8) ? BYTE_MNEM_SUFFIX : - (i.types[op] & Reg16) ? WORD_MNEM_SUFFIX : - (i.types[op] & Reg64) ? QWORD_MNEM_SUFFIX : + register type. crc32 in SSE4.2 prefers source register + type. 
*/ + if (i.tm.base_opcode == 0xf20f38f1) + { + if ((i.types[0] & Reg)) + i.suffix = ((i.types[0] & Reg16) ? WORD_MNEM_SUFFIX : LONG_MNEM_SUFFIX); - break; - } + } + + if (!i.suffix) + { + int op; + + for (op = i.operands; --op >= 0;) + if ((i.types[op] & Reg) + && !(i.tm.operand_types[op] & InOutPortReg)) + { + i.suffix = ((i.types[op] & Reg8) ? BYTE_MNEM_SUFFIX : + (i.types[op] & Reg16) ? WORD_MNEM_SUFFIX : + (i.types[op] & Reg64) ? QWORD_MNEM_SUFFIX : + LONG_MNEM_SUFFIX); + break; + } + } } else if (i.suffix == BYTE_MNEM_SUFFIX) { @@ -3929,9 +3945,11 @@ output_insn (void) unsigned int prefix; /* All opcodes on i386 have either 1 or 2 bytes. SSSE3 and - SSE4.1 instructions have 3 bytes. We may use one more higher - byte to specify a prefix the instruction requires. */ - if ((i.tm.cpu_flags & (CpuSSSE3 | CpuSSE4_1)) != 0) + SSE4 instructions have 3 bytes. We may use one more higher + byte to specify a prefix the instruction requires. Exclude + instructions which are in both SSE4 and ABM. */ + if ((i.tm.cpu_flags & (CpuSSSE3 | CpuSSE4)) != 0 + && (i.tm.cpu_flags & CpuABM) == 0) { if (i.tm.base_opcode & 0xff000000) { @@ -3972,7 +3990,8 @@ output_insn (void) } else { - if ((i.tm.cpu_flags & (CpuSSSE3 | CpuSSE4_1)) != 0) + if ((i.tm.cpu_flags & (CpuSSSE3 | CpuSSE4)) != 0 + && (i.tm.cpu_flags & CpuABM) == 0) { p = frag_more (3); *p++ = (i.tm.base_opcode >> 16) & 0xff; diff --git a/gas/testsuite/ChangeLog b/gas/testsuite/ChangeLog index 0679212..263fde4 100644 --- a/gas/testsuite/ChangeLog +++ b/gas/testsuite/ChangeLog @@ -1,5 +1,14 @@ 2007-04-18 H.J. Lu + * gas/i386/i386.exp: Add sse4.2 and x86-64-sse4.2. + + * gas/i386/sse4_2.d: New file. + * gas/i386/sse4_2.s: Likewise. + * gas/i386/x86-64-sse4_2.d: Likewise. + * gas/i386/x86-64-sse4_2.s: Likewise. + +2007-04-18 H.J. Lu + * gas/i386/i386.exp: Add sse4.1 and x86-64-sse4.1. * gas/i386/sse4_1.d: New file. 
diff --git a/gas/testsuite/gas/i386/i386.exp b/gas/testsuite/gas/i386/i386.exp index 30053e5..e701a8d 100644 --- a/gas/testsuite/gas/i386/i386.exp +++ b/gas/testsuite/gas/i386/i386.exp @@ -87,6 +87,7 @@ if [expr ([istarget "i*86-*-*"] || [istarget "x86_64-*-*"]) && [gas_32_check]] run_dump_test "addr16" run_dump_test "addr32" run_dump_test "sse4_1" + run_dump_test "sse4_2" # These tests require support for 8 and 16 bit relocs, # so we only run them for ELF and COFF targets. @@ -173,6 +174,7 @@ if [expr ([istarget "i*86-*-*"] || [istarget "x86_64-*-*"]) && [gas_64_check]] t run_dump_test "x86-64-nops-1-nocona" run_dump_test "x86-64-nops-1-merom" run_dump_test "x86-64-sse4_1" + run_dump_test "x86-64-sse4_2" if { ![istarget "*-*-aix*"] && ![istarget "*-*-beos*"] diff --git a/gas/testsuite/gas/i386/sse4_2.d b/gas/testsuite/gas/i386/sse4_2.d new file mode 100644 index 0000000..e6468b3 --- /dev/null +++ b/gas/testsuite/gas/i386/sse4_2.d @@ -0,0 +1,37 @@ +#objdump: -dw +#name: i386 SSE4.2 + +.*: file format .* + +Disassembly of section .text: + +0+000 <foo>: +[ ]*[0-9a-f]+: f2 0f 38 f1 19 crc32l \(%ecx\),%ebx +[ ]*[0-9a-f]+: f2 0f 38 f0 d9 crc32b %cl,%ebx +[ ]*[0-9a-f]+: 66 f2 0f 38 f1 d9 crc32w %cx,%ebx +[ ]*[0-9a-f]+: f2 0f 38 f1 d9 crc32l %ecx,%ebx +[ ]*[0-9a-f]+: f2 0f 38 f0 19 crc32b \(%ecx\),%ebx +[ ]*[0-9a-f]+: 66 f2 0f 38 f1 19 crc32w \(%ecx\),%ebx +[ ]*[0-9a-f]+: f2 0f 38 f1 19 crc32l \(%ecx\),%ebx +[ ]*[0-9a-f]+: f2 0f 38 f0 d9 crc32b %cl,%ebx +[ ]*[0-9a-f]+: 66 f2 0f 38 f1 d9 crc32w %cx,%ebx +[ ]*[0-9a-f]+: f2 0f 38 f1 d9 crc32l %ecx,%ebx +[ ]*[0-9a-f]+: 66 0f 38 37 01 pcmpgtq \(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 37 c1 pcmpgtq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 61 01 00 pcmpestri \$0x0,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 61 c1 00 pcmpestri \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 60 01 01 pcmpestrm \$0x1,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 60 c1 01 pcmpestrm \$0x1,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 63 01 02 pcmpistri \$0x2,\(%ecx\),%xmm0 +[ 
]*[0-9a-f]+: 66 0f 3a 63 c1 02 pcmpistri \$0x2,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 62 01 03 pcmpistrm \$0x3,\(%ecx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 62 c1 03 pcmpistrm \$0x3,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 f3 0f b8 19 popcnt \(%ecx\),%bx +[ ]*[0-9a-f]+: f3 0f b8 19 popcnt \(%ecx\),%ebx +[ ]*[0-9a-f]+: 66 f3 0f b8 19 popcnt \(%ecx\),%bx +[ ]*[0-9a-f]+: f3 0f b8 19 popcnt \(%ecx\),%ebx +[ ]*[0-9a-f]+: 66 f3 0f b8 d9 popcnt %cx,%bx +[ ]*[0-9a-f]+: f3 0f b8 d9 popcnt %ecx,%ebx +[ ]*[0-9a-f]+: 66 f3 0f b8 d9 popcnt %cx,%bx +[ ]*[0-9a-f]+: f3 0f b8 d9 popcnt %ecx,%ebx +#pass diff --git a/gas/testsuite/gas/i386/sse4_2.s b/gas/testsuite/gas/i386/sse4_2.s new file mode 100644 index 0000000..584757c --- /dev/null +++ b/gas/testsuite/gas/i386/sse4_2.s @@ -0,0 +1,34 @@ +# Streaming SIMD extensions 4.2 Instructions + + .text +foo: + crc32 (%ecx),%ebx + crc32 %cl,%ebx + crc32 %cx,%ebx + crc32 %ecx,%ebx + crc32b (%ecx),%ebx + crc32w (%ecx),%ebx + crc32l (%ecx),%ebx + crc32b %cl,%ebx + crc32w %cx,%ebx + crc32l %ecx,%ebx + pcmpgtq (%ecx),%xmm0 + pcmpgtq %xmm1,%xmm0 + pcmpestri $0x0,(%ecx),%xmm0 + pcmpestri $0x0,%xmm1,%xmm0 + pcmpestrm $0x1,(%ecx),%xmm0 + pcmpestrm $0x1,%xmm1,%xmm0 + pcmpistri $0x2,(%ecx),%xmm0 + pcmpistri $0x2,%xmm1,%xmm0 + pcmpistrm $0x3,(%ecx),%xmm0 + pcmpistrm $0x3,%xmm1,%xmm0 + popcnt (%ecx),%bx + popcnt (%ecx),%ebx + popcntw (%ecx),%bx + popcntl (%ecx),%ebx + popcnt %cx,%bx + popcnt %ecx,%ebx + popcntw %cx,%bx + popcntl %ecx,%ebx + + .p2align 4,0 diff --git a/gas/testsuite/gas/i386/x86-64-sse4_2.d b/gas/testsuite/gas/i386/x86-64-sse4_2.d new file mode 100644 index 0000000..b541f74 --- /dev/null +++ b/gas/testsuite/gas/i386/x86-64-sse4_2.d @@ -0,0 +1,47 @@ +#objdump: -dw +#name: x86-64 SSE4.2 + +.*: file format .* + +Disassembly of section .text: + +0+000 <foo>: +[ ]*[0-9a-f]+: f2 0f 38 f1 19 crc32l \(%rcx\),%ebx +[ ]*[0-9a-f]+: f2 48 0f 38 f1 19 crc32q \(%rcx\),%rbx +[ ]*[0-9a-f]+: f2 0f 38 f0 d9 crc32b %cl,%ebx +[ ]*[0-9a-f]+: f2 48 0f 38 f0 d9 crc32b %cl,%rbx +[ 
]*[0-9a-f]+: 66 f2 0f 38 f1 d9 crc32w %cx,%ebx +[ ]*[0-9a-f]+: f2 0f 38 f1 d9 crc32l %ecx,%ebx +[ ]*[0-9a-f]+: f2 48 0f 38 f1 d9 crc32q %rcx,%rbx +[ ]*[0-9a-f]+: f2 0f 38 f0 19 crc32b \(%rcx\),%ebx +[ ]*[0-9a-f]+: 66 f2 0f 38 f1 19 crc32w \(%rcx\),%ebx +[ ]*[0-9a-f]+: f2 0f 38 f1 19 crc32l \(%rcx\),%ebx +[ ]*[0-9a-f]+: f2 48 0f 38 f1 19 crc32q \(%rcx\),%rbx +[ ]*[0-9a-f]+: f2 0f 38 f0 d9 crc32b %cl,%ebx +[ ]*[0-9a-f]+: f2 48 0f 38 f0 d9 crc32b %cl,%rbx +[ ]*[0-9a-f]+: 66 f2 0f 38 f1 d9 crc32w %cx,%ebx +[ ]*[0-9a-f]+: f2 0f 38 f1 d9 crc32l %ecx,%ebx +[ ]*[0-9a-f]+: f2 48 0f 38 f1 d9 crc32q %rcx,%rbx +[ ]*[0-9a-f]+: 66 0f 38 37 01 pcmpgtq \(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 38 37 c1 pcmpgtq %xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 61 01 00 pcmpestri \$0x0,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 61 c1 00 pcmpestri \$0x0,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 60 01 01 pcmpestrm \$0x1,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 60 c1 01 pcmpestrm \$0x1,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 63 01 02 pcmpistri \$0x2,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 63 c1 02 pcmpistri \$0x2,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 62 01 03 pcmpistrm \$0x3,\(%rcx\),%xmm0 +[ ]*[0-9a-f]+: 66 0f 3a 62 c1 03 pcmpistrm \$0x3,%xmm1,%xmm0 +[ ]*[0-9a-f]+: 66 f3 0f b8 19 popcnt \(%rcx\),%bx +[ ]*[0-9a-f]+: f3 0f b8 19 popcnt \(%rcx\),%ebx +[ ]*[0-9a-f]+: f3 48 0f b8 19 popcnt \(%rcx\),%rbx +[ ]*[0-9a-f]+: 66 f3 0f b8 19 popcnt \(%rcx\),%bx +[ ]*[0-9a-f]+: f3 0f b8 19 popcnt \(%rcx\),%ebx +[ ]*[0-9a-f]+: f3 48 0f b8 19 popcnt \(%rcx\),%rbx +[ ]*[0-9a-f]+: 66 f3 0f b8 d9 popcnt %cx,%bx +[ ]*[0-9a-f]+: f3 0f b8 d9 popcnt %ecx,%ebx +[ ]*[0-9a-f]+: f3 48 0f b8 d9 popcnt %rcx,%rbx +[ ]*[0-9a-f]+: 66 f3 0f b8 d9 popcnt %cx,%bx +[ ]*[0-9a-f]+: f3 0f b8 d9 popcnt %ecx,%ebx +[ ]*[0-9a-f]+: f3 48 0f b8 d9 popcnt %rcx,%rbx +#pass diff --git a/gas/testsuite/gas/i386/x86-64-sse4_2.s b/gas/testsuite/gas/i386/x86-64-sse4_2.s new file mode 100644 index 0000000..fdc7491 --- /dev/null +++ 
b/gas/testsuite/gas/i386/x86-64-sse4_2.s @@ -0,0 +1,44 @@ +# Streaming SIMD extensions 4.2 Instructions + + .text +foo: + crc32 (%rcx),%ebx + crc32 (%rcx),%rbx + crc32 %cl,%ebx + crc32 %cl,%rbx + crc32 %cx,%ebx + crc32 %ecx,%ebx + crc32 %rcx,%rbx + crc32b (%rcx),%ebx + crc32w (%rcx),%ebx + crc32l (%rcx),%ebx + crc32q (%rcx),%rbx + crc32b %cl,%ebx + crc32b %cl,%rbx + crc32w %cx,%ebx + crc32l %ecx,%ebx + crc32q %rcx,%rbx + pcmpgtq (%rcx),%xmm0 + pcmpgtq %xmm1,%xmm0 + pcmpestri $0x0,(%rcx),%xmm0 + pcmpestri $0x0,%xmm1,%xmm0 + pcmpestrm $0x1,(%rcx),%xmm0 + pcmpestrm $0x1,%xmm1,%xmm0 + pcmpistri $0x2,(%rcx),%xmm0 + pcmpistri $0x2,%xmm1,%xmm0 + pcmpistrm $0x3,(%rcx),%xmm0 + pcmpistrm $0x3,%xmm1,%xmm0 + popcnt (%rcx),%bx + popcnt (%rcx),%ebx + popcnt (%rcx),%rbx + popcntw (%rcx),%bx + popcntl (%rcx),%ebx + popcntq (%rcx),%rbx + popcnt %cx,%bx + popcnt %ecx,%ebx + popcnt %rcx,%rbx + popcntw %cx,%bx + popcntl %ecx,%ebx + popcntq %rcx,%rbx + + .p2align 4,0 -- cgit v1.1