diff options
Diffstat (limited to 'gas')
-rw-r--r-- | gas/ChangeLog | 14 | ||||
-rw-r--r-- | gas/config/tc-i386.c | 77 | ||||
-rw-r--r-- | gas/config/tc-i386.h | 16 | ||||
-rw-r--r-- | gas/testsuite/ChangeLog | 8 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/amdfam10.d | 22 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/amdfam10.s | 18 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/i386.exp | 2 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/x86-64-amdfam10.d | 26 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/x86-64-amdfam10.s | 22 |
9 files changed, 194 insertions, 11 deletions
diff --git a/gas/ChangeLog b/gas/ChangeLog index bdc99e6..78f0bc8 100644 --- a/gas/ChangeLog +++ b/gas/ChangeLog @@ -1,3 +1,17 @@ +2006-07-13 Dwarakanath Rajagopal <dwarak.rajagopal@amd.com> + Michael Meissner <michael.meissner@amd.com> + + * config/tc-i386.h (PROCESSOR_AMDFAM10): New processor_type. + (CpuSSE4a, CpuABM, CpuAmdFam10): New Cpu directives. + * config/tc-i386.c (cpu_arch): Add support for AmdFam10 + architecture. + (i386_align_code): Ditto. + (md_assemble): Add support for insertq/extrq instructions, + swapping as needed for intel syntax. + (swap_imm_operands): New function to swap immediate operands. + (swap_operands): Deal with 4 operand instructions. + (build_modrm_byte): Add support for insertq instruction. + 2006-07-13 H.J. Lu <hongjiu.lu@intel.com> * config/tc-i386.h (Size64): Fix a typo in comment. diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index aaca79c..119a9ab 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -89,6 +89,7 @@ static const reg_entry *parse_register PARAMS ((char *reg_string, static char *parse_insn PARAMS ((char *, char *)); static char *parse_operands PARAMS ((char *, const char *)); static void swap_operands PARAMS ((void)); +static void swap_imm_operands PARAMS ((void)); static void optimize_imm PARAMS ((void)); static void optimize_disp PARAMS ((void)); static int match_template PARAMS ((void)); @@ -491,6 +492,9 @@ static const arch_entry cpu_arch[] = {"k8", PROCESSOR_K8, Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon |CpuSledgehammer|CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2}, + {"amdfam10", PROCESSOR_AMDFAM10, + Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon + |CpuSledgehammer|CpuAmdFam10|CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2|CpuSSE3|CpuSSE4a|CpuABM}, {".mmx", PROCESSOR_UNKNOWN, CpuMMX}, {".sse", PROCESSOR_UNKNOWN, @@ -508,7 +512,11 @@ static const arch_entry cpu_arch[] = {".pacifica", PROCESSOR_UNKNOWN, CpuSVME}, {".svme", 
PROCESSOR_UNKNOWN, - CpuSVME} + CpuSVME}, + {".sse4a", PROCESSOR_UNKNOWN, + CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3|CpuSSE4a}, + {".abm", PROCESSOR_UNKNOWN, + CpuABM} }; const pseudo_typeS md_pseudo_table[] = @@ -741,7 +749,7 @@ i386_align_code (fragP, count) 1. For PROCESSOR_I486, PROCESSOR_PENTIUM and PROCESSOR_GENERIC32, f32_patt will be used. - 2. For PROCESSOR_K8 in 64bit, NOPs with 0x66 prefixe will be used. + 2. For PROCESSOR_K8 and PROCESSOR_AMDFAM10 in 64bit, NOPs with 0x66 prefix will be used. 3. For PROCESSOR_MEROM, alt_long_patt will be used. 4. For PROCESSOR_PENTIUMPRO, PROCESSOR_PENTIUM4, PROCESSOR_NOCONA, PROCESSOR_YONAH, PROCESSOR_MEROM, PROCESSOR_K6, PROCESSOR_ATHLON @@ -812,6 +820,7 @@ i386_align_code (fragP, count) case PROCESSOR_ATHLON: case PROCESSOR_K8: case PROCESSOR_GENERIC64: + case PROCESSOR_AMDFAM10: patt = alt_short_patt; break; case PROCESSOR_I486: @@ -840,6 +849,7 @@ i386_align_code (fragP, count) case PROCESSOR_K6: case PROCESSOR_ATHLON: case PROCESSOR_K8: + case PROCESSOR_AMDFAM10: case PROCESSOR_GENERIC32: /* We use cpu_arch_isa_flags to check if we CAN optimize for Cpu686. */ @@ -1733,15 +1743,27 @@ md_assemble (line) if (line == NULL) return; + /* The order of the immediates should be reversed + for the 2-immediate forms of the extrq and insertq instructions. */ + if ((i.imm_operands == 2) && + ((strcmp (mnemonic, "extrq") == 0) + || (strcmp (mnemonic, "insertq") == 0))) + { + swap_imm_operands (); + /* "extrq" and "insertq" are the only two instructions whose operands + have to be reversed even though they have two immediate operands. + */ + if (intel_syntax) + swap_operands (); + } + /* Now we've parsed the mnemonic into a set of templates, and have the operands at hand. */ /* All intel opcodes have reversed operands except for "bound" and "enter". We also don't reverse intersegment "jmp" and "call" instructions with 2 immediate operands so that the immediate segment - precedes the offset, as it does when in AT&T mode. 
"enter" and the - intersegment "jmp" and "call" instructions are the only ones that - have two immediate operands. */ + precedes the offset, as it does when in AT&T mode. */ if (intel_syntax && i.operands > 1 && (strcmp (mnemonic, "bound") != 0) && (strcmp (mnemonic, "invlpga") != 0) @@ -2272,6 +2294,27 @@ parse_operands (l, mnemonic) } static void +swap_imm_operands () +{ + union i386_op temp_op; + unsigned int temp_type; + enum bfd_reloc_code_real temp_reloc; + int xchg1 = 0; + int xchg2 = 1; + + temp_type = i.types[xchg2]; + i.types[xchg2] = i.types[xchg1]; + i.types[xchg1] = temp_type; + temp_op = i.op[xchg2]; + i.op[xchg2] = i.op[xchg1]; + i.op[xchg1] = temp_op; + temp_reloc = i.reloc[xchg2]; + i.reloc[xchg2] = i.reloc[xchg1]; + i.reloc[xchg1] = temp_reloc; +} + + +static void swap_operands () { union i386_op temp_op; @@ -2280,6 +2323,26 @@ swap_operands () int xchg1 = 0; int xchg2 = 0; + if (i.operands == 4) + /* There will be two exchanges in a 4 operand instruction. + The first exchange is done inside this block (1st and 4th operand). + The next exchange is done outside this block (2nd and 3rd operand). */ + { + xchg1 = 0; + xchg2 = 3; + temp_type = i.types[xchg2]; + i.types[xchg2] = i.types[xchg1]; + i.types[xchg1] = temp_type; + temp_op = i.op[xchg2]; + i.op[xchg2] = i.op[xchg1]; + i.op[xchg1] = temp_op; + temp_reloc = i.reloc[xchg2]; + i.reloc[xchg2] = i.reloc[xchg1]; + i.reloc[xchg1] = temp_reloc; + xchg1 = 1; + xchg2 = 2; + } + if (i.operands == 2) { xchg1 = 0; @@ -3281,6 +3344,10 @@ build_modrm_byte () | SReg2 | SReg3 | Control | Debug | Test)) ? 
0 : 1); + + /* In 4-operand instructions with 2 immediate operands, the first two are immediate + bytes and hence the source operand will be in the next byte after the immediates. */ + if ((i.operands == 4)&&(i.imm_operands=2)) source++; dest = source + 1; i.rm.mode = 3; diff --git a/gas/config/tc-i386.h b/gas/config/tc-i386.h index c51563a..4dc1756 100644 --- a/gas/config/tc-i386.h +++ b/gas/config/tc-i386.h @@ -91,8 +91,8 @@ extern const char extra_symbol_chars[]; extern const char *i386_comment_chars; #define tc_comment_chars i386_comment_chars -#define MAX_OPERANDS 3 /* max operands per insn */ -#define MAX_IMMEDIATE_OPERANDS 2/* max immediates per insn (lcall, ljmp) */ +#define MAX_OPERANDS 4 /* max operands per insn */ +#define MAX_IMMEDIATE_OPERANDS 2/* max immediates per insn (lcall, ljmp, insertq, extrq) */ #define MAX_MEMORY_OPERANDS 2 /* max memory refs per insn (string ops) */ /* Prefixes will be emitted in the order defined below. @@ -185,6 +185,9 @@ typedef struct #define CpuSVME 0x80000 /* AMD Secure Virtual Machine Ext-s required */ #define CpuVMX 0x100000 /* VMX Instructions required */ #define CpuMNI 0x200000 /* Merom New Instructions required */ +#define CpuSSE4a 0x400000 /* SSE4a New Instructions required */ +#define CpuABM 0x800000 /* ABM New Instructions required */ +#define CpuAmdFam10 0x1000000 /* AmdFam10 New instructions required */ /* These flags are set by gas depending on the flag_code. */ #define Cpu64 0x4000000 /* 64bit support required */ @@ -192,8 +195,8 @@ typedef struct /* The default value for unknown CPUs - enable all features to avoid problems. 
*/ #define CpuUnknownFlags (Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686 \ - |CpuP4|CpuSledgehammer|CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuPNI|CpuVMX \ - |Cpu3dnow|Cpu3dnowA|CpuK6|CpuAthlon|CpuPadLock|CpuSVME|CpuMNI) + |CpuP4|CpuSledgehammer|CpuAmdFam10|CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuPNI|CpuVMX \ + |Cpu3dnow|Cpu3dnowA|CpuK6|CpuAthlon|CpuPadLock|CpuSVME|CpuMNI|CpuABM|CpuSSE4a) /* the bits in opcode_modifier are used to generate the final opcode from the base_opcode. These bits also are used to detect alternate forms of @@ -240,7 +243,7 @@ typedef struct by OR'ing together all of the possible type masks. (e.g. 'operand_types[i] = Reg|Imm' specifies that operand i can be either a register or an immediate operand. */ - unsigned int operand_types[3]; + unsigned int operand_types[4]; /* operand_types[i] bits */ /* register */ @@ -391,7 +394,8 @@ enum processor_type PROCESSOR_ATHLON, PROCESSOR_K8, PROCESSOR_GENERIC32, - PROCESSOR_GENERIC64 + PROCESSOR_GENERIC64, + PROCESSOR_AMDFAM10 }; /* x86 arch names, types and features */ diff --git a/gas/testsuite/ChangeLog b/gas/testsuite/ChangeLog index fcec568..ecf5b43 100644 --- a/gas/testsuite/ChangeLog +++ b/gas/testsuite/ChangeLog @@ -1,3 +1,11 @@ +2006-07-13 Dwarakanath Rajagopal <dwarak.rajagopal@amd.com> + Michael Meissner <michael.meissner@amd.com> + + * gas/i386/amdfam10.s: New file for amdfam10 instructions. + * gas/i386/amdfam10.d: Ditto. + * gas/i386/x86-64-amdfam10.s: Ditto. + * gas/i386/x86-64-amdfam10.d: Ditto. + 2006-07-12 Nick Clifton <nickc@redhat.com> * gas/sh/basic.exp: Run "too_large" dump test. 
diff --git a/gas/testsuite/gas/i386/amdfam10.d b/gas/testsuite/gas/i386/amdfam10.d new file mode 100644 index 0000000..1c664f6 --- /dev/null +++ b/gas/testsuite/gas/i386/amdfam10.d @@ -0,0 +1,22 @@ +#objdump: -dw +#name: i386 amdfam10 + +.*: +file format .* + +Disassembly of section .text: + +0+000 <foo>: + 0: f3 0f bd 19[ ]+lzcnt \(%ecx\),%ebx + 4: f3 66 0f bd 19[ ]+lzcnt \(%ecx\),%bx + 9: f3 0f bd d9[ ]+lzcnt %ecx,%ebx + d: f3 66 0f bd d9[ ]+lzcnt %cx,%bx + 12: 0f b8 19[ ]+popcnt \(%ecx\),%ebx + 15: 66 0f b8 19[ ]+popcnt \(%ecx\),%bx + 19: 0f b8 d9[ ]+popcnt %ecx,%ebx + 1c: 66 0f b8 d9[ ]+popcnt %cx,%bx + 20: 66 0f 79 ca[ ]+extrq %xmm2,%xmm1 + 24: 66 0f 78 c1 02 04[ ]*extrq \$0x4,\$0x2,%xmm1 + 2a: f2 0f 79 ca[ ]+insertq %xmm2,%xmm1 + 2e: f2 0f 78 ca 02 04[ ]*insertq \$0x4,\$0x2,%xmm2,%xmm1 + 34: f2 0f 2b 09[ ]+movntsd %xmm1,\(%ecx\) + 38: f3 0f 2b 09[ ]+movntss %xmm1,\(%ecx\) diff --git a/gas/testsuite/gas/i386/amdfam10.s b/gas/testsuite/gas/i386/amdfam10.s new file mode 100644 index 0000000..9b67b9c --- /dev/null +++ b/gas/testsuite/gas/i386/amdfam10.s @@ -0,0 +1,18 @@ +#AMDFAM10 New Instructions + + .text +foo: + lzcnt (%ecx),%ebx + lzcnt (%ecx),%bx + lzcnt %ecx,%ebx + lzcnt %cx,%bx + popcnt (%ecx),%ebx + popcnt (%ecx),%bx + popcnt %ecx,%ebx + popcnt %cx,%bx + extrq %xmm2,%xmm1 + extrq $4,$2,%xmm1 + insertq %xmm2,%xmm1 + insertq $4,$2,%xmm2,%xmm1 + movntsd %xmm1,(%ecx) + movntss %xmm1,(%ecx) diff --git a/gas/testsuite/gas/i386/i386.exp b/gas/testsuite/gas/i386/i386.exp index 71e2ee9..4158b86 100644 --- a/gas/testsuite/gas/i386/i386.exp +++ b/gas/testsuite/gas/i386/i386.exp @@ -68,6 +68,7 @@ if [expr ([istarget "i*86-*-*"] || [istarget "x86_64-*-*"]) && [gas_32_check]] run_dump_test "crx" run_list_test "cr-err" "" run_dump_test "svme" + run_dump_test "amdfam10" run_dump_test "merom" run_dump_test "rep" run_dump_test "rep-suffix" @@ -141,6 +142,7 @@ if [expr ([istarget "i*86-*-*"] || [istarget "x86_64-*-*"]) && [gas_64_check]] t run_list_test "x86-64-inval-seg" 
"-al" run_dump_test "x86-64-branch" run_dump_test "svme64" + run_dump_test "x86-64-amdfam10" run_dump_test "x86-64-vmx" run_dump_test "immed64" run_dump_test "x86-64-prescott" diff --git a/gas/testsuite/gas/i386/x86-64-amdfam10.d b/gas/testsuite/gas/i386/x86-64-amdfam10.d new file mode 100644 index 0000000..84d4d52 --- /dev/null +++ b/gas/testsuite/gas/i386/x86-64-amdfam10.d @@ -0,0 +1,26 @@ +#objdump: -dw +#name: x86-64 amdfam10 + +.*: +file format .* + +Disassembly of section .text: + +0+000 <foo>: + 0: f3 48 0f bd 19[ ]+lzcnt \(%rcx\),%rbx + 5: f3 0f bd 19[ ]+lzcnt \(%rcx\),%ebx + 9: f3 66 0f bd 19[ ]+lzcnt \(%rcx\),%bx + e: f3 48 0f bd d9[ ]+lzcnt %rcx,%rbx + 13: f3 0f bd d9[ ]+lzcnt %ecx,%ebx + 17: f3 66 0f bd d9[ ]+lzcnt %cx,%bx + 1c: 48 0f b8 19[ ]+popcnt \(%rcx\),%rbx + 20: 0f b8 19[ ]+popcnt \(%rcx\),%ebx + 23: 66 0f b8 19[ ]+popcnt \(%rcx\),%bx + 27: 48 0f b8 d9[ ]+popcnt %rcx,%rbx + 2b: 0f b8 d9[ ]+popcnt %ecx,%ebx + 2e: 66 0f b8 d9[ ]+popcnt %cx,%bx + 32: 66 0f 79 ca[ ]+extrq %xmm2,%xmm1 + 36: 66 0f 78 c1 02 04[ ]+extrq \$0x4,\$0x2,%xmm1 + 3c: f2 0f 79 ca[ ]+insertq %xmm2,%xmm1 + 40: f2 0f 78 ca 02 04[ ]+insertq \$0x4,\$0x2,%xmm2,%xmm1 + 46: f2 0f 2b 09[ ]+movntsd %xmm1,\(%rcx\) + 4a: f3 0f 2b 09[ ]+movntss %xmm1,\(%rcx\) diff --git a/gas/testsuite/gas/i386/x86-64-amdfam10.s b/gas/testsuite/gas/i386/x86-64-amdfam10.s new file mode 100644 index 0000000..5a4be49 --- /dev/null +++ b/gas/testsuite/gas/i386/x86-64-amdfam10.s @@ -0,0 +1,22 @@ +#AMDFAM10 New Instructions + + .text +foo: + lzcnt (%rcx),%rbx + lzcnt (%rcx),%ebx + lzcnt (%rcx),%bx + lzcnt %rcx,%rbx + lzcnt %ecx,%ebx + lzcnt %cx,%bx + popcnt (%rcx),%rbx + popcnt (%rcx),%ebx + popcnt (%rcx),%bx + popcnt %rcx,%rbx + popcnt %ecx,%ebx + popcnt %cx,%bx + extrq %xmm2,%xmm1 + extrq $4,$2,%xmm1 + insertq %xmm2,%xmm1 + insertq $4,$2,%xmm2,%xmm1 + movntsd %xmm1,(%rcx) + movntss %xmm1,(%rcx) |