aboutsummaryrefslogtreecommitdiff
path: root/gas/config/tc-i386.c
diff options
context:
space:
mode:
Diffstat (limited to 'gas/config/tc-i386.c')
-rw-r--r--gas/config/tc-i386.c718
1 files changed, 641 insertions, 77 deletions
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index 80a1ac8..e8e8031 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -81,6 +81,7 @@
#define LONG_MNEM_SUFFIX 'l'
#define QWORD_MNEM_SUFFIX 'q'
#define XMMWORD_MNEM_SUFFIX 'x'
+#define YMMWORD_MNEM_SUFFIX 'y'
/* Intel Syntax. Use a non-ascii letter since it never appears
in instructions. */
#define LONG_DOUBLE_MNEM_SUFFIX '\1'
@@ -220,6 +221,16 @@ static void handle_large_common (int small ATTRIBUTE_UNUSED);
static const char *default_arch = DEFAULT_ARCH;
+/* VEX prefix. Filled in by build_vex_prefix and written out by
+ output_insn. */
+typedef struct
+{
+ /* VEX prefix is either 2 byte or 3 byte. */
+ unsigned char bytes[3];
+ unsigned int length; /* Number of entries of BYTES in use: 2 or 3. */
+ /* Destination or source register specifier. May be NULL, in which
+ case build_vex_prefix encodes the register field as 0xf. */
+ const reg_entry *register_specifier;
+} vex_prefix;
+
/* 'md_assemble ()' gathers together information and puts it into a
i386_insn. */
@@ -285,6 +296,7 @@ struct _i386_insn
rex_byte rex;
sib_byte sib;
drex_byte drex;
+ vex_prefix vex;
};
typedef struct _i386_insn i386_insn;
@@ -462,6 +474,9 @@ static i386_cpu_flags cpu_arch_isa_flags;
larger than a byte offset. */
static unsigned int no_cond_jump_promotion = 0;
+/* Encode SSE instructions with VEX prefix. */
+static unsigned int sse2avx;
+
/* Pre-defined "_GLOBAL_OFFSET_TABLE_". */
static symbolS *GOT_symbol;
@@ -622,12 +637,20 @@ static const arch_entry cpu_arch[] =
CPU_SSE4_2_FLAGS },
{ ".sse4", PROCESSOR_UNKNOWN,
CPU_SSE4_2_FLAGS },
+ { ".avx", PROCESSOR_UNKNOWN,
+ CPU_AVX_FLAGS },
{ ".vmx", PROCESSOR_UNKNOWN,
CPU_VMX_FLAGS },
{ ".smx", PROCESSOR_UNKNOWN,
CPU_SMX_FLAGS },
{ ".xsave", PROCESSOR_UNKNOWN,
CPU_XSAVE_FLAGS },
+ { ".aes", PROCESSOR_UNKNOWN,
+ CPU_AES_FLAGS },
+ { ".clmul", PROCESSOR_UNKNOWN,
+ CPU_CLMUL_FLAGS },
+ { ".fma", PROCESSOR_UNKNOWN,
+ CPU_FMA_FLAGS },
{ ".3dnow", PROCESSOR_UNKNOWN,
CPU_3DNOW_FLAGS },
{ ".3dnowa", PROCESSOR_UNKNOWN,
@@ -1176,29 +1199,45 @@ cpu_flags_or (i386_cpu_flags x, i386_cpu_flags y)
return x;
}
-/* Return 3 if there is a perfect match, 2 if compatible with 64bit,
- 1 if compatible with arch, 0 if there is no match. */
+#define CPU_FLAGS_ARCH_MATCH 0x1
+#define CPU_FLAGS_64BIT_MATCH 0x2
+
+#define CPU_FLAGS_32BIT_MATCH CPU_FLAGS_ARCH_MATCH
+#define CPU_FLAGS_PERFECT_MATCH \
+ (CPU_FLAGS_32BIT_MATCH | CPU_FLAGS_64BIT_MATCH)
+
+/* Return CPU flags match bits for template T.
+ CPU_FLAGS_64BIT_MATCH is set when cpu_flags_check_cpu64 accepts
+ T's CPU flags. CPU_FLAGS_32BIT_MATCH (== CPU_FLAGS_ARCH_MATCH) is
+ set when, ignoring the cpu64/cpuno64 bits, T either has no CPU
+ flags at all or shares at least one flag with cpu_arch_flags.
+ Both bits together make CPU_FLAGS_PERFECT_MATCH. */
static int
-cpu_flags_match (i386_cpu_flags x)
+cpu_flags_match (const template *t)
{
- int overlap = cpu_flags_check_cpu64 (x) ? 2 : 0;
+ i386_cpu_flags x = t->cpu_flags;
+ int match = cpu_flags_check_cpu64 (x) ? CPU_FLAGS_64BIT_MATCH : 0;
x.bitfield.cpu64 = 0;
x.bitfield.cpuno64 = 0;
if (cpu_flags_all_zero (&x))
- overlap |= 1;
+ {
+ /* This instruction is available on all archs. */
+ match |= CPU_FLAGS_32BIT_MATCH;
+ }
else
{
+ /* This instruction is available only on some archs. */
i386_cpu_flags cpu = cpu_arch_flags;
cpu.bitfield.cpu64 = 0;
cpu.bitfield.cpuno64 = 0;
cpu = cpu_flags_and (x, cpu);
- overlap |= cpu_flags_all_zero (&cpu) ? 0 : 1;
+ if (!cpu_flags_all_zero (&cpu))
+ {
+ /* Check SSE2AVX: a template with the sse2avx modifier only
+ matches when the sse2avx flag (set by -msse2avx) is
+ nonzero. */
+ if (!t->opcode_modifier.sse2avx || sse2avx)
+ match |= CPU_FLAGS_32BIT_MATCH;
+ }
}
- return overlap;
+ return match;
}
static INLINE i386_operand_type
@@ -1269,6 +1308,7 @@ static const i386_operand_type disp16_32 = OPERAND_TYPE_DISP16_32;
static const i386_operand_type anydisp
= OPERAND_TYPE_ANYDISP;
static const i386_operand_type regxmm = OPERAND_TYPE_REGXMM;
+static const i386_operand_type regymm = OPERAND_TYPE_REGYMM;
static const i386_operand_type imm8 = OPERAND_TYPE_IMM8;
static const i386_operand_type imm8s = OPERAND_TYPE_IMM8S;
static const i386_operand_type imm16 = OPERAND_TYPE_IMM16;
@@ -1278,6 +1318,7 @@ static const i386_operand_type imm64 = OPERAND_TYPE_IMM64;
static const i386_operand_type imm16_32 = OPERAND_TYPE_IMM16_32;
static const i386_operand_type imm16_32s = OPERAND_TYPE_IMM16_32S;
static const i386_operand_type imm16_32_32s = OPERAND_TYPE_IMM16_32_32S;
+static const i386_operand_type vex_imm4 = OPERAND_TYPE_VEX_IMM4;
enum operand_type
{
@@ -1356,7 +1397,9 @@ match_mem_size (const template *t, unsigned int j)
|| (i.types[j].bitfield.tbyte
&& !t->operand_types[j].bitfield.tbyte)
|| (i.types[j].bitfield.xmmword
- && !t->operand_types[j].bitfield.xmmword)));
+ && !t->operand_types[j].bitfield.xmmword)
+ || (i.types[j].bitfield.ymmword
+ && !t->operand_types[j].bitfield.ymmword)));
}
/* Return 1 if there is no size conflict on any operands for
@@ -1437,6 +1480,7 @@ operand_type_match (i386_operand_type overlap,
temp.bitfield.qword = 0;
temp.bitfield.tbyte = 0;
temp.bitfield.xmmword = 0;
+ temp.bitfield.ymmword = 0;
if (operand_type_all_zero (&temp))
return 0;
@@ -1548,6 +1592,12 @@ fits_in_unsigned_long (offsetT num ATTRIBUTE_UNUSED)
#endif
} /* fits_in_unsigned_long() */
+static INLINE int
+fits_in_imm4 (offsetT num)
+{
+ return (num & 0xf) == num; /* Nonzero iff NUM is in the range 0..15. */
+}
+
static i386_operand_type
smallest_imm_type (offsetT num)
{
@@ -1991,6 +2041,7 @@ md_begin ()
operand_chars['?'] = '?';
#endif
digit_chars['-'] = '-';
+ mnemonic_chars['_'] = '_';
mnemonic_chars['-'] = '-';
mnemonic_chars['.'] = '.';
identifier_chars['_'] = '_';
@@ -2069,6 +2120,7 @@ pi (char *line, i386_insn *x)
|| x->types[i].bitfield.reg64
|| x->types[i].bitfield.regmmx
|| x->types[i].bitfield.regxmm
+ || x->types[i].bitfield.regymm
|| x->types[i].bitfield.sreg2
|| x->types[i].bitfield.sreg3
|| x->types[i].bitfield.control
@@ -2170,6 +2222,7 @@ const type_names[] =
{ OPERAND_TYPE_REGMMX, "rMMX" },
{ OPERAND_TYPE_REGXMM, "rXMM" },
{ OPERAND_TYPE_ESSEG, "es" },
+ { OPERAND_TYPE_VEX_IMM4, "VEX i4" },
};
static void
@@ -2395,6 +2448,102 @@ intel_float_operand (const char *mnemonic)
return 1;
}
+/* Build the VEX prefix for the current instruction and store it in
+ i.vex.bytes / i.vex.length.
+ The 2-byte form (first byte 0xc5) is used when the template has
+ Vex0F and none of REX_W, REX_X and REX_B is set in i.rex;
+ otherwise the 3-byte form (first byte 0xc4) is used.
+ The prefix found in bits 8-15 of i.tm.base_opcode (none,
+ DATA_PREFIX_OPCODE, REPE_PREFIX_OPCODE or REPNE_PREFIX_OPCODE) is
+ encoded as 0-3 in the low 2 bits of the last prefix byte; any
+ other value there aborts. */
+
+static void
+build_vex_prefix (void)
+{
+ unsigned int register_specifier;
+ unsigned int implied_prefix;
+ unsigned int vector_length;
+
+ /* Check register specifier. The register number (plus 8 for a
+ RegRex register) is stored inverted in 4 bits; 0xf is used when
+ the instruction has no register specifier. */
+ if (i.vex.register_specifier)
+ {
+ register_specifier = i.vex.register_specifier->reg_num;
+ if ((i.vex.register_specifier->reg_flags & RegRex))
+ register_specifier += 8;
+ register_specifier = ~register_specifier & 0xf;
+ }
+ else
+ register_specifier = 0xf;
+
+ vector_length = i.tm.opcode_modifier.vex256 ? 1 : 0;
+
+ switch ((i.tm.base_opcode >> 8) & 0xff)
+ {
+ case 0:
+ implied_prefix = 0;
+ break;
+ case DATA_PREFIX_OPCODE:
+ implied_prefix = 1;
+ break;
+ case REPE_PREFIX_OPCODE:
+ implied_prefix = 2;
+ break;
+ case REPNE_PREFIX_OPCODE:
+ implied_prefix = 3;
+ break;
+ default:
+ abort ();
+ }
+
+ /* Use 2-byte VEX prefix if possible. */
+ if (i.tm.opcode_modifier.vex0f
+ && (i.rex & (REX_W | REX_X | REX_B)) == 0)
+ {
+ /* 2-byte VEX prefix. */
+ unsigned int r;
+
+ i.vex.length = 2;
+ i.vex.bytes[0] = 0xc5;
+
+ /* Check the REX.R bit. It is stored inverted in bit 7. */
+ r = (i.rex & REX_R) ? 0 : 1;
+ i.vex.bytes[1] = (r << 7
+ | register_specifier << 3
+ | vector_length << 2
+ | implied_prefix);
+ }
+ else
+ {
+ /* 3-byte VEX prefix. */
+ unsigned int m, w;
+
+ if (i.tm.opcode_modifier.vex0f)
+ m = 0x1;
+ else if (i.tm.opcode_modifier.vex0f38)
+ m = 0x2;
+ else if (i.tm.opcode_modifier.vex0f3a)
+ m = 0x3;
+ else
+ abort ();
+
+ i.vex.length = 3;
+ i.vex.bytes[0] = 0xc4;
+
+ /* The high 3 bits of the second VEX byte are 1's complement
+ of RXB bits from REX. */
+ i.vex.bytes[1] = (~i.rex & 0x7) << 5 | m;
+
+ /* Check the REX.W bit. A template with VexW0 or VexW1 fixes the
+ W bit itself, so REX.W must not also be set. */
+ w = (i.rex & REX_W) ? 1 : 0;
+ if (i.tm.opcode_modifier.vexw0 || i.tm.opcode_modifier.vexw1)
+ {
+ if (w)
+ abort ();
+
+ if (i.tm.opcode_modifier.vexw1)
+ w = 1;
+ }
+
+ i.vex.bytes[2] = (w << 7
+ | register_specifier << 3
+ | vector_length << 2
+ | implied_prefix);
+ }
+}
+
static void
process_immext (void)
{
@@ -2417,18 +2566,20 @@ process_immext (void)
i.operands = 0;
}
- /* These AMD 3DNow! and SSE2 Instructions have an opcode suffix
+ /* These AMD 3DNow! and SSE2 instructions have an opcode suffix
which is coded in the same place as an 8-bit immediate field
would be. Here we fake an 8-bit immediate operand from the
opcode suffix stored in tm.extension_opcode.
- SSE5 also uses this encoding, for some of its 3 argument
- instructions. */
+ SSE5 and AVX instructions also use this encoding, for some of
+ their 3 argument instructions. */
assert (i.imm_operands == 0
&& (i.operands <= 2
|| (i.tm.cpu_flags.bitfield.cpusse5
- && i.operands <= 3)));
+ && i.operands <= 3)
+ || (i.tm.opcode_modifier.vex
+ && i.operands <= 4)));
exp = &im_expressions[i.imm_operands++];
i.op[i.operands].imms = exp;
@@ -2553,7 +2704,9 @@ md_assemble (char *line)
|| i.types[j].bitfield.floatacc)
i.reg_operands--;
- if (i.tm.opcode_modifier.immext)
+ /* ImmExt should be processed after SSE2AVX. */
+ if (!i.tm.opcode_modifier.sse2avx
+ && i.tm.opcode_modifier.immext)
process_immext ();
/* For insns with operands there are more diddles to do to the opcode. */
@@ -2568,6 +2721,9 @@ md_assemble (char *line)
as_warn (_("translating to `%sp'"), i.tm.name);
}
+ if (i.tm.opcode_modifier.vex)
+ build_vex_prefix ();
+
/* Handle conversion of 'int $3' --> special int3 insn. */
if (i.tm.base_opcode == INT_OPCODE && i.op[0].imms->X_add_number == 3)
{
@@ -2810,12 +2966,12 @@ parse_insn (char *line, char *mnemonic)
supported = 0;
for (t = current_templates->start; t < current_templates->end; ++t)
{
- supported |= cpu_flags_match (t->cpu_flags);
- if (supported == 3)
+ supported |= cpu_flags_match (t);
+ if (supported == CPU_FLAGS_PERFECT_MATCH)
goto skip;
}
- if (!(supported & 2))
+ if (!(supported & CPU_FLAGS_64BIT_MATCH))
{
as_bad (flag_code == CODE_64BIT
? _("`%s' is not supported in 64-bit mode")
@@ -2823,7 +2979,7 @@ parse_insn (char *line, char *mnemonic)
current_templates->start->name);
return NULL;
}
- if (!(supported & 1))
+ if (supported != CPU_FLAGS_PERFECT_MATCH)
{
as_bad (_("`%s' is not supported on `%s%s'"),
current_templates->start->name, cpu_arch_name,
@@ -3004,6 +3160,7 @@ swap_operands (void)
{
switch (i.operands)
{
+ case 5:
case 4:
swap_2_operands (1, i.operands - 2);
case 3:
@@ -3245,12 +3402,36 @@ optimize_disp (void)
}
}
+/* Check if operands are valid for the instruction. Update VEX
+ operand types. Return 1 if the operands are invalid for template
+ T, so that match_template skips it, and 0 otherwise. Templates
+ without the Vex modifier are always accepted. */
+
+static int
+VEX_check_operands (const template *t)
+{
+ if (!t->opcode_modifier.vex)
+ return 0;
+
+ /* Only check VEX_Imm4, which must be the first operand. It has to
+ be a constant that fits in 4 bits. */
+ if (t->operand_types[0].bitfield.vex_imm4)
+ {
+ if (i.op[0].imms->X_op != O_constant
+ || !fits_in_imm4 (i.op[0].imms->X_add_number))
+ return 1;
+
+ /* Turn off Imm8 so that update_imm won't complain. */
+ i.types[0] = vex_imm4;
+ }
+
+ return 0;
+}
+
static int
match_template (void)
{
/* Points to template once we've found it. */
const template *t;
i386_operand_type overlap0, overlap1, overlap2, overlap3;
+ i386_operand_type overlap4;
unsigned int found_reverse_match;
i386_opcode_modifier suffix_check;
i386_operand_type operand_types [MAX_OPERANDS];
@@ -3259,8 +3440,8 @@ match_template (void)
unsigned int found_cpu_match;
unsigned int check_register;
-#if MAX_OPERANDS != 4
-# error "MAX_OPERANDS must be 4."
+#if MAX_OPERANDS != 5
+# error "MAX_OPERANDS must be 5."
#endif
found_reverse_match = 0;
@@ -3289,7 +3470,8 @@ match_template (void)
continue;
/* Check processor support. */
- found_cpu_match = cpu_flags_match (t->cpu_flags) == 3;
+ found_cpu_match = (cpu_flags_match (t)
+ == CPU_FLAGS_PERFECT_MATCH);
if (!found_cpu_match)
continue;
@@ -3330,9 +3512,11 @@ match_template (void)
&& !intel_float_operand (t->name))
: intel_float_operand (t->name) != 2)
&& ((!operand_types[0].bitfield.regmmx
- && !operand_types[0].bitfield.regxmm)
+ && !operand_types[0].bitfield.regxmm
+ && !operand_types[0].bitfield.regymm)
|| (!operand_types[t->operands > 1].bitfield.regmmx
- && !!operand_types[t->operands > 1].bitfield.regxmm))
+ && !!operand_types[t->operands > 1].bitfield.regxmm
+ && !!operand_types[t->operands > 1].bitfield.regymm))
&& (t->base_opcode != 0x0fc7
|| t->extension_opcode != 1 /* cmpxchg8b */))
continue;
@@ -3426,6 +3610,7 @@ match_template (void)
continue;
case 3:
case 4:
+ case 5:
overlap1 = operand_type_and (i.types[1], operand_types[1]);
if (!operand_type_match (overlap0, i.types[0])
|| !operand_type_match (overlap1, i.types[1])
@@ -3471,6 +3656,9 @@ match_template (void)
/* Found a forward 2 operand match here. */
switch (t->operands)
{
+ case 5:
+ overlap4 = operand_type_and (i.types[4],
+ operand_types[4]);
case 4:
overlap3 = operand_type_and (i.types[3],
operand_types[3]);
@@ -3482,6 +3670,15 @@ match_template (void)
switch (t->operands)
{
+ case 5:
+ if (!operand_type_match (overlap4, i.types[4])
+ || !operand_type_register_match (overlap3,
+ i.types[3],
+ operand_types[3],
+ overlap4,
+ i.types[4],
+ operand_types[4]))
+ continue;
case 4:
if (!operand_type_match (overlap3, i.types[3])
|| (check_register
@@ -3517,6 +3714,11 @@ match_template (void)
found_reverse_match = 0;
continue;
}
+
+ /* Check if VEX operands are valid. */
+ if (VEX_check_operands (t))
+ continue;
+
/* We've found a match; break out of loop. */
break;
}
@@ -3700,9 +3902,10 @@ process_suffix (void)
if (!check_word_reg ())
return 0;
}
- else if (i.suffix == XMMWORD_MNEM_SUFFIX)
+ else if (i.suffix == XMMWORD_MNEM_SUFFIX
+ || i.suffix == YMMWORD_MNEM_SUFFIX)
{
- /* Skip if the instruction has x suffix. match_template
+ /* Skip if the instruction has x/y suffix. match_template
should check if it is a valid suffix. */
}
else if (intel_syntax && i.tm.opcode_modifier.ignoresize)
@@ -3789,7 +3992,8 @@ process_suffix (void)
if (i.suffix
&& i.suffix != BYTE_MNEM_SUFFIX
- && i.suffix != XMMWORD_MNEM_SUFFIX)
+ && i.suffix != XMMWORD_MNEM_SUFFIX
+ && i.suffix != YMMWORD_MNEM_SUFFIX)
{
/* It's not a byte, select word/dword operation. */
if (i.tm.opcode_modifier.w)
@@ -3916,6 +4120,7 @@ check_byte_reg (void)
|| i.types[op].bitfield.reg64
|| i.types[op].bitfield.regmmx
|| i.types[op].bitfield.regxmm
+ || i.types[op].bitfield.regymm
|| i.types[op].bitfield.sreg2
|| i.types[op].bitfield.sreg3
|| i.types[op].bitfield.control
@@ -4508,6 +4713,19 @@ process_drex (void)
}
static int
+bad_implicit_operand (int xmm)
+{ /* Report with as_bad that the implicit operand of i.tm.name is not
+ xmm0 (XMM nonzero) or ymm0 (XMM zero). The message names the
+ last operand in Intel syntax and the first one otherwise. */
+ const char *reg = xmm ? "xmm0" : "ymm0";
+ if (intel_syntax)
+ as_bad (_("the last operand of `%s' must be `%s%s'"),
+ i.tm.name, register_prefix, reg);
+ else
+ as_bad (_("the first operand of `%s' must be `%s%s'"),
+ i.tm.name, register_prefix, reg);
+ return 0; /* Always 0; process_operands returns this value directly. */
+}
+
+static int
process_operands (void)
{
/* Default segment register this instruction will use for memory
@@ -4521,23 +4739,98 @@ process_operands (void)
|| i.tm.opcode_modifier.drexc)
process_drex ();
- if (i.tm.opcode_modifier.firstxmm0)
+ if (i.tm.opcode_modifier.sse2avx
+ && (i.tm.opcode_modifier.vexnds
+ || i.tm.opcode_modifier.vexndd))
{
+ unsigned int dup = i.operands;
+ unsigned int dest = dup - 1;
unsigned int j;
- /* The first operand is implicit and must be xmm0. */
+ /* The destination must be an xmm register. */
assert (i.reg_operands
- && operand_type_equal (&i.types[0], &regxmm));
- if (i.op[0].regs->reg_num != 0)
+ && MAX_OPERANDS > dup
+ && operand_type_equal (&i.types[dest], &regxmm));
+
+ if (i.tm.opcode_modifier.firstxmm0)
{
- if (intel_syntax)
- as_bad (_("the last operand of `%s' must be `%sxmm0'"),
- i.tm.name, register_prefix);
+ /* The first operand is implicit and must be xmm0. */
+ assert (operand_type_equal (&i.types[0], &regxmm));
+ if (i.op[0].regs->reg_num != 0)
+ return bad_implicit_operand (1);
+
+ if (i.tm.opcode_modifier.vex3sources)
+ {
+ /* Keep xmm0 for instructions with VEX prefix and 3
+ sources. */
+ goto duplicate;
+ }
else
- as_bad (_("the first operand of `%s' must be `%sxmm0'"),
- i.tm.name, register_prefix);
- return 0;
+ {
+ /* We remove the first xmm0 and keep the number of
+ operands unchanged, which in fact duplicates the
+ destination. */
+ for (j = 1; j < i.operands; j++)
+ {
+ i.op[j - 1] = i.op[j];
+ i.types[j - 1] = i.types[j];
+ i.tm.operand_types[j - 1] = i.tm.operand_types[j];
+ }
+ }
+ }
+ else if (i.tm.opcode_modifier.implicit1stxmm0)
+ {
+ assert ((MAX_OPERANDS - 1) > dup
+ && i.tm.opcode_modifier.vex3sources);
+
+ /* Add the implicit xmm0 for instructions with VEX prefix
+ and 3 sources. */
+ for (j = i.operands; j > 0; j--)
+ {
+ i.op[j] = i.op[j - 1];
+ i.types[j] = i.types[j - 1];
+ i.tm.operand_types[j] = i.tm.operand_types[j - 1];
+ }
+ i.op[0].regs
+ = (const reg_entry *) hash_find (reg_hash, "xmm0");
+ i.types[0] = regxmm;
+ i.tm.operand_types[0] = regxmm;
+
+ i.operands += 2;
+ i.reg_operands += 2;
+ i.tm.operands += 2;
+
+ dup++;
+ dest++;
+ i.op[dup] = i.op[dest];
+ i.types[dup] = i.types[dest];
+ i.tm.operand_types[dup] = i.tm.operand_types[dest];
}
+ else
+ {
+duplicate:
+ i.operands++;
+ i.reg_operands++;
+ i.tm.operands++;
+
+ i.op[dup] = i.op[dest];
+ i.types[dup] = i.types[dest];
+ i.tm.operand_types[dup] = i.tm.operand_types[dest];
+ }
+
+ if (i.tm.opcode_modifier.immext)
+ process_immext ();
+ }
+ else if (i.tm.opcode_modifier.firstxmm0)
+ {
+ unsigned int j;
+
+ /* The first operand is implicit and must be xmm0/ymm0. */
+ assert (i.reg_operands
+ && (operand_type_equal (&i.types[0], &regxmm)
+ || operand_type_equal (&i.types[0], &regymm)));
+ if (i.op[0].regs->reg_num != 0)
+ return bad_implicit_operand (i.types[0].bitfield.regxmm);
for (j = 1; j < i.operands; j++)
{
@@ -4665,6 +4958,129 @@ static const seg_entry *
build_modrm_byte (void)
{
const seg_entry *default_seg = 0;
+ unsigned int source, dest;
+ int vex_3_sources;
+
+ /* The first operand of instructions with VEX prefix and 3 sources
+ must be VEX_Imm4. */
+ vex_3_sources = i.tm.opcode_modifier.vex3sources;
+ if (vex_3_sources)
+ {
+ unsigned int nds, reg;
+
+ if (i.tm.opcode_modifier.veximmext
+ && i.tm.opcode_modifier.immext)
+ {
+ dest = i.operands - 2;
+ assert (dest == 3);
+ }
+ else
+ dest = i.operands - 1;
+ nds = dest - 1;
+
+ /* There are 2 kinds of instructions:
+ 1. 5 operands: one immediate operand and 4 register
+ operands or 3 register operands plus 1 memory operand.
+ It must have VexNDS and VexW0 or VexW1. The destination
+ must be either XMM or YMM register.
+ 2. 4 operands: 4 register operands or 3 register operands
+ plus 1 memory operand. It must have VexNDS and VexImmExt. */
+ if (!((i.reg_operands == 4
+ || (i.reg_operands == 3 && i.mem_operands == 1))
+ && i.tm.opcode_modifier.vexnds
+ && (operand_type_equal (&i.tm.operand_types[dest], &regxmm)
+ || operand_type_equal (&i.tm.operand_types[dest], &regymm))
+ && (operand_type_equal (&i.tm.operand_types[nds], &regxmm)
+ || operand_type_equal (&i.tm.operand_types[nds], &regymm))
+ && ((dest == 4
+ && i.imm_operands == 1
+ && i.types[0].bitfield.vex_imm4
+ && (i.tm.opcode_modifier.vexw0
+ || i.tm.opcode_modifier.vexw1))
+ || (dest == 3
+ && (i.imm_operands == 0
+ || (i.imm_operands == 1
+ && i.tm.opcode_modifier.immext))
+ && i.tm.opcode_modifier.veximmext))))
+ abort ();
+
+ i.vex.register_specifier = i.op[nds].regs;
+
+ if (i.imm_operands == 0)
+ {
+ /* When there is no immediate operand, generate an 8bit
+ immediate operand to encode the first operand. */
+ expressionS *exp = &im_expressions[i.imm_operands++];
+ i.op[i.operands].imms = exp;
+ i.types[i.operands] = imm8;
+ i.operands++;
+ /* If VexW1 is set, the first operand is the source and
+ the second operand is encoded in the immediate operand. */
+ if (i.tm.opcode_modifier.vexw1)
+ {
+ source = 0;
+ reg = 1;
+ }
+ else
+ {
+ source = 1;
+ reg = 0;
+ }
+ assert (operand_type_equal (&i.tm.operand_types[reg], &regxmm)
+ || operand_type_equal (&i.tm.operand_types[reg],
+ &regymm));
+ exp->X_op = O_constant;
+ exp->X_add_number
+ = ((i.op[reg].regs->reg_num
+ + ((i.op[reg].regs->reg_flags & RegRex) ? 8 : 0)) << 4);
+ }
+ else
+ {
+ unsigned int imm;
+
+ if (i.tm.opcode_modifier.vexw0)
+ {
+ /* If VexW0 is set, the third operand is the source and
+ the second operand is encoded in the immediate
+ operand. */
+ source = 2;
+ reg = 1;
+ }
+ else
+ {
+ /* VexW1 is set, the second operand is the source and
+ the third operand is encoded in the immediate
+ operand. */
+ source = 1;
+ reg = 2;
+ }
+
+ if (i.tm.opcode_modifier.immext)
+ {
+ /* When ImmExt is set, the immediate byte is the last
+ operand. */
+ imm = i.operands - 1;
+ source--;
+ reg--;
+ }
+ else
+ {
+ imm = 0;
+
+ /* Turn on Imm8 so that output_imm will generate it. */
+ i.types[imm].bitfield.imm8 = 1;
+ }
+
+ assert (operand_type_equal (&i.tm.operand_types[reg], &regxmm)
+ || operand_type_equal (&i.tm.operand_types[reg],
+ &regymm));
+ i.op[imm].imms->X_add_number
+ |= ((i.op[reg].regs->reg_num
+ + ((i.op[reg].regs->reg_flags & RegRex) ? 8 : 0)) << 4);
+ }
+ }
+ else
+ source = dest = 0;
/* SSE5 4 operand instructions are encoded in such a way that one of
the inputs must match the destination register. Process_drex hides
@@ -4688,11 +5104,18 @@ build_modrm_byte (void)
}
/* i.reg_operands MUST be the number of real register operands;
- implicit registers do not count. */
- else if (i.reg_operands == 2)
+ implicit registers do not count. If there are 3 register
+ operands, it must be an instruction with VexNDS. For an
+ instruction with VexNDD, the destination register is encoded
+ in the VEX prefix. If there are 4 register operands, it must be
+ an instruction with VEX prefix and 3 sources. */
+ else if (i.mem_operands == 0
+ && ((i.reg_operands == 2
+ && !i.tm.opcode_modifier.vexndd)
+ || (i.reg_operands == 3
+ && i.tm.opcode_modifier.vexnds)
+ || (i.reg_operands == 4 && vex_3_sources)))
{
- unsigned int source, dest;
-
switch (i.operands)
{
case 2:
@@ -4701,10 +5124,12 @@ build_modrm_byte (void)
case 3:
/* When there are 3 operands, one of them may be immediate,
which may be the first or the last operand. Otherwise,
- the first operand must be shift count register (cl). */
+ the first operand must be shift count register (cl) or it
+ is an instruction with VexNDS. */
assert (i.imm_operands == 1
|| (i.imm_operands == 0
- && i.types[0].bitfield.shiftcount));
+ && (i.tm.opcode_modifier.vexnds
+ || i.types[0].bitfield.shiftcount)));
if (operand_type_check (i.types[0], imm)
|| i.types[0].bitfield.shiftcount)
source = 1;
@@ -4714,17 +5139,53 @@ build_modrm_byte (void)
case 4:
/* When there are 4 operands, the first two must be 8bit
immediate operands. The source operand will be the 3rd
- one. */
- assert (i.imm_operands == 2
- && i.types[0].bitfield.imm8
- && i.types[1].bitfield.imm8);
- source = 2;
+ one.
+
+ For instructions with VexNDS, if the first operand is
+ an imm8, the source operand is the 2nd one. If the last
+ operand is an imm8, the source operand is the first one. */
+ assert ((i.imm_operands == 2
+ && i.types[0].bitfield.imm8
+ && i.types[1].bitfield.imm8)
+ || (i.tm.opcode_modifier.vexnds
+ && i.imm_operands == 1
+ && (i.types[0].bitfield.imm8
+ || i.types[i.operands - 1].bitfield.imm8)));
+ if (i.tm.opcode_modifier.vexnds)
+ {
+ if (i.types[0].bitfield.imm8)
+ source = 1;
+ else
+ source = 0;
+ }
+ else
+ source = 2;
+ break;
+ case 5:
break;
default:
abort ();
}
- dest = source + 1;
+ if (!vex_3_sources)
+ {
+ dest = source + 1;
+
+ if (i.tm.opcode_modifier.vexnds)
+ {
+ /* For instructions with VexNDS, the register-only
+ source operand must be XMM or YMM register. It is
+ encoded in VEX prefix. */
+ if ((dest + 1) >= i.operands
+ || (!operand_type_equal (&i.tm.operand_types[dest],
+ &regxmm)
+ && !operand_type_equal (&i.tm.operand_types[dest],
+ &regymm)))
+ abort ();
+ i.vex.register_specifier = i.op[dest].regs;
+ dest++;
+ }
+ }
i.rm.mode = 3;
/* One of the register operands will be encoded in the i.tm.reg
@@ -4763,6 +5224,8 @@ build_modrm_byte (void)
}
else
{ /* If it's not 2 reg operands... */
+ unsigned int mem;
+
if (i.mem_operands)
{
unsigned int fake_zero_displacement = 0;
@@ -4966,7 +5429,11 @@ build_modrm_byte (void)
exp->X_add_symbol = (symbolS *) 0;
exp->X_op_symbol = (symbolS *) 0;
}
+
+ mem = op;
}
+ else
+ mem = ~0;
/* Fill in i.rm.reg or i.rm.regmem field with register operand
(if any) based on i.tm.extension_opcode. Again, we must be
@@ -4989,6 +5456,8 @@ build_modrm_byte (void)
}
else
{
+ unsigned int vex_reg = ~0;
+
for (op = 0; op < i.operands; op++)
if (i.types[op].bitfield.reg8
|| i.types[op].bitfield.reg16
@@ -4996,6 +5465,7 @@ build_modrm_byte (void)
|| i.types[op].bitfield.reg64
|| i.types[op].bitfield.regmmx
|| i.types[op].bitfield.regxmm
+ || i.types[op].bitfield.regymm
|| i.types[op].bitfield.sreg2
|| i.types[op].bitfield.sreg3
|| i.types[op].bitfield.control
@@ -5003,7 +5473,48 @@ build_modrm_byte (void)
|| i.types[op].bitfield.test)
break;
- assert (op < i.operands);
+ if (vex_3_sources)
+ op = dest;
+ else if (i.tm.opcode_modifier.vexnds)
+ {
+ /* For instructions with VexNDS, the register-only
+ source operand is encoded in VEX prefix. */
+ assert (mem != (unsigned int) ~0);
+
+ if (op > mem)
+ {
+ vex_reg = op++;
+ assert (op < i.operands);
+ }
+ else
+ {
+ vex_reg = op + 1;
+ assert (vex_reg < i.operands);
+ }
+ }
+ else if (i.tm.opcode_modifier.vexndd)
+ {
+ /* For instructions with VexNDD, there should be
+ no memory operand and the register destination
+ is encoded in VEX prefix. */
+ assert (i.mem_operands == 0
+ && (op + 2) == i.operands);
+ vex_reg = op + 1;
+ }
+ else
+ assert (op < i.operands);
+
+ if (vex_reg != (unsigned int) ~0)
+ {
+ assert (i.reg_operands == 2);
+
+ if (!operand_type_equal (&i.tm.operand_types[vex_reg],
+ & regxmm)
+ && !operand_type_equal (&i.tm.operand_types[vex_reg],
+ &regymm))
+ abort ();
+ i.vex.register_specifier = i.op[vex_reg].regs;
+ }
/* If there is an extension opcode to put here, the
register number must be put into the regmem field. */
@@ -5276,40 +5787,71 @@ output_insn (void)
unsigned int j;
unsigned int prefix;
- switch (i.tm.opcode_length)
+ /* Since the VEX prefix contains the implicit prefix, we don't
+ need the explicit prefix. */
+ if (!i.tm.opcode_modifier.vex)
{
- case 3:
- if (i.tm.base_opcode & 0xff000000)
- {
- prefix = (i.tm.base_opcode >> 24) & 0xff;
- goto check_prefix;
- }
- break;
- case 2:
- if ((i.tm.base_opcode & 0xff0000) != 0)
+ switch (i.tm.opcode_length)
{
- prefix = (i.tm.base_opcode >> 16) & 0xff;
- if (i.tm.cpu_flags.bitfield.cpupadlock)
+ case 3:
+ if (i.tm.base_opcode & 0xff000000)
{
+ prefix = (i.tm.base_opcode >> 24) & 0xff;
+ goto check_prefix;
+ }
+ break;
+ case 2:
+ if ((i.tm.base_opcode & 0xff0000) != 0)
+ {
+ prefix = (i.tm.base_opcode >> 16) & 0xff;
+ if (i.tm.cpu_flags.bitfield.cpupadlock)
+ {
check_prefix:
- if (prefix != REPE_PREFIX_OPCODE
- || i.prefix[LOCKREP_PREFIX] != REPE_PREFIX_OPCODE)
+ if (prefix != REPE_PREFIX_OPCODE
+ || (i.prefix[LOCKREP_PREFIX]
+ != REPE_PREFIX_OPCODE))
+ add_prefix (prefix);
+ }
+ else
add_prefix (prefix);
}
- else
- add_prefix (prefix);
+ break;
+ case 1:
+ break;
+ default:
+ abort ();
}
- break;
- case 1:
- break;
- default:
- abort ();
+
+ /* The prefix bytes. */
+ for (j = ARRAY_SIZE (i.prefix), q = i.prefix; j > 0; j--, q++)
+ if (*q)
+ FRAG_APPEND_1_CHAR (*q);
}
- /* The prefix bytes. */
- for (j = ARRAY_SIZE (i.prefix), q = i.prefix; j > 0; j--, q++)
- if (*q)
- FRAG_APPEND_1_CHAR (*q);
+ if (i.tm.opcode_modifier.vex)
+ {
+ for (j = 0, q = i.prefix; j < ARRAY_SIZE (i.prefix); j++, q++)
+ if (*q)
+ switch (j)
+ {
+ case REX_PREFIX:
+ /* REX byte is encoded in VEX prefix. */
+ break;
+ case SEG_PREFIX:
+ case ADDR_PREFIX:
+ FRAG_APPEND_1_CHAR (*q);
+ break;
+ default:
+ /* There should be no other prefixes for instructions
+ with VEX prefix. */
+ abort ();
+ }
+
+ /* Now the VEX prefix. */
+ p = frag_more (i.vex.length);
+ for (j = 0; j < i.vex.length; j++)
+ p[j] = i.vex.bytes[j];
+ }
/* Now the opcode; be careful about word order here! */
if (i.tm.opcode_length == 1)
@@ -7250,6 +7792,7 @@ const char *md_shortopts = "qn";
#define OPTION_MINDEX_REG (OPTION_MD_BASE + 7)
#define OPTION_MNAKED_REG (OPTION_MD_BASE + 8)
#define OPTION_MOLD_GCC (OPTION_MD_BASE + 9)
+#define OPTION_MSSE2AVX (OPTION_MD_BASE + 10)
struct option md_longopts[] =
{
@@ -7265,6 +7808,7 @@ struct option md_longopts[] =
{"mindex-reg", no_argument, NULL, OPTION_MINDEX_REG},
{"mnaked-reg", no_argument, NULL, OPTION_MNAKED_REG},
{"mold-gcc", no_argument, NULL, OPTION_MOLD_GCC},
+ {"msse2avx", no_argument, NULL, OPTION_MSSE2AVX},
{NULL, no_argument, NULL, 0}
};
size_t md_longopts_size = sizeof (md_longopts);
@@ -7454,6 +7998,10 @@ md_parse_option (int c, char *arg)
old_gcc = 1;
break;
+ case OPTION_MSSE2AVX:
+ sse2avx = 1;
+ break;
+
default:
return 0;
}
@@ -7497,8 +8045,8 @@ md_show_usage (stream)
generic32, generic64\n\
EXTENSION is combination of:\n\
mmx, sse, sse2, sse3, ssse3, sse4.1, sse4.2, sse4,\n\
- vmx, smx, xsave, 3dnow, 3dnowa, sse4a, sse5, svme,\n\
- abm, padlock\n"));
+ avx, vmx, smx, xsave, aes, clmul, fma, 3dnow,\n\
+ 3dnowa, sse4a, sse5, svme, abm, padlock\n"));
fprintf (stream, _("\
-mtune=CPU optimize for CPU, CPU is one of:\n\
i8086, i186, i286, i386, i486, pentium, pentiumpro,\n\
@@ -7506,6 +8054,8 @@ md_show_usage (stream)
core, core2, k6, k6_2, athlon, k8, amdfam10,\n\
generic32, generic64\n"));
fprintf (stream, _("\
+ -msse2avx encode SSE instructions with VEX prefix\n"));
+ fprintf (stream, _("\
-mmnemonic=[att|intel] use AT&T/Intel mnemonic\n"));
fprintf (stream, _("\
-msyntax=[att|intel] use AT&T/Intel syntax\n"));
@@ -7943,7 +8493,7 @@ tc_gen_reloc (section, fixp)
constant digits [[ radixOverride ]]
- dataType BYTE | WORD | DWORD | FWORD | QWORD | TBYTE | OWORD | XMMWORD
+ dataType BYTE | WORD | DWORD | FWORD | QWORD | TBYTE | OWORD | XMMWORD | YMMWORD
digits decdigit
| digits decdigit
@@ -8059,6 +8609,7 @@ tc_gen_reloc (section, fixp)
| TBYTE
| OWORD
| XMMWORD
+ | YMMWORD
| .
| $
| register
@@ -8111,6 +8662,7 @@ static struct intel_token cur_token, prev_token;
#define T_ID 13
#define T_SHL 14
#define T_SHR 15
+#define T_YMMWORD 16
/* Prototypes for intel parser functions. */
static int intel_match_token (int);
@@ -8516,6 +9068,12 @@ intel_e09 (void)
i.types[this_operand].bitfield.xmmword = 1;
}
+ else if (prev_token.code == T_YMMWORD)
+ {
+ suffix = YMMWORD_MNEM_SUFFIX;
+ i.types[this_operand].bitfield.ymmword = 1;
+ }
+
else
{
as_bad (_("Unknown operand modifier `%s'"), prev_token.str);
@@ -8699,6 +9257,7 @@ intel_e10 (void)
| TBYTE
| OWORD
| XMMWORD
+ | YMMWORD
| $
| .
| register
@@ -8894,7 +9453,8 @@ intel_e11 (void)
| QWORD
| TBYTE
| OWORD
- | XMMWORD */
+ | XMMWORD
+ | YMMWORD */
case T_BYTE:
case T_WORD:
case T_DWORD:
@@ -8902,6 +9462,7 @@ intel_e11 (void)
case T_QWORD:
case T_TBYTE:
case T_XMMWORD:
+ case T_YMMWORD:
intel_match_token (cur_token.code);
if (cur_token.code == T_PTR)
@@ -9167,6 +9728,9 @@ intel_get_token (void)
|| strcasecmp (new_token.str, "OWORD") == 0)
new_token.code = T_XMMWORD;
+ else if (strcasecmp (new_token.str, "YMMWORD") == 0)
+ new_token.code = T_YMMWORD;
+
else if (strcasecmp (new_token.str, "PTR") == 0)
new_token.code = T_PTR;