aboutsummaryrefslogtreecommitdiff
path: root/gas
diff options
context:
space:
mode:
author Jan Beulich <jbeulich@novell.com> 2018-04-26 08:55:02 +0200
committer Jan Beulich <jbeulich@suse.com> 2018-04-26 08:55:02 +0200
commit e2195274d4a0752459ea89ffbf50b2704fb0c0b4 (patch)
tree 5220d4b7d06406fe4278798d5d149fd08a253a78 /gas
parent 7a69eac330adff3913a8698eac450cc7968ba8b0 (diff)
downloadbinutils-e2195274d4a0752459ea89ffbf50b2704fb0c0b4.zip
binutils-e2195274d4a0752459ea89ffbf50b2704fb0c0b4.tar.gz
binutils-e2195274d4a0752459ea89ffbf50b2704fb0c0b4.tar.bz2
x86: fold various non-memory operand AVX512VL templates
There's little point carrying up to three templates per insn flavor when the sole difference is operand size and the dependency on AVX512VL being enabled. Instead the need for AVX512VL can be derived from an operand allowing for ZMMword as well as one or both of XMMword and YMMword (irrespective of whether this is a register or memory operand). Without further abstraction to deal with the different Disp8MemShift values between the templates, only a limited set (mostly ones only allowing for non-memory operands) can be folded, which is being done here. Also drop IgnoreSize wherever possible from anything that's being touched anyway.
Diffstat (limited to 'gas')
-rw-r--r--gas/ChangeLog7
-rw-r--r--gas/config/tc-i386.c71
2 files changed, 61 insertions, 17 deletions
diff --git a/gas/ChangeLog b/gas/ChangeLog
index b0c6efc..4801b59 100644
--- a/gas/ChangeLog
+++ b/gas/ChangeLog
@@ -1,5 +1,12 @@
2018-04-26 Jan Beulich <jbeulich@suse.com>
+ * config/tc-i386.c (check_VecOperands): Add AVX512VL check. Set
+ .baseindex.
+ (match_template): Don't set suffix_check when Intel syntax and
+ broadcast. Make check_register a per-operand bitmap.
+
+2018-04-26 Jan Beulich <jbeulich@suse.com>
+
* config/tc-i386.c (optimize_encoding): Check for zeroing
masking.
* testsuite/gas/i386/optimize-1.d,
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index 6e0d1dd..cd53fa46 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -4966,6 +4966,30 @@ static int
check_VecOperands (const insn_template *t)
{
unsigned int op;
+ i386_cpu_flags cpu;
+ static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;
+
+ /* Templates allowing for ZMMword as well as YMMword and/or XMMword for
+ any one operand are implicitly requiring AVX512VL support if the actual
+ operand size is YMMword or XMMword. Since this function runs after
+ template matching, there's no need to check for YMMword/XMMword in
+ the template. */
+ cpu = cpu_flags_and (t->cpu_flags, avx512);
+ if (!cpu_flags_all_zero (&cpu)
+ && !t->cpu_flags.bitfield.cpuavx512vl
+ && !cpu_arch_flags.bitfield.cpuavx512vl)
+ {
+ for (op = 0; op < t->operands; ++op)
+ {
+ if (t->operand_types[op].bitfield.zmmword
+ && (i.types[op].bitfield.ymmword
+ || i.types[op].bitfield.xmmword))
+ {
+ i.error = unsupported;
+ return 1;
+ }
+ }
+ }
/* Without VSIB byte, we can't have a vector register for index. */
if (!t->opcode_modifier.vecsib
@@ -5095,6 +5119,7 @@ check_VecOperands (const insn_template *t)
{
unsigned int j;
+ type.bitfield.baseindex = 1;
for (j = 0; j < i.operands; ++j)
{
if (j != op
@@ -5263,7 +5288,9 @@ match_template (char mnem_suffix)
addr_prefix_disp = -1;
memset (&suffix_check, 0, sizeof (suffix_check));
- if (i.suffix == BYTE_MNEM_SUFFIX)
+ if (intel_syntax && i.broadcast)
+ /* nothing */;
+ else if (i.suffix == BYTE_MNEM_SUFFIX)
suffix_check.no_bsuf = 1;
else if (i.suffix == WORD_MNEM_SUFFIX)
suffix_check.no_wsuf = 1;
@@ -5431,7 +5458,15 @@ match_template (char mnem_suffix)
continue;
/* We check register size if needed. */
- check_register = t->opcode_modifier.checkregsize;
+ if (t->opcode_modifier.checkregsize)
+ {
+ check_register = (1 << t->operands) - 1;
+ if (i.broadcast)
+ check_register &= ~(1 << i.broadcast->operand);
+ }
+ else
+ check_register = 0;
+
overlap0 = operand_type_and (i.types[0], operand_types[0]);
switch (t->operands)
{
@@ -5475,7 +5510,7 @@ match_template (char mnem_suffix)
overlap1 = operand_type_and (i.types[1], operand_types[1]);
if (!operand_type_match (overlap0, i.types[0])
|| !operand_type_match (overlap1, i.types[1])
- || (check_register
+ || ((check_register & 3) == 3
&& !operand_type_register_match (i.types[0],
operand_types[0],
i.types[1],
@@ -5542,30 +5577,32 @@ check_reverse:
/* Fall through. */
case 4:
if (!operand_type_match (overlap3, i.types[3])
- || (check_register
- && (!operand_type_register_match (i.types[1],
+ || ((check_register & 0xa) == 0xa
+ && !operand_type_register_match (i.types[1],
operand_types[1],
i.types[3],
- operand_types[3])
- || !operand_type_register_match (i.types[2],
- operand_types[2],
- i.types[3],
- operand_types[3]))))
+ operand_types[3]))
+ || ((check_register & 0xc) == 0xc
+ && !operand_type_register_match (i.types[2],
+ operand_types[2],
+ i.types[3],
+ operand_types[3])))
continue;
/* Fall through. */
case 3:
/* Here we make use of the fact that there are no
reverse match 3 operand instructions. */
if (!operand_type_match (overlap2, i.types[2])
- || (check_register
- && (!operand_type_register_match (i.types[0],
+ || ((check_register & 5) == 5
+ && !operand_type_register_match (i.types[0],
operand_types[0],
i.types[2],
- operand_types[2])
- || !operand_type_register_match (i.types[1],
- operand_types[1],
- i.types[2],
- operand_types[2]))))
+ operand_types[2]))
+ || ((check_register & 6) == 6
+ && !operand_type_register_match (i.types[1],
+ operand_types[1],
+ i.types[2],
+ operand_types[2])))
continue;
break;
}