diff options
author | Jan Beulich <jbeulich@novell.com> | 2018-04-26 08:55:02 +0200 |
---|---|---|
committer | Jan Beulich <jbeulich@suse.com> | 2018-04-26 08:55:02 +0200 |
commit | e2195274d4a0752459ea89ffbf50b2704fb0c0b4 (patch) | |
tree | 5220d4b7d06406fe4278798d5d149fd08a253a78 /gas | |
parent | 7a69eac330adff3913a8698eac450cc7968ba8b0 (diff) | |
download | binutils-e2195274d4a0752459ea89ffbf50b2704fb0c0b4.zip binutils-e2195274d4a0752459ea89ffbf50b2704fb0c0b4.tar.gz binutils-e2195274d4a0752459ea89ffbf50b2704fb0c0b4.tar.bz2 |
x86: fold various non-memory operand AVX512VL templates
There's little point carrying up to three templates per insn flavor
when the sole difference is operand size and the dependency on AVX512VL
being enabled. Instead the need for AVX512VL can be derived from an
operand allowing for ZMMword as well as one or both of XMMword and
YMMword (irrespective of whether this is a register or memory operand).
Without further abstraction to deal with the different Disp8MemShift
values between the templates, only a limited set (mostly ones only
allowing for non-memory operands) can be folded, which is being done
here.
Also drop IgnoreSize wherever possible from anything that's being
touched anyway.
Diffstat (limited to 'gas')
-rw-r--r-- | gas/ChangeLog | 7 | ||||
-rw-r--r-- | gas/config/tc-i386.c | 71 |
2 files changed, 61 insertions, 17 deletions
diff --git a/gas/ChangeLog b/gas/ChangeLog index b0c6efc..4801b59 100644 --- a/gas/ChangeLog +++ b/gas/ChangeLog @@ -1,5 +1,12 @@ 2018-04-26 Jan Beulich <jbeulich@suse.com> + * config/tc-i386.c (check_VecOperands): Add AVX512VL check. Set + .baseindex. + (match_template): Don't set suffix_check when Intel syntax and + broadcast. Make check_register a per-operand bitmap. + +2018-04-26 Jan Beulich <jbeulich@suse.com> + * config/tc-i386.c (optimize_encoding): Check for zeroing masking. * testsuite/gas/i386/optimize-1.d, diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index 6e0d1dd..cd53fa46 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -4966,6 +4966,30 @@ static int check_VecOperands (const insn_template *t) { unsigned int op; + i386_cpu_flags cpu; + static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS; + + /* Templates allowing for ZMMword as well as YMMword and/or XMMword for + any one operand are implicity requiring AVX512VL support if the actual + operand size is YMMword or XMMword. Since this function runs after + template matching, there's no need to check for YMMword/XMMword in + the template. */ + cpu = cpu_flags_and (t->cpu_flags, avx512); + if (!cpu_flags_all_zero (&cpu) + && !t->cpu_flags.bitfield.cpuavx512vl + && !cpu_arch_flags.bitfield.cpuavx512vl) + { + for (op = 0; op < t->operands; ++op) + { + if (t->operand_types[op].bitfield.zmmword + && (i.types[op].bitfield.ymmword + || i.types[op].bitfield.xmmword)) + { + i.error = unsupported; + return 1; + } + } + } /* Without VSIB byte, we can't have a vector register for index. 
*/ if (!t->opcode_modifier.vecsib @@ -5095,6 +5119,7 @@ check_VecOperands (const insn_template *t) { unsigned int j; + type.bitfield.baseindex = 1; for (j = 0; j < i.operands; ++j) { if (j != op @@ -5263,7 +5288,9 @@ match_template (char mnem_suffix) addr_prefix_disp = -1; memset (&suffix_check, 0, sizeof (suffix_check)); - if (i.suffix == BYTE_MNEM_SUFFIX) + if (intel_syntax && i.broadcast) + /* nothing */; + else if (i.suffix == BYTE_MNEM_SUFFIX) suffix_check.no_bsuf = 1; else if (i.suffix == WORD_MNEM_SUFFIX) suffix_check.no_wsuf = 1; @@ -5431,7 +5458,15 @@ match_template (char mnem_suffix) continue; /* We check register size if needed. */ - check_register = t->opcode_modifier.checkregsize; + if (t->opcode_modifier.checkregsize) + { + check_register = (1 << t->operands) - 1; + if (i.broadcast) + check_register &= ~(1 << i.broadcast->operand); + } + else + check_register = 0; + overlap0 = operand_type_and (i.types[0], operand_types[0]); switch (t->operands) { @@ -5475,7 +5510,7 @@ match_template (char mnem_suffix) overlap1 = operand_type_and (i.types[1], operand_types[1]); if (!operand_type_match (overlap0, i.types[0]) || !operand_type_match (overlap1, i.types[1]) - || (check_register + || ((check_register & 3) == 3 && !operand_type_register_match (i.types[0], operand_types[0], i.types[1], @@ -5542,30 +5577,32 @@ check_reverse: /* Fall through. */ case 4: if (!operand_type_match (overlap3, i.types[3]) - || (check_register - && (!operand_type_register_match (i.types[1], + || ((check_register & 0xa) == 0xa + && !operand_type_register_match (i.types[1], operand_types[1], i.types[3], - operand_types[3]) - || !operand_type_register_match (i.types[2], - operand_types[2], - i.types[3], - operand_types[3])))) + operand_types[3])) + || ((check_register & 0xc) == 0xc + && !operand_type_register_match (i.types[2], + operand_types[2], + i.types[3], + operand_types[3]))) continue; /* Fall through. 
*/ case 3: /* Here we make use of the fact that there are no reverse match 3 operand instructions. */ if (!operand_type_match (overlap2, i.types[2]) - || (check_register - && (!operand_type_register_match (i.types[0], + || ((check_register & 5) == 5 + && !operand_type_register_match (i.types[0], operand_types[0], i.types[2], - operand_types[2]) - || !operand_type_register_match (i.types[1], - operand_types[1], - i.types[2], - operand_types[2])))) + operand_types[2])) + || ((check_register & 6) == 6 + && !operand_type_register_match (i.types[1], + operand_types[1], + i.types[2], + operand_types[2]))) continue; break; } |