x86: fold certain VEX and EVEX templates

In anticipation of APX introduce logic to reduce the number of templates we have now, allowing to limit some the number of ones we then need to gain. The fundamental requirements are that - attributes be compatible, which specifically means VexW needs to be the same in the templates (which often isn't the case, for VEX encodings having far more WIG tha, EVEX ones), - the EVEX form being AVX512F (with or without AVX512VL), not any of its extensions (the same will then be required for APX - it'll need to be APX_F). Note that in check_register() there's now a redundant zmm check. Since this logic will need revisiting for APX anyway, I'd like to keep it that way for now. (Similarly a couple of if()-s which could be folded are kept separate, to reduce code churn when adding APX support.)
author: Jan Beulich <jbeulich@suse.com> 2023-09-27 14:15:19 +0200
committer: Jan Beulich <jbeulich@suse.com> 2023-09-27 14:15:19 +0200
commit: a6f3add0027e42f78b2f1366a7b4cdee418987ce (patch)
tree: fd5c830ddb8e3edf176b7b239f5f9f3b1abe0971 /gas
parent: f586e3409b752748bf213520c2dbb0b44e0005d8 (diff)
download: gdb-a6f3add0027e42f78b2f1366a7b4cdee418987ce.zip
gdb-a6f3add0027e42f78b2f1366a7b4cdee418987ce.tar.gz
gdb-a6f3add0027e42f78b2f1366a7b4cdee418987ce.tar.bz2
2 files changed, 102 insertions, 5 deletions
diff --git a/gas/config/tc-i386-intel.c b/gas/config/tc-i386-intel.c
index 68bf615..f1d995f 100644
--- a/gas/config/tc-i386-intel.c
+++ b/gas/config/tc-i386-intel.c
@@ -209,6 +209,11 @@ operatorT i386_operator (const char *name, unsigned int operands, char *pc)
 	      || i386_types[j].sz[0] > 8
 	      || (i386_types[j].sz[0] & (i386_types[j].sz[0] - 1)))
 	    return O_illegal;
+	  if (i.vec_encoding == vex_encoding_default)
+	    i.vec_encoding = vex_encoding_evex;
+	  else if (i.vec_encoding != vex_encoding_evex
+		   && i.vec_encoding != vex_encoding_evex512)
+	    return O_illegal;
 	  if (!i.broadcast.bytes && !i.broadcast.type)
 	    {
 	      i.broadcast.bytes = i386_types[j].sz[0];
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index 5df9fde..8e5a16e 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -436,6 +436,7 @@ struct _i386_insn
 	vex_encoding_vex,
 	vex_encoding_vex3,
 	vex_encoding_evex,
+	vex_encoding_evex512,
 	vex_encoding_error
       } vec_encoding;
 
@@ -1872,6 +1873,13 @@ cpu_flags_and_not (i386_cpu_flags x, i386_cpu_flags y)
 
 static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;
 
+static INLINE bool need_evex_encoding (void)
+{
+  return i.vec_encoding == vex_encoding_evex
+	|| i.vec_encoding == vex_encoding_evex512
+	|| i.mask.reg;
+}
+
 #define CPU_FLAGS_ARCH_MATCH		0x1
 #define CPU_FLAGS_64BIT_MATCH		0x2
 
@@ -1899,6 +1907,29 @@ cpu_flags_match (const insn_template *t)
       /* This instruction is available only on some archs.  */
       i386_cpu_flags cpu = cpu_arch_flags;
 
+      /* Dual VEX/EVEX templates may need stripping of one of the flags.  */
+      if (t->opcode_modifier.vex && t->opcode_modifier.evex)
+	{
+	  /* Dual AVX/AVX512F templates need to retain AVX512F only if we already
+	     know that EVEX encoding will be needed.  */
+	  if ((x.bitfield.cpuavx || x.bitfield.cpuavx2)
+	      && x.bitfield.cpuavx512f)
+	    {
+	      if (need_evex_encoding ())
+		{
+		  x.bitfield.cpuavx = 0;
+		  x.bitfield.cpuavx2 = 0;
+		}
+	      /* need_evex_encoding() isn't reliable before operands were
+		 parsed.  */
+	      else if (i.operands)
+		{
+		  x.bitfield.cpuavx512f = 0;
+		  x.bitfield.cpuavx512vl = 0;
+		}
+	    }
+	}
+
       /* AVX512VL is no standalone feature - match it and then strip it.  */
       if (x.bitfield.cpuavx512vl && !cpu.bitfield.cpuavx512vl)
 	return match;
@@ -1924,6 +1955,8 @@ cpu_flags_match (const insn_template *t)
 		  && (!x.bitfield.cpupclmulqdq || cpu.bitfield.cpupclmulqdq))
 		match |= CPU_FLAGS_ARCH_MATCH;
 	    }
+	  else if (x.bitfield.cpuavx2 && cpu.bitfield.cpuavx2)
+	    match |= CPU_FLAGS_ARCH_MATCH;
 	  else if (x.bitfield.cpuavx512f)
 	    {
 	      /* We need to check a few extra flags with AVX512F.  */
@@ -3646,6 +3679,27 @@ install_template (const insn_template *t)
 
   i.tm = *t;
 
+  /* Dual VEX/EVEX templates need stripping one of the possible variants.  */
+  if (t->opcode_modifier.vex && t->opcode_modifier.evex)
+  {
+      if ((is_cpu (t, CpuAVX) || is_cpu (t, CpuAVX2))
+	  && is_cpu (t, CpuAVX512F))
+	{
+	  if (need_evex_encoding ())
+	    {
+	      i.tm.opcode_modifier.vex = 0;
+	      i.tm.cpu.bitfield.cpuavx = 0;
+	      if (is_cpu (&i.tm, CpuAVX2))
+	        i.tm.cpu.bitfield.isa = 0;
+	    }
+	  else
+	    {
+	      i.tm.opcode_modifier.evex = 0;
+	      i.tm.cpu.bitfield.cpuavx512f = 0;
+	    }
+	}
+  }
+
   /* Note that for pseudo prefixes this produces a length of 1. But for them
      the length isn't interesting at all.  */
   for (l = 1; l < 4; ++l)
@@ -4553,6 +4607,8 @@ optimize_encoding (void)
 	      i.tm.opcode_modifier.vex = VEX128;
 	      i.tm.opcode_modifier.vexw = VEXW0;
 	      i.tm.opcode_modifier.evex = 0;
+	      i.vec_encoding = vex_encoding_vex;
+	      i.mask.reg = NULL;
 	    }
 	  else if (optimize > 1)
 	    i.tm.opcode_modifier.evex = EVEX128;
@@ -5438,6 +5494,11 @@ md_assemble (char *line)
   if (optimize && !i.no_optimize && i.tm.opcode_modifier.optimize)
     optimize_encoding ();
 
+  /* Past optimization there's no need to distinguish vex_encoding_evex and
+     vex_encoding_evex512 anymore.  */
+  if (i.vec_encoding == vex_encoding_evex512)
+    i.vec_encoding = vex_encoding_evex;
+
   if (use_unaligned_vector_move)
     encode_with_unaligned_vector_move ();
 
@@ -5467,6 +5528,7 @@ md_assemble (char *line)
 	  if (i.tm.operand_types[j].bitfield.tmmword)
 	    i.xstate |= xstate_tmm;
 	  else if (i.tm.operand_types[j].bitfield.zmmword
+		   && !i.tm.opcode_modifier.vex
 		   && vector_size >= VSZ512)
 	    i.xstate |= xstate_zmm;
 	  else if (i.tm.operand_types[j].bitfield.ymmword
@@ -6474,7 +6536,8 @@ check_VecOperands (const insn_template *t)
   cpu = cpu_flags_and (cpu_flags_from_attr (t->cpu), avx512);
   if (!cpu_flags_all_zero (&cpu)
       && !is_cpu (t, CpuAVX512VL)
-      && !cpu_arch_flags.bitfield.cpuavx512vl)
+      && !cpu_arch_flags.bitfield.cpuavx512vl
+      && (!t->opcode_modifier.vex || need_evex_encoding ()))
     {
       for (op = 0; op < t->operands; ++op)
 	{
@@ -6785,6 +6848,8 @@ check_VecOperands (const insn_template *t)
 
   /* Check vector Disp8 operand.  */
   if (t->opcode_modifier.disp8memshift
+      && (!t->opcode_modifier.vex
+          || need_evex_encoding ())
       && i.disp_encoding <= disp_encoding_8bit)
     {
       if (i.broadcast.type || i.broadcast.bytes)
@@ -6880,7 +6945,8 @@ VEX_check_encoding (const insn_template *t)
       return 1;
     }
 
-  if (i.vec_encoding == vex_encoding_evex)
+  if (i.vec_encoding == vex_encoding_evex
+      || i.vec_encoding == vex_encoding_evex512)
     {
       /* This instruction must be encoded with EVEX prefix.  */
       if (!is_evex_encoding (t))
@@ -11219,6 +11285,10 @@ s_insn (int dummy ATTRIBUTE_UNUSED)
 	  goto done;
 	}
 
+      /* No need to distinguish vex_encoding_evex and vex_encoding_evex512.  */
+      if (i.vec_encoding == vex_encoding_evex512)
+	i.vec_encoding = vex_encoding_evex;
+
       /* Are we to emit ModR/M encoding?  */
       if (!i.short_form
 	  && (i.mem_operands
@@ -11642,6 +11712,12 @@ RC_SAE_specifier (const char *pstr)
 	      return NULL;
 	    }
 
+	  if (i.vec_encoding == vex_encoding_default)
+	    i.vec_encoding = vex_encoding_evex512;
+	  else if (i.vec_encoding != vex_encoding_evex
+		   && i.vec_encoding != vex_encoding_evex512)
+	    return NULL;
+
 	  i.rounding.type = RC_NamesTable[j].type;
 
 	  return (char *)(pstr + RC_NamesTable[j].len);
@@ -11701,6 +11777,12 @@ check_VecOperations (char *op_string)
 		}
 	      op_string++;
 
+	      if (i.vec_encoding == vex_encoding_default)
+		i.vec_encoding = vex_encoding_evex;
+	      else if (i.vec_encoding != vex_encoding_evex
+		       && i.vec_encoding != vex_encoding_evex512)
+		goto unknown_vec_op;
+
 	      i.broadcast.type = bcst_type;
 	      i.broadcast.operand = this_operand;
 
@@ -13962,8 +14044,17 @@ static bool check_register (const reg_entry *r)
 	}
     }
 
-  if (vector_size < VSZ512 && r->reg_type.bitfield.zmmword)
-    return false;
+  if (r->reg_type.bitfield.zmmword)
+    {
+      if (vector_size < VSZ512)
+	return false;
+
+      if (i.vec_encoding == vex_encoding_default)
+	i.vec_encoding = vex_encoding_evex512;
+      else if (i.vec_encoding != vex_encoding_evex
+	       && i.vec_encoding != vex_encoding_evex512)
+	i.vec_encoding = vex_encoding_error;
+    }
 
   if (vector_size < VSZ256 && r->reg_type.bitfield.ymmword)
     return false;
@@ -13988,7 +14079,8 @@ static bool check_register (const reg_entry *r)
 	  || flag_code != CODE_64BIT)
 	return false;
 
-      if (i.vec_encoding == vex_encoding_default)
+      if (i.vec_encoding == vex_encoding_default
+	  || i.vec_encoding == vex_encoding_evex512)
 	i.vec_encoding = vex_encoding_evex;
       else if (i.vec_encoding != vex_encoding_evex)
 	i.vec_encoding = vex_encoding_error;
author	Jan Beulich <jbeulich@suse.com>	2023-09-27 14:15:19 +0200
committer	Jan Beulich <jbeulich@suse.com>	2023-09-27 14:15:19 +0200
commit	a6f3add0027e42f78b2f1366a7b4cdee418987ce (patch)
tree	fd5c830ddb8e3edf176b7b239f5f9f3b1abe0971 /gas
parent	f586e3409b752748bf213520c2dbb0b44e0005d8 (diff)
download	gdb-a6f3add0027e42f78b2f1366a7b4cdee418987ce.zip gdb-a6f3add0027e42f78b2f1366a7b4cdee418987ce.tar.gz gdb-a6f3add0027e42f78b2f1366a7b4cdee418987ce.tar.bz2