-rw-r--r--  gcc/ChangeLog                         |  25
-rw-r--r--  gcc/config/aarch64/aarch64-modes.def  |  38
-rw-r--r--  gcc/config/aarch64/aarch64.c          |  92
-rw-r--r--  gcc/genmodes.c                        |  22
4 files changed, 144 insertions(+), 33 deletions(-)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c54f216..56ad96d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,30 @@
2019-10-16  Richard Sandiford  <richard.sandiford@arm.com>

+ * genmodes.c (mode_data::order): New field.
+ (blank_mode): Update accordingly.
+ (VECTOR_MODES_WITH_PREFIX): Add an order parameter.
+ (make_vector_modes): Likewise.
+ (VECTOR_MODES): Update use accordingly.
+ (cmp_modes): Sort by the new order field ahead of sorting by size.
+ * config/aarch64/aarch64-modes.def (VNx2QI, VNx2HI, VNx2SI)
+ (VNx4QI, VNx4HI, VNx8QI): New partial vector modes.
+ * config/aarch64/aarch64.c (VEC_PARTIAL): New flag value.
+ (aarch64_classify_vector_mode): Handle the new partial modes.
+ (aarch64_vl_bytes): New function.
+ (aarch64_hard_regno_nregs): Use it instead of BYTES_PER_SVE_VECTOR
+ when counting the number of registers in an SVE mode.
+ (aarch64_class_max_nregs): Likewise.
+ (aarch64_hard_regno_mode_ok): Don't allow partial vectors
+ in registers yet.
+ (aarch64_classify_address): Treat partial vectors analogously
+ to full vectors.
+ (aarch64_print_address_internal): Consolidate the printing of
+ MUL VL addresses, using aarch64_vl_bytes as the number of
+ bytes represented by "VL".
+ (aarch64_vector_mode_supported_p): Reject partial vector modes.
+
+2019-10-16  Richard Sandiford  <richard.sandiford@arm.com>
+
* config/aarch64/aarch64.c (aarch64_layout_frame): Use is_constant
rather than known_lt when choosing frame layouts.
diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def
index e034ffc..a9b1bce 100644
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -82,8 +82,8 @@ INT_MODE (XI, 64);
strictly necessary to set the alignment here, since the default would
be clamped to BIGGEST_ALIGNMENT anyhow, but it seems clearer. */
#define SVE_MODES(NVECS, VB, VH, VS, VD) \
- VECTOR_MODES_WITH_PREFIX (VNx, INT, 16 * NVECS); \
- VECTOR_MODES_WITH_PREFIX (VNx, FLOAT, 16 * NVECS); \
+ VECTOR_MODES_WITH_PREFIX (VNx, INT, 16 * NVECS, 0); \
+ VECTOR_MODES_WITH_PREFIX (VNx, FLOAT, 16 * NVECS, 0); \
\
ADJUST_NUNITS (VB##QI, aarch64_sve_vg * NVECS * 8); \
ADJUST_NUNITS (VH##HI, aarch64_sve_vg * NVECS * 4); \
@@ -108,6 +108,40 @@ SVE_MODES (2, VNx32, VNx16, VNx8, VNx4)
SVE_MODES (3, VNx48, VNx24, VNx12, VNx6)
SVE_MODES (4, VNx64, VNx32, VNx16, VNx8)
+/* Partial SVE vectors:
+
+ VNx2QI VNx4QI VNx8QI
+ VNx2HI VNx4HI
+ VNx2SI
+
+ In memory they occupy contiguous locations, in the same way as fixed-length
+ vectors. E.g. VNx8QImode is half the size of VNx16QImode.
+
+ Passing 1 as the final argument ensures that the modes come after all
+ other modes in the GET_MODE_WIDER chain, so that we never pick them
+ in preference to a full vector mode. */
+VECTOR_MODES_WITH_PREFIX (VNx, INT, 2, 1);
+VECTOR_MODES_WITH_PREFIX (VNx, INT, 4, 1);
+VECTOR_MODES_WITH_PREFIX (VNx, INT, 8, 1);
+
+ADJUST_NUNITS (VNx2QI, aarch64_sve_vg);
+ADJUST_NUNITS (VNx2HI, aarch64_sve_vg);
+ADJUST_NUNITS (VNx2SI, aarch64_sve_vg);
+
+ADJUST_NUNITS (VNx4QI, aarch64_sve_vg * 2);
+ADJUST_NUNITS (VNx4HI, aarch64_sve_vg * 2);
+
+ADJUST_NUNITS (VNx8QI, aarch64_sve_vg * 4);
+
+ADJUST_ALIGNMENT (VNx2QI, 1);
+ADJUST_ALIGNMENT (VNx4QI, 1);
+ADJUST_ALIGNMENT (VNx8QI, 1);
+
+ADJUST_ALIGNMENT (VNx2HI, 2);
+ADJUST_ALIGNMENT (VNx4HI, 2);
+
+ADJUST_ALIGNMENT (VNx2SI, 4);
+
/* Quad float: 128-bit floating mode for long doubles. */
FLOAT_MODE (TF, 16, ieee_quad_format);
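
To make the size relationships concrete, here is a worked illustration (not part of the patch) at an assumed 256-bit vector length, where aarch64_sve_vg == 4 and a full SVE vector is 32 bytes:

    /* Worked sizes at an assumed 256-bit vector length (aarch64_sve_vg == 4):

       Mode     nunits          bytes   fraction of a full vector
       VNx8QI   vg * 4 == 16     16     1/2 of VNx16QI
       VNx4HI   vg * 2 ==  8     16     1/2 of VNx8HI
       VNx2SI   vg     ==  4     16     1/2 of VNx4SI
       VNx4QI   vg * 2 ==  8      8     1/4 of VNx16QI
       VNx2HI   vg     ==  4      8     1/4 of VNx8HI
       VNx2QI   vg     ==  4      4     1/8 of VNx16QI  */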
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 0537e7f..1f0e74a 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1538,6 +1538,9 @@ const unsigned int VEC_SVE_PRED = 4;
/* Can be used in combination with VEC_ADVSIMD or VEC_SVE_DATA to indicate
a structure of 2, 3 or 4 vectors. */
const unsigned int VEC_STRUCT = 8;
+/* Can be used in combination with VEC_SVE_DATA to indicate that the
+ vector has fewer significant bytes than a full SVE vector. */
+const unsigned int VEC_PARTIAL = 16;
/* Useful combinations of the above. */
const unsigned int VEC_ANY_SVE = VEC_SVE_DATA | VEC_SVE_PRED;
const unsigned int VEC_ANY_DATA = VEC_ADVSIMD | VEC_SVE_DATA;
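
For orientation, the classifications produced once VEC_PARTIAL exists look as follows (illustrative, assuming TARGET_SIMD and TARGET_SVE; VNx32QImode is the two-vector tuple mode):

    /* aarch64_classify_vector_mode results (sketch):
       V16QImode   -> VEC_ADVSIMD
       VNx16QImode -> VEC_SVE_DATA
       VNx8QImode  -> VEC_SVE_DATA | VEC_PARTIAL
       VNx32QImode -> VEC_SVE_DATA | VEC_STRUCT
       VNx16BImode -> VEC_SVE_PRED  */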
@@ -1558,7 +1561,17 @@ aarch64_classify_vector_mode (machine_mode mode)
of -msve-vector-bits. */
switch (mode)
{
- /* Single SVE vectors. */
+ /* Partial SVE QI vectors. */
+ case E_VNx2QImode:
+ case E_VNx4QImode:
+ case E_VNx8QImode:
+ /* Partial SVE HI vectors. */
+ case E_VNx2HImode:
+ case E_VNx4HImode:
+ /* Partial SVE SI vector. */
+ case E_VNx2SImode:
+ return TARGET_SVE ? VEC_SVE_DATA | VEC_PARTIAL : 0;
+
case E_VNx16QImode:
case E_VNx8HImode:
case E_VNx4SImode:
@@ -1641,6 +1654,24 @@ aarch64_sve_data_mode_p (machine_mode mode)
return aarch64_classify_vector_mode (mode) & VEC_SVE_DATA;
}
+/* Return the number of defined bytes in one constituent vector of
+ SVE mode MODE, which has vector flags VEC_FLAGS. */
+static poly_int64
+aarch64_vl_bytes (machine_mode mode, unsigned int vec_flags)
+{
+ if (vec_flags & VEC_PARTIAL)
+ /* A single partial vector. */
+ return GET_MODE_SIZE (mode);
+
+ if (vec_flags & VEC_SVE_DATA)
+ /* A single vector or a tuple. */
+ return BYTES_PER_SVE_VECTOR;
+
+ /* A single predicate. */
+ gcc_assert (vec_flags & VEC_SVE_PRED);
+ return BYTES_PER_SVE_PRED;
+}
+
/* Implement target hook TARGET_ARRAY_MODE. */
static opt_machine_mode
aarch64_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems)
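
The following values, assuming a concrete 512-bit vector length (so BYTES_PER_SVE_VECTOR == 64 and BYTES_PER_SVE_PRED == 8), illustrate the three cases the new function distinguishes:

    /* aarch64_vl_bytes at an assumed 512-bit vector length:
       VNx16QImode (VEC_SVE_DATA)                -> 64   full vector
       VNx32QImode (VEC_SVE_DATA | VEC_STRUCT)   -> 64   per constituent vector
       VNx8QImode  (VEC_SVE_DATA | VEC_PARTIAL)  -> 32   the mode's own size
       VNx16BImode (VEC_SVE_PRED)                ->  8   one predicate  */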
@@ -1769,10 +1800,13 @@ aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
case FP_REGS:
case FP_LO_REGS:
case FP_LO8_REGS:
- if (aarch64_sve_data_mode_p (mode))
- return exact_div (GET_MODE_SIZE (mode),
- BYTES_PER_SVE_VECTOR).to_constant ();
- return CEIL (lowest_size, UNITS_PER_VREG);
+ {
+ unsigned int vec_flags = aarch64_classify_vector_mode (mode);
+ if (vec_flags & VEC_SVE_DATA)
+ return exact_div (GET_MODE_SIZE (mode),
+ aarch64_vl_bytes (mode, vec_flags)).to_constant ();
+ return CEIL (lowest_size, UNITS_PER_VREG);
+ }
case PR_REGS:
case PR_LO_REGS:
case PR_HI_REGS:
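
The exact_div above then yields register counts like these (a sketch; for partial modes aarch64_vl_bytes returns GET_MODE_SIZE itself, so the division is always exact):

    /* aarch64_hard_regno_nregs for an FP register (illustrative):
       VNx32QImode: size == 2 * vl_bytes -> 2 registers (two-vector tuple)
       VNx16QImode: size ==     vl_bytes -> 1 register
       VNx8QImode:  size ==     vl_bytes -> 1 register
                    (here vl_bytes == GET_MODE_SIZE (mode))  */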
@@ -1796,6 +1830,11 @@ aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
return mode == DImode;
unsigned int vec_flags = aarch64_classify_vector_mode (mode);
+ /* At the moment, partial vector modes are only useful for memory
+ references, but that could change in future. */
+ if (vec_flags & VEC_PARTIAL)
+ return false;
+
if (vec_flags & VEC_SVE_PRED)
return PR_REGNUM_P (regno);
@@ -7441,9 +7480,15 @@ aarch64_classify_address (struct aarch64_address_info *info,
HOST_WIDE_INT const_size;
+ /* Whether a vector mode is partial doesn't affect address legitimacy.
+ Partial vectors like VNx8QImode allow the same indexed addressing
+ mode and MUL VL addressing mode as full vectors like VNx16QImode;
+ in both cases, MUL VL counts multiples of GET_MODE_SIZE. */
+ unsigned int vec_flags = aarch64_classify_vector_mode (mode);
+ vec_flags &= ~VEC_PARTIAL;
+
/* On BE, we use load/store pair for all large int mode load/stores.
TI/TFmode may also use a load/store pair. */
- unsigned int vec_flags = aarch64_classify_vector_mode (mode);
bool advsimd_struct_p = (vec_flags == (VEC_ADVSIMD | VEC_STRUCT));
bool load_store_pair_p = (type == ADDR_QUERY_LDP_STP
|| type == ADDR_QUERY_LDP_STP_N
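
A worked example of the comment above (illustrative): at the minimum 128-bit vector length, GET_MODE_SIZE (VNx8QImode) is the poly_int (8 + 8x), exactly half of VNx16QImode's (16 + 16x), so:

    /* MUL VL offsets once VEC_PARTIAL has been cleared:
       VNx16QImode, offset (32 + 32x) bytes -> 2 vectors ("#2, mul vl")
       VNx8QImode,  offset (16 + 16x) bytes -> 2 vectors ("#2, mul vl")
       i.e. "mul vl" counts multiples of the mode's own size, so partial
       vectors get the same index range as full vectors.  */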
@@ -8948,7 +8993,7 @@ aarch64_print_address_internal (FILE *f, machine_mode mode, rtx x,
aarch64_addr_query_type type)
{
struct aarch64_address_info addr;
- unsigned int size;
+ unsigned int size, vec_flags;
/* Check all addresses are Pmode - including ILP32. */
if (GET_MODE (x) != Pmode
@@ -8964,26 +9009,24 @@ aarch64_print_address_internal (FILE *f, machine_mode mode, rtx x,
{
case ADDRESS_REG_IMM:
if (known_eq (addr.const_offset, 0))
- asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
- else if (aarch64_sve_data_mode_p (mode))
{
- HOST_WIDE_INT vnum
- = exact_div (addr.const_offset,
- BYTES_PER_SVE_VECTOR).to_constant ();
- asm_fprintf (f, "[%s, #%wd, mul vl]",
- reg_names[REGNO (addr.base)], vnum);
+ asm_fprintf (f, "[%s]", reg_names[REGNO (addr.base)]);
+ return true;
}
- else if (aarch64_sve_pred_mode_p (mode))
+
+ vec_flags = aarch64_classify_vector_mode (mode);
+ if (vec_flags & VEC_ANY_SVE)
{
HOST_WIDE_INT vnum
= exact_div (addr.const_offset,
- BYTES_PER_SVE_PRED).to_constant ();
+ aarch64_vl_bytes (mode, vec_flags)).to_constant ();
asm_fprintf (f, "[%s, #%wd, mul vl]",
reg_names[REGNO (addr.base)], vnum);
+ return true;
}
- else
- asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
- INTVAL (addr.offset));
+
+ asm_fprintf (f, "[%s, %wd]", reg_names[REGNO (addr.base)],
+ INTVAL (addr.offset));
return true;
case ADDRESS_REG_REG:
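
Assumed output of the consolidated ADDRESS_REG_IMM path (a sketch, with x0 as the base register):

    /* VNx16QImode, const_offset 0                        -> [x0]
       VNx16QImode, const_offset BYTES_PER_SVE_VECTOR     -> [x0, #1, mul vl]
       VNx8QImode,  const_offset 3 * GET_MODE_SIZE (mode) -> [x0, #3, mul vl]
       VNx16BImode, const_offset 2 * BYTES_PER_SVE_PRED   -> [x0, #2, mul vl]  */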
@@ -9395,7 +9438,7 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
can hold MODE, but at the moment we need to handle all modes.
Just ignore any runtime parts for registers that can't store them. */
HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode));
- unsigned int nregs;
+ unsigned int nregs, vec_flags;
switch (regclass)
{
case TAILCALL_ADDR_REGS:
@@ -9406,11 +9449,12 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
case FP_REGS:
case FP_LO_REGS:
case FP_LO8_REGS:
- if (aarch64_sve_data_mode_p (mode)
+ vec_flags = aarch64_classify_vector_mode (mode);
+ if ((vec_flags & VEC_SVE_DATA)
&& constant_multiple_p (GET_MODE_SIZE (mode),
- BYTES_PER_SVE_VECTOR, &nregs))
+ aarch64_vl_bytes (mode, vec_flags), &nregs))
return nregs;
- return (aarch64_vector_data_mode_p (mode)
+ return (vec_flags & VEC_ADVSIMD
? CEIL (lowest_size, UNITS_PER_VREG)
: CEIL (lowest_size, UNITS_PER_WORD));
case STACK_REG:
@@ -15057,7 +15101,7 @@ static bool
aarch64_vector_mode_supported_p (machine_mode mode)
{
unsigned int vec_flags = aarch64_classify_vector_mode (mode);
- return vec_flags != 0 && (vec_flags & VEC_STRUCT) == 0;
+ return vec_flags != 0 && (vec_flags & (VEC_STRUCT | VEC_PARTIAL)) == 0;
}
/* Return the full-width SVE vector mode for element mode MODE, if one
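
Illustrative results of the tightened check (assuming TARGET_SIMD and TARGET_SVE):

    /* aarch64_vector_mode_supported_p (sketch):
       V16QImode   -> true    Advanced SIMD vector
       VNx16QImode -> true    full SVE vector
       VNx8QImode  -> false   VEC_PARTIAL is now rejected
       VNx32QImode -> false   VEC_STRUCT, as before  */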
diff --git a/gcc/genmodes.c b/gcc/genmodes.c
index f33eefa..95522d6 100644
--- a/gcc/genmodes.c
+++ b/gcc/genmodes.c
@@ -53,6 +53,7 @@ struct mode_data
const char *name; /* printable mode name -- SI, not SImode */
enum mode_class cl; /* this mode class */
+ unsigned int order; /* top-level sorting order */
unsigned int precision; /* size in bits, equiv to TYPE_PRECISION */
unsigned int bytesize; /* storage size in addressable units */
unsigned int ncomponents; /* number of subunits */
@@ -85,7 +86,7 @@ static struct mode_data *void_mode;
static const struct mode_data blank_mode = {
0, "<unknown>", MAX_MODE_CLASS,
- -1U, -1U, -1U, -1U,
+ 0, -1U, -1U, -1U, -1U,
0, 0, 0, 0, 0, 0,
"<unknown>", 0, 0, 0, 0, false, false, 0
};
@@ -484,14 +485,15 @@ make_complex_modes (enum mode_class cl,
}
}
-/* For all modes in class CL, construct vector modes of width
- WIDTH, having as many components as necessary. */
-#define VECTOR_MODES_WITH_PREFIX(PREFIX, C, W) \
- make_vector_modes (MODE_##C, #PREFIX, W, __FILE__, __LINE__)
-#define VECTOR_MODES(C, W) VECTOR_MODES_WITH_PREFIX (V, C, W)
+/* For all modes in class CL, construct vector modes of width WIDTH,
+ having as many components as necessary. ORDER is the sorting order
+ of the mode, with smaller numbers indicating a higher priority. */
+#define VECTOR_MODES_WITH_PREFIX(PREFIX, C, W, ORDER) \
+ make_vector_modes (MODE_##C, #PREFIX, W, ORDER, __FILE__, __LINE__)
+#define VECTOR_MODES(C, W) VECTOR_MODES_WITH_PREFIX (V, C, W, 0)
static void ATTRIBUTE_UNUSED
make_vector_modes (enum mode_class cl, const char *prefix, unsigned int width,
- const char *file, unsigned int line)
+ unsigned int order, const char *file, unsigned int line)
{
struct mode_data *m;
struct mode_data *v;
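
As an illustration of the new parameter (a hypothetical expansion of the macros above):

    /* The aarch64 use
         VECTOR_MODES_WITH_PREFIX (VNx, INT, 8, 1);
       expands to
         make_vector_modes (MODE_INT, "VNx", 8, 1, __FILE__, __LINE__);
       creating VNx8QI, VNx4HI and VNx2SI with order == 1, while plain
       VECTOR_MODES (INT, 8) keeps passing order == 0.  */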
@@ -530,6 +532,7 @@ make_vector_modes (enum mode_class cl, const char *prefix, unsigned int width,
}
v = new_mode (vclass, xstrdup (buf), file, line);
+ v->order = order;
v->component = m;
v->ncomponents = ncomponents;
}
@@ -832,6 +835,11 @@ cmp_modes (const void *a, const void *b)
const struct mode_data *const m = *(const struct mode_data *const*)a;
const struct mode_data *const n = *(const struct mode_data *const*)b;
+ if (m->order > n->order)
+ return 1;
+ else if (m->order < n->order)
+ return -1;
+
if (m->bytesize > n->bytesize)
return 1;
else if (m->bytesize < n->bytesize)
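
To make the new two-level comparison concrete, here is a standalone sketch (not GCC code) that applies the same order-then-bytesize key to four of the modes above; the bytesize values are the widths passed to VECTOR_MODES_WITH_PREFIX:

    #include <stdio.h>
    #include <stdlib.h>

    /* Minimal stand-in for the two mode_data fields that cmp_modes now
       compares first.  */
    struct mode { const char *name; unsigned int order; unsigned int bytesize; };

    /* Same shape as cmp_modes: order is the primary key, bytesize the
       secondary one.  */
    static int
    cmp (const void *a, const void *b)
    {
      const struct mode *m = a;
      const struct mode *n = b;
      if (m->order != n->order)
        return m->order > n->order ? 1 : -1;
      if (m->bytesize != n->bytesize)
        return m->bytesize > n->bytesize ? 1 : -1;
      return 0;
    }

    int
    main (void)
    {
      struct mode modes[] = {
        { "VNx2QI", 1, 2 }, { "VNx16QI", 0, 16 },
        { "VNx8QI", 1, 8 }, { "VNx32QI", 0, 32 },
      };
      qsort (modes, 4, sizeof (modes[0]), cmp);
      /* Prints VNx16QI VNx32QI VNx2QI VNx8QI: the partial modes sort
         after every full vector mode, regardless of size.  */
      for (int i = 0; i < 4; ++i)
        printf ("%s\n", modes[i].name);
      return 0;
    }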