arm: [MVE intrinsics] Add support for contiguous loads and stores

This patch adds base support for load/store intrinsics to the framework, starting with loads and stores for contiguous memory elements, without extension nor truncation. Compared to the aarch64/SVE implementation, there's no support for gather/scatter loads/stores yet. This will be added later as needed. 2023-11-16 Christophe Lyon <christophe.lyon@linaro.org> gcc/ * config/arm/arm-mve-builtins-functions.h (multi_vector_function) (full_width_access): New classes. * config/arm/arm-mve-builtins.cc (find_type_suffix_for_scalar_type, infer_pointer_type) (require_pointer_type, get_contiguous_base, add_mem_operand) (add_fixed_operand, use_contiguous_load_insn) (use_contiguous_store_insn): New. * config/arm/arm-mve-builtins.h (memory_vector_mode) (infer_pointer_type, require_pointer_type, get_contiguous_base) (add_mem_operand) (add_fixed_operand, use_contiguous_load_insn) (use_contiguous_store_insn): New.
author: Christophe Lyon <christophe.lyon@linaro.org> 2023-11-15 08:12:35 +0000
committer: Christophe Lyon <christophe.lyon@linaro.org> 2023-11-20 11:23:56 +0000
commit: 0c2037d9d93a8f768cb11698ff794278246bb31f (patch)
tree: bc4df8f2989afbedf41a2aa0b4cb4a274ebb250b /gcc
parent: 524c892e642e87da853d2a9d0314dd6c220f59de (diff)
download: gcc-0c2037d9d93a8f768cb11698ff794278246bb31f.zip
gcc-0c2037d9d93a8f768cb11698ff794278246bb31f.tar.gz
gcc-0c2037d9d93a8f768cb11698ff794278246bb31f.tar.bz2
3 files changed, 199 insertions, 1 deletions
diff --git a/gcc/config/arm/arm-mve-builtins-functions.h b/gcc/config/arm/arm-mve-builtins-functions.h
index eba1f07..6d234a2 100644
--- a/gcc/config/arm/arm-mve-builtins-functions.h
+++ b/gcc/config/arm/arm-mve-builtins-functions.h
@@ -966,6 +966,62 @@ public:
   }
 };
 
+/* A function_base that sometimes or always operates on tuples of
+   vectors.  */
+class multi_vector_function : public function_base
+{
+public:
+  CONSTEXPR multi_vector_function (unsigned int vectors_per_tuple)
+    : m_vectors_per_tuple (vectors_per_tuple) {}
+
+  unsigned int
+  vectors_per_tuple () const override
+  {
+    return m_vectors_per_tuple;
+  }
+
+  /* The number of vectors in a tuple, or 1 if the function only operates
+     on single vectors.  */
+  unsigned int m_vectors_per_tuple;
+};
+
+/* A function_base that loads or stores contiguous memory elements
+   without extending or truncating them.  */
+class full_width_access : public multi_vector_function
+{
+public:
+  CONSTEXPR full_width_access (unsigned int vectors_per_tuple = 1)
+    : multi_vector_function (vectors_per_tuple) {}
+
+  tree
+  memory_scalar_type (const function_instance &fi) const override
+  {
+    return fi.scalar_type (0);
+  }
+
+  machine_mode
+  memory_vector_mode (const function_instance &fi) const override
+  {
+    machine_mode mode = fi.vector_mode (0);
+    /* Vectors of floating-point are managed in memory as vectors of
+       integers.  */
+    switch (mode)
+      {
+      case E_V4SFmode:
+	mode = E_V4SImode;
+	break;
+      case E_V8HFmode:
+	mode = E_V8HImode;
+	break;
+      }
+
+    if (m_vectors_per_tuple != 1)
+      mode = targetm.array_mode (mode, m_vectors_per_tuple).require ();
+
+    return mode;
+  }
+};
+
 } /* end namespace arm_mve */
 
 /* Declare the global function base NAME, creating it from an instance
diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc
index 02dc8fa..a265cb0 100644
--- a/gcc/config/arm/arm-mve-builtins.cc
+++ b/gcc/config/arm/arm-mve-builtins.cc
@@ -36,6 +36,7 @@
 #include "fold-const.h"
 #include "gimple.h"
 #include "gimple-iterator.h"
+#include "explow.h"
 #include "emit-rtl.h"
 #include "langhooks.h"
 #include "stringpool.h"
@@ -529,6 +530,22 @@ matches_type_p (const_tree model_type, const_tree candidate)
 	  && TYPE_MAIN_VARIANT (model_type) == TYPE_MAIN_VARIANT (candidate));
 }
 
+/* If TYPE is a valid MVE element type, return the corresponding type
+   suffix, otherwise return NUM_TYPE_SUFFIXES.  */
+static type_suffix_index
+find_type_suffix_for_scalar_type (const_tree type)
+{
+  /* A linear search should be OK here, since the code isn't hot and
+     the number of types is only small.  */
+  for (unsigned int suffix_i = 0; suffix_i < NUM_TYPE_SUFFIXES; ++suffix_i)
+      {
+	vector_type_index vector_i = type_suffixes[suffix_i].vector_type;
+	if (matches_type_p (scalar_types[vector_i], type))
+	  return type_suffix_index (suffix_i);
+      }
+  return NUM_TYPE_SUFFIXES;
+}
+
 /* Report an error against LOCATION that the user has tried to use
    a floating point function when the mve.fp extension is disabled.  */
 static void
@@ -1125,6 +1142,37 @@ function_resolver::resolve_to (mode_suffix_index mode,
   return res;
 }
 
+/* Require argument ARGNO to be a pointer to a scalar type that has a
+   corresponding type suffix.  Return that type suffix on success,
+   otherwise report an error and return NUM_TYPE_SUFFIXES.  */
+type_suffix_index
+function_resolver::infer_pointer_type (unsigned int argno)
+{
+  tree actual = get_argument_type (argno);
+  if (actual == error_mark_node)
+    return NUM_TYPE_SUFFIXES;
+
+  if (TREE_CODE (actual) != POINTER_TYPE)
+    {
+      error_at (location, "passing %qT to argument %d of %qE, which"
+		" expects a pointer type", actual, argno + 1, fndecl);
+      return NUM_TYPE_SUFFIXES;
+    }
+
+  tree target = TREE_TYPE (actual);
+  type_suffix_index type = find_type_suffix_for_scalar_type (target);
+  if (type == NUM_TYPE_SUFFIXES)
+    {
+      error_at (location, "passing %qT to argument %d of %qE, but %qT is not"
+		" a valid MVE element type", actual, argno + 1, fndecl,
+		build_qualified_type (target, 0));
+      return NUM_TYPE_SUFFIXES;
+    }
+  unsigned int bits = type_suffixes[type].element_bits;
+
+  return type;
+}
+
 /* Require argument ARGNO to be a single vector or a tuple of NUM_VECTORS
    vectors; NUM_VECTORS is 1 for the former.  Return the associated type
    suffix on success, using TYPE_SUFFIX_b for predicates.  Report an error
@@ -1498,6 +1546,22 @@ function_resolver::require_scalar_type (unsigned int argno,
   return true;
 }
 
+/* Require argument ARGNO to be some form of pointer, without being specific
+   about its target type.  Return true if the argument has the right form,
+   otherwise report an appropriate error.  */
+bool
+function_resolver::require_pointer_type (unsigned int argno)
+{
+  if (!scalar_argument_p (argno))
+    {
+      error_at (location, "passing %qT to argument %d of %qE, which"
+		" expects a scalar pointer", get_argument_type (argno),
+		argno + 1, fndecl);
+      return false;
+    }
+  return true;
+}
+
 /* Require the function to have exactly EXPECTED arguments.  Return true
    if it does, otherwise report an appropriate error.  */
 bool
@@ -1955,6 +2019,14 @@ function_expander::direct_optab_handler (optab op, unsigned int suffix_i)
   return ::direct_optab_handler (op, vector_mode (suffix_i));
 }
 
+/* Return the base address for a contiguous load or store
+   function.  */
+rtx
+function_expander::get_contiguous_base ()
+{
+  return args[0];
+}
+
 /* For a function that does the equivalent of:
 
      OUTPUT = COND ? FN (INPUTS) : FALLBACK;
@@ -2043,6 +2115,26 @@ function_expander::add_integer_operand (HOST_WIDE_INT x)
   create_integer_operand (&m_ops.last (), x);
 }
 
+/* Add a memory operand with mode MODE and address ADDR.  */
+void
+function_expander::add_mem_operand (machine_mode mode, rtx addr)
+{
+  gcc_assert (VECTOR_MODE_P (mode));
+  rtx mem = gen_rtx_MEM (mode, memory_address (mode, addr));
+  /* The memory is only guaranteed to be element-aligned.  */
+  set_mem_align (mem, GET_MODE_ALIGNMENT (GET_MODE_INNER (mode)));
+  add_fixed_operand (mem);
+}
+
+/* Add an operand that must be X.  The only way of legitimizing an
+   invalid X is to reload the address of a MEM.  */
+void
+function_expander::add_fixed_operand (rtx x)
+{
+  m_ops.safe_grow (m_ops.length () + 1, true);
+  create_fixed_operand (&m_ops.last (), x);
+}
+
 /* Generate instruction ICODE, given that its operands have already
    been added to M_OPS.  Return the value of the first operand.  */
 rtx
@@ -2137,6 +2229,30 @@ function_expander::use_cond_insn (insn_code icode, unsigned int merge_argno)
   return generate_insn (icode);
 }
 
+/* Implement the call using instruction ICODE, which loads memory operand 1
+   into register operand 0.  */
+rtx
+function_expander::use_contiguous_load_insn (insn_code icode)
+{
+  machine_mode mem_mode = memory_vector_mode ();
+
+  add_output_operand (icode);
+  add_mem_operand (mem_mode, get_contiguous_base ());
+  return generate_insn (icode);
+}
+
+/* Implement the call using instruction ICODE, which stores register operand 1
+   into memory operand 0.  */
+rtx
+function_expander::use_contiguous_store_insn (insn_code icode)
+{
+  machine_mode mem_mode = memory_vector_mode ();
+
+  add_mem_operand (mem_mode, get_contiguous_base ());
+  add_input_operand (icode, args[1]);
+  return generate_insn (icode);
+}
+
 /* Implement the call using a normal unpredicated optab for PRED_none.
 
    <optab> corresponds to:
diff --git a/gcc/config/arm/arm-mve-builtins.h b/gcc/config/arm/arm-mve-builtins.h
index 4fd230f..9c219fa 100644
--- a/gcc/config/arm/arm-mve-builtins.h
+++ b/gcc/config/arm/arm-mve-builtins.h
@@ -278,6 +278,7 @@ public:
 
   unsigned int vectors_per_tuple () const;
   tree memory_scalar_type () const;
+  machine_mode memory_vector_mode () const;
 
   const mode_suffix_info &mode_suffix () const;
 
@@ -383,6 +384,7 @@ public:
 		   type_suffix_index = NUM_TYPE_SUFFIXES,
 		   type_suffix_index = NUM_TYPE_SUFFIXES);
 
+  type_suffix_index infer_pointer_type (unsigned int);
   type_suffix_index infer_vector_or_tuple_type (unsigned int, unsigned int);
   type_suffix_index infer_vector_type (unsigned int);
 
@@ -394,8 +396,9 @@ public:
 				    type_suffix_index,
 				    type_class_index = SAME_TYPE_CLASS,
 				    unsigned int = SAME_SIZE);
-  bool require_integer_immediate (unsigned int);
   bool require_scalar_type (unsigned int, const char *);
+  bool require_pointer_type (unsigned int);
+  bool require_integer_immediate (unsigned int);
   bool require_derived_scalar_type (unsigned int, type_class_index,
 				    unsigned int = SAME_SIZE);
   
@@ -476,18 +479,23 @@ public:
 
   insn_code direct_optab_handler (optab, unsigned int = 0);
 
+  rtx get_contiguous_base ();
   rtx get_fallback_value (machine_mode, unsigned int, unsigned int &);
   rtx get_reg_target ();
 
   void add_output_operand (insn_code);
   void add_input_operand (insn_code, rtx);
   void add_integer_operand (HOST_WIDE_INT);
+  void add_mem_operand (machine_mode, rtx);
+  void add_fixed_operand (rtx);
   rtx generate_insn (insn_code);
 
   rtx use_exact_insn (insn_code);
   rtx use_unpred_insn (insn_code);
   rtx use_pred_x_insn (insn_code);
   rtx use_cond_insn (insn_code, unsigned int = DEFAULT_MERGE_ARGNO);
+  rtx use_contiguous_load_insn (insn_code);
+  rtx use_contiguous_store_insn (insn_code);
 
   rtx map_to_rtx_codes (rtx_code, rtx_code, rtx_code);
 
@@ -528,6 +536,15 @@ public:
     gcc_unreachable ();
   }
 
+  /* If the function addresses memory, return a vector mode whose
+     GET_MODE_NUNITS is the number of elements addressed and whose
+     GET_MODE_INNER is the mode of a single scalar memory element.  */
+  virtual machine_mode
+  memory_vector_mode (const function_instance &) const
+  {
+    gcc_unreachable ();
+  }
+
   /* Try to fold the given gimple call.  Return the new gimple statement
      on success, otherwise return null.  */
   virtual gimple *fold (gimple_folder &) const { return NULL; }
@@ -661,6 +678,15 @@ function_instance::memory_scalar_type () const
   return base->memory_scalar_type (*this);
 }
 
+/* If the function addresses memory, return a vector mode whose
+   GET_MODE_NUNITS is the number of elements addressed and whose
+   GET_MODE_INNER is the mode of a single scalar memory element.  */
+inline machine_mode
+function_instance::memory_vector_mode () const
+{
+  return base->memory_vector_mode (*this);
+}
+
 /* Return information about the function's mode suffix.  */
 inline const mode_suffix_info &
 function_instance::mode_suffix () const
author	Christophe Lyon <christophe.lyon@linaro.org>	2023-11-15 08:12:35 +0000
committer	Christophe Lyon <christophe.lyon@linaro.org>	2023-11-20 11:23:56 +0000
commit	0c2037d9d93a8f768cb11698ff794278246bb31f (patch)
tree	bc4df8f2989afbedf41a2aa0b4cb4a274ebb250b /gcc
parent	524c892e642e87da853d2a9d0314dd6c220f59de (diff)
download	gcc-0c2037d9d93a8f768cb11698ff794278246bb31f.zip gcc-0c2037d9d93a8f768cb11698ff794278246bb31f.tar.gz gcc-0c2037d9d93a8f768cb11698ff794278246bb31f.tar.bz2