-rw-r--r-- | gcc/d/intrinsics.cc | 587
-rw-r--r-- | gcc/d/intrinsics.def | 23
-rw-r--r-- | gcc/testsuite/gdc.dg/Wbuiltin_declaration_mismatch1.d (renamed from gcc/testsuite/gdc.dg/Wbuiltin_declaration_mismatch.d) | 0
-rw-r--r-- | gcc/testsuite/gdc.dg/Wbuiltin_declaration_mismatch2.d | 250
-rw-r--r-- | gcc/testsuite/gdc.dg/torture/simd_blendvector.d | 345
-rw-r--r-- | gcc/testsuite/gdc.dg/torture/simd_cond.d | 17
-rw-r--r-- | gcc/testsuite/gdc.dg/torture/simd_convertvector.d | 122
-rw-r--r-- | gcc/testsuite/gdc.dg/torture/simd_load.d | 52
-rw-r--r-- | gcc/testsuite/gdc.dg/torture/simd_logical.d | 19
-rw-r--r-- | gcc/testsuite/gdc.dg/torture/simd_shuffle.d | 454
-rw-r--r-- | gcc/testsuite/gdc.dg/torture/simd_shufflevector.d | 55
-rw-r--r-- | gcc/testsuite/gdc.dg/torture/simd_store.d | 54
-rw-r--r-- | libphobos/libdruntime/Makefile.am | 16
-rw-r--r-- | libphobos/libdruntime/Makefile.in | 19
-rw-r--r-- | libphobos/libdruntime/gcc/simd.d | 359
15 files changed, 2355 insertions, 17 deletions
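
Editorial note, not part of the commit: the new gcc.simd intrinsics below lower to GCC's generic vector codes (VEC_PERM_EXPR for shuffle/shufflevector, VEC_COND_EXPR for blendvector and the comparison masks, IFN_VEC_CONVERT for convertvector). The following minimal D sketch shows how the declarations are meant to be called, loosely mirroring the new torture tests. It assumes a target where __vector(int[4]) and __vector(float[4]) are supported; the lane values in comments follow the semantics described by the patch comments, and the call forms follow the template declarations added in the tests.

    import gcc.simd;

    alias int4 = __vector(int[4]);
    alias float4 = __vector(float[4]);

    void example()
    {
        int4 a = [1, 3, 5, 7];
        int4 b = [2, 3, 4, 5];

        // Unaligned load/store: the pointer need not have vector alignment.
        int4 v = loadUnaligned(&a);
        int4 buf;
        storeUnaligned(&buf, v);

        // shuffle: each mask element indexes into the concatenation of the
        // two inputs (0..3 select from `a', 4..7 select from `b').
        int4 m = [0, 4, 1, 5];
        int4 s = shuffle(a, b, m);              // [1, 2, 3, 3]

        // shufflevector: indices must be compile-time constants; the result
        // has one lane per index, and the index count is a power of two.
        auto pair = shufflevector(a, b, 0, 4);  // __vector(int[2]): [1, 2]

        // convertvector: lane-wise conversion between equal-length vectors.
        float4 f = convertvector!float4(a);     // [1.0f, 3.0f, 5.0f, 7.0f]

        // blendvector: non-zero mask lanes select from the first operand,
        // zero lanes from the second.
        int4 sel = [-1, 0, -1, 0];
        int4 blend = blendvector(a, b, sel);    // [1, 3, 5, 5]

        // Comparison masks: lanes are -1 where the predicate holds, else 0.
        int4 eq = equalMask(a, b);              // [0, -1, 0, 0]
        int4 ge = greaterOrEqualMask(a, b);     // [0, -1, -1, -1]
    }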
diff --git a/gcc/d/intrinsics.cc b/gcc/d/intrinsics.cc
index 0dd5543..454d940 100644
--- a/gcc/d/intrinsics.cc
+++ b/gcc/d/intrinsics.cc
@@ -29,9 +29,12 @@ along with GCC; see the file COPYING3.  If not see
 #include "tm.h"
 #include "function.h"
 #include "tree.h"
+#include "diagnostic.h"
+#include "langhooks.h"
 #include "fold-const.h"
 #include "stringpool.h"
 #include "builtins.h"
+#include "vec-perm-indices.h"
 
 #include "d-tree.h"
 
@@ -161,6 +164,16 @@ maybe_set_intrinsic (FuncDeclaration *decl)
     case INTRINSIC_MULUL:
     case INTRINSIC_NEGS:
     case INTRINSIC_NEGSL:
+    case INTRINSIC_LOADUNALIGNED:
+    case INTRINSIC_STOREUNALIGNED:
+    case INTRINSIC_SHUFFLE:
+    case INTRINSIC_SHUFFLEVECTOR:
+    case INTRINSIC_CONVERTVECTOR:
+    case INTRINSIC_BLENDVECTOR:
+    case INTRINSIC_EQUALMASK:
+    case INTRINSIC_NOTEQUALMASK:
+    case INTRINSIC_GREATERMASK:
+    case INTRINSIC_GREATEREQUALMASK:
     case INTRINSIC_VLOAD8:
     case INTRINSIC_VLOAD16:
     case INTRINSIC_VLOAD32:
@@ -169,6 +182,8 @@ maybe_set_intrinsic (FuncDeclaration *decl)
     case INTRINSIC_VSTORE16:
     case INTRINSIC_VSTORE32:
     case INTRINSIC_VSTORE64:
+      /* Cannot interpret function during CTFE.  If the library
+         provides a definition, its body will be used instead.  */
       break;
 
     case INTRINSIC_POW:
@@ -196,6 +211,314 @@ maybe_set_intrinsic (FuncDeclaration *decl)
     }
 }
 
+/* Helper function for maybe_warn_intrinsic_mismatch.  Issue warning about
+   mismatch in the EXPECTED return type in call to the intrinsic function in
+   CALLEXP, and return TRUE.  */
+
+static bool
+warn_mismatched_return_type (tree callexp, const char *expected)
+{
+  warning_at (EXPR_LOCATION (callexp), OPT_Wbuiltin_declaration_mismatch,
+              "mismatch in return type of intrinsic function %qD "
+              "(%qT, should be %qs)", get_callee_fndecl (callexp),
+              TREE_TYPE (callexp), expected);
+  return true;
+}
+
+/* Helper function for maybe_warn_intrinsic_mismatch.  Issue warning or error
+   about mismatch in the EXPECTED argument type at ARGNO in call to the
+   intrinsic function in CALLEXP, and return TRUE.  */
+
+static bool
+warn_mismatched_argument (tree callexp, unsigned argno, const char *expected)
+{
+  warning_at (EXPR_LOCATION (callexp), OPT_Wbuiltin_declaration_mismatch,
+              "mismatch in argument %u type of intrinsic function %qD "
+              "(%qT, should be %qs)", argno + 1, get_callee_fndecl (callexp),
+              TREE_TYPE (CALL_EXPR_ARG (callexp, argno)), expected);
+  return true;
+}
+
+static bool
+warn_mismatched_argument (tree callexp, unsigned argno, tree expected,
+                          bool error_p = false)
+{
+  if (error_p)
+    error_at (EXPR_LOCATION (callexp),
+              "mismatch in argument %u type of intrinsic function %qD "
+              "(%qT, should be %qT)", argno + 1, get_callee_fndecl (callexp),
+              TREE_TYPE (CALL_EXPR_ARG (callexp, argno)), expected);
+  else
+    warning_at (EXPR_LOCATION (callexp), OPT_Wbuiltin_declaration_mismatch,
+                "mismatch in argument %u type of intrinsic function %qD "
+                "(%qT, should be %qT)", argno + 1, get_callee_fndecl (callexp),
+                TREE_TYPE (CALL_EXPR_ARG (callexp, argno)), expected);
+
+  return true;
+}
+
+/* Helper function for maybe_warn_intrinsic_mismatch.  Builds a vector integer
+   type suitable for the mask argument of INTRINSIC_SHUFFLE from the given
+   input argument TYPE.  */
+
+static tree
+build_shuffle_mask_type (tree type)
+{
+  const unsigned bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (type)));
+  const int unsignedp = TYPE_UNSIGNED (TREE_TYPE (type));
+  tree inner = lang_hooks.types.type_for_size (bits, unsignedp);
+  gcc_assert (inner && TREE_CODE (inner) == INTEGER_TYPE);
+
+  /* %% Get the front-end type for the vector so the D type will be
+     printed (this should really be handled by a D tree printer).  */
+  Type *t = build_frontend_type (inner);
+  gcc_assert (t != NULL);
+  unsigned HOST_WIDE_INT nunits;
+  TYPE_VECTOR_SUBPARTS (type).is_constant (&nunits);
+
+  return build_ctype (TypeVector::create (t->sarrayOf (nunits)));
+}
+
+/* Checks if call to intrinsic FUNCTION in CALLEXP matches the internal
+   type and value constraints that we expect from the library definitions.
+   Returns TRUE and issues a warning if there is a mismatch.
+
+   Note: The return type and parameters are encoded into the signature `deco'
+   string that we match on in maybe_set_intrinsic(), so if the deco mangle
+   string has 'i' in the part that specifies the return type, then the matched
+   intrinsic will always have the return type `int'.
+
+   For templated intrinsics however, we rely on template constraints to ensure
+   that the generic type matches what we expect it to be.  There is still an
+   enforced relationship between a template argument and its instantiated type.
+   For example: `T func(T)(T*)' would have the generic return type `@1T' and
+   generic parameter type `@1PT', so it can be assumed that if the return type
+   matches what we expect, then all parameters are fine as well; if they do
+   not, some internal compiler error has occurred.  Where a templated
+   intrinsic has multiple template arguments, each generic type will need to
+   be checked for its validity.  */
+
+static bool
+maybe_warn_intrinsic_mismatch (tree function, tree callexp)
+{
+  switch (DECL_INTRINSIC_CODE (function))
+    {
+    case INTRINSIC_NONE:
+    default:
+      return false;
+
+    case INTRINSIC_LOADUNALIGNED:
+      {
+        /* Expects the signature:
+           vector(T) loadUnaligned (vector(T)*);  */
+        gcc_assert (call_expr_nargs (callexp) == 1);
+
+        tree ptr = TREE_TYPE (CALL_EXPR_ARG (callexp, 0));
+        if (!VECTOR_TYPE_P (TREE_TYPE (callexp))
+            || !POINTER_TYPE_P (ptr) || !VECTOR_TYPE_P (TREE_TYPE (ptr)))
+          return warn_mismatched_return_type (callexp, "__vector(T)");
+
+        return false;
+      }
+
+    case INTRINSIC_STOREUNALIGNED:
+      {
+        /* Expects the signature:
+           vector(T) storeUnaligned (vector(T)*, vector(T));  */
+        gcc_assert (call_expr_nargs (callexp) == 2);
+
+        tree ptr = TREE_TYPE (CALL_EXPR_ARG (callexp, 0));
+        tree val = TREE_TYPE (CALL_EXPR_ARG (callexp, 1));
+        if (!VECTOR_TYPE_P (TREE_TYPE (callexp))
+            || !POINTER_TYPE_P (ptr) || !VECTOR_TYPE_P (TREE_TYPE (ptr))
+            || !VECTOR_TYPE_P (val))
+          return warn_mismatched_return_type (callexp, "__vector(T)");
+
+        return false;
+      }
+
+    case INTRINSIC_SHUFFLE:
+    case INTRINSIC_BLENDVECTOR:
+      {
+        /* Expects the signature:
+           vector(T) shuffle (vector(T), vector(U), vector(V));
+           vector(T) blendvector (vector(T), vector(U), vector(V));  */
+        gcc_assert (call_expr_nargs (callexp) == 3);
+
+        tree vec0 = TREE_TYPE (CALL_EXPR_ARG (callexp, 0));
+        if (!VECTOR_TYPE_P (TREE_TYPE (callexp))
+            || !VECTOR_TYPE_P (vec0))
+          return warn_mismatched_return_type (callexp, "__vector(T)");
+
+        tree vec1 = TREE_TYPE (CALL_EXPR_ARG (callexp, 1));
+        if (!VECTOR_TYPE_P (vec1))
+          return warn_mismatched_argument (callexp, 1, vec0);
+
+        tree mask = TREE_TYPE (CALL_EXPR_ARG (callexp, 2));
+        if (!VECTOR_TYPE_P (mask) || !VECTOR_INTEGER_TYPE_P (mask))
+          {
+            tree expected = build_shuffle_mask_type (vec0);
+            return warn_mismatched_argument (callexp, 2, expected,
+                                             VECTOR_TYPE_P (mask));
+          }
+
+        /* Types have been validated, now issue errors about violations on
+           the constraints of the intrinsic.  */
+        if (TYPE_MAIN_VARIANT (vec0) != TYPE_MAIN_VARIANT (vec1))
+          return warn_mismatched_argument (callexp, 1, vec0, true);
+
+        /* Vector element sizes should be equal between arguments and mask.  */
+        if (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vec0)))
+            != GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (mask)))
+            || maybe_ne (TYPE_VECTOR_SUBPARTS (vec0),
+                         TYPE_VECTOR_SUBPARTS (mask))
+            || maybe_ne (TYPE_VECTOR_SUBPARTS (vec1),
+                         TYPE_VECTOR_SUBPARTS (mask)))
+          {
+            tree expected = build_shuffle_mask_type (vec0);
+            return warn_mismatched_argument (callexp, 2, expected, true);
+          }
+
+        return false;
+      }
+
+    case INTRINSIC_SHUFFLEVECTOR:
+      {
+        /* Expects the signature:
+           vector(T[N]) shufflevector (vector(T), vector(U), N...);  */
+        gcc_assert (call_expr_nargs (callexp) >= 3);
+        gcc_assert (VECTOR_TYPE_P (TREE_TYPE (callexp)));
+
+        tree vec0 = TREE_TYPE (CALL_EXPR_ARG (callexp, 0));
+        if (!VECTOR_TYPE_P (vec0))
+          return warn_mismatched_argument (callexp, 0, "__vector(T)");
+
+        tree vec1 = TREE_TYPE (CALL_EXPR_ARG (callexp, 1));
+        if (!VECTOR_TYPE_P (vec1))
+          return warn_mismatched_argument (callexp, 1, vec0);
+
+        for (int i = 2; i < call_expr_nargs (callexp); i++)
+          {
+            tree idx = TREE_TYPE (CALL_EXPR_ARG (callexp, i));
+            if (TREE_CODE (idx) != INTEGER_TYPE)
+              return warn_mismatched_argument (callexp, i, d_int_type);
+          }
+
+        /* Types have been validated, now issue errors about violations on
+           the constraints of the intrinsic.  */
+        if (TYPE_MAIN_VARIANT (TREE_TYPE (vec0))
+            != TYPE_MAIN_VARIANT (TREE_TYPE (vec1)))
+          {
+            /* %% Get the front-end type for the vector so the D type will be
+               printed (this should really be handled by a D tree printer).  */
+            unsigned HOST_WIDE_INT nunits;
+            if (!TYPE_VECTOR_SUBPARTS (vec1).is_constant (&nunits))
+              break;
+
+            Type *inner = build_frontend_type (TREE_TYPE (vec0));
+            Type *vector = TypeVector::create (inner->sarrayOf (nunits));
+            return warn_mismatched_argument (callexp, 1,
+                                             build_ctype (vector), true);
+          }
+
+        /* Vector sizes should be known, and number of indices a power of 2.  */
+        unsigned HOST_WIDE_INT vec0_length;
+        unsigned HOST_WIDE_INT vec1_length;
+        if (!TYPE_VECTOR_SUBPARTS (vec0).is_constant (&vec0_length)
+            || !TYPE_VECTOR_SUBPARTS (vec1).is_constant (&vec1_length)
+            || !pow2p_hwi (call_expr_nargs (callexp) - 2))
+          break;
+
+        /* All index arguments must be valid constants as well.  */
+        for (int i = 2; i < call_expr_nargs (callexp); i++)
+          {
+            tree idx = CALL_EXPR_ARG (callexp, i);
+            if (!tree_fits_shwi_p (idx))
+              {
+                error_at (EXPR_LOCATION (callexp),
+                          "argument %qE cannot be read at compile time", idx);
+                return true;
+              }
+
+            HOST_WIDE_INT iidx = tree_to_shwi (idx);
+            if (iidx < 0
+                || (unsigned HOST_WIDE_INT) iidx >= vec0_length + vec1_length)
+              {
+                error_at (EXPR_LOCATION (callexp),
+                          "element index %qE is out of bounds %<[0 .. %E]%>",
+                          idx, build_integer_cst (vec0_length + vec1_length));
+                return true;
+              }
+          }
+
+        return false;
+      }
+
+    case INTRINSIC_CONVERTVECTOR:
+      {
+        /* Expects the signature:
+           vector(T) convertvector (vector(U));  */
+        gcc_assert (call_expr_nargs (callexp) == 1);
+
+        tree ret = TREE_TYPE (callexp);
+        if (!VECTOR_TYPE_P (ret)
+            || (!VECTOR_INTEGER_TYPE_P (ret) && !VECTOR_FLOAT_TYPE_P (ret)))
+          return warn_mismatched_return_type (callexp, "__vector(T)");
+
+        tree arg = TREE_TYPE (CALL_EXPR_ARG (callexp, 0));
+        if (!VECTOR_TYPE_P (arg)
+            || (!VECTOR_INTEGER_TYPE_P (arg) && !VECTOR_FLOAT_TYPE_P (arg)))
+          return warn_mismatched_argument (callexp, 0, "__vector(T)");
+
+        /* Types have been validated, now issue errors about violations on
+           the constraints of the intrinsic.  */
+        if (maybe_ne (TYPE_VECTOR_SUBPARTS (ret), TYPE_VECTOR_SUBPARTS (arg)))
+          {
+            /* %% Get the front-end type for the vector so the D type will be
+               printed (this should really be handled by a D tree printer).  */
+            unsigned HOST_WIDE_INT nunits;
+            if (!TYPE_VECTOR_SUBPARTS (ret).is_constant (&nunits))
+              break;
+
+            Type *inner = build_frontend_type (TREE_TYPE (arg));
+            Type *vector = TypeVector::create (inner->sarrayOf (nunits));
+            return warn_mismatched_argument (callexp, 0,
+                                             build_ctype (vector), true);
+          }
+
+        return false;
+      }
+
+    case INTRINSIC_EQUALMASK:
+    case INTRINSIC_NOTEQUALMASK:
+    case INTRINSIC_GREATERMASK:
+    case INTRINSIC_GREATEREQUALMASK:
+      {
+        /* Expects the signature:
+           vector(T) equalMask(vector(T), vector(T));
+           vector(T) notEqualMask(vector(T), vector(T));
+           vector(T) greaterMask(vector(T), vector(T));
+           vector(T) greaterOrEqualMask(vector(T), vector(T));  */
+        gcc_assert (call_expr_nargs (callexp) == 2);
+
+        tree vec0 = TREE_TYPE (CALL_EXPR_ARG (callexp, 0));
+        tree vec1 = TREE_TYPE (CALL_EXPR_ARG (callexp, 1));
+        if (!VECTOR_TYPE_P (TREE_TYPE (callexp))
+            || !VECTOR_TYPE_P (vec0)
+            || !VECTOR_TYPE_P (vec1)
+            || TYPE_MAIN_VARIANT (vec0) != TYPE_MAIN_VARIANT (vec1))
+          return warn_mismatched_return_type (callexp, "__vector(T)");
+
+        return false;
+      }
+    }
+
+  /* Generic mismatch warning if it hasn't already been handled.  */
+  warning_at (EXPR_LOCATION (callexp), OPT_Wbuiltin_declaration_mismatch,
+              "mismatch in call of intrinsic function %qD", function);
+  return true;
+}
+
 /* Construct a function call to the built-in function CODE, N is the number of
    arguments, and the `...' parameters are the argument expressions.
    The original call expression is held in CALLEXP.  */
@@ -750,6 +1073,231 @@ expand_volatile_store (tree callexp)
   return modify_expr (result, value);
 }
 
+/* Expand a front-end intrinsic call to a vector comparison intrinsic, which
+   is either a call to equalMask(), notEqualMask(), greaterMask(), or
+   greaterOrEqualMask().  These intrinsics take two arguments; the signature
+   is one of:
+
+        vector(T) equalMask(vector(T) vec0, vector(T) vec1);
+        vector(T) notEqualMask(vector(T) vec0, vector(T) vec1);
+        vector(T) greaterMask(vector(T) vec0, vector(T) vec1);
+        vector(T) greaterOrEqualMask(vector(T) vec0, vector(T) vec1);
+
+   This performs an element-wise comparison between two vectors VEC0 and VEC1,
+   returning a vector with signed integral elements.  */
+
+static tree
+expand_intrinsic_vec_cond (tree_code code, tree callexp)
+{
+  tree vec0 = CALL_EXPR_ARG (callexp, 0);
+  tree vec1 = CALL_EXPR_ARG (callexp, 1);
+  tree type = TREE_TYPE (callexp);
+
+  tree cmp = fold_build2_loc (EXPR_LOCATION (callexp), code,
+                              truth_type_for (type), vec0, vec1);
+  return fold_build3_loc (EXPR_LOCATION (callexp), VEC_COND_EXPR, type, cmp,
+                          build_minus_one_cst (type), build_zero_cst (type));
+}
+
+/* Expand a front-end intrinsic call to convertvector().  This takes one
+   argument, the signature to which is:
+
+        vector(T) convertvector (vector(F) vec);
+
+   This converts a vector VEC to TYPE by casting every element in VEC to the
+   element type of TYPE.  The original call expression is held in CALLEXP.  */
+
+static tree
+expand_intrinsic_vec_convert (tree callexp)
+{
+  tree vec = CALL_EXPR_ARG (callexp, 0);
+  tree type = TREE_TYPE (callexp);
+
+  /* Use VIEW_CONVERT for simple vector conversions.  */
+  if ((TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (vec)))
+       == TYPE_MAIN_VARIANT (TREE_TYPE (type)))
+      || (VECTOR_INTEGER_TYPE_P (TREE_TYPE (vec))
+          && VECTOR_INTEGER_TYPE_P (type)
+          && (TYPE_PRECISION (TREE_TYPE (TREE_TYPE (vec)))
+              == TYPE_PRECISION (TREE_TYPE (type)))))
+    return build1_loc (EXPR_LOCATION (callexp), VIEW_CONVERT_EXPR, type, vec);
+
+  return build_call_expr_internal_loc (EXPR_LOCATION (callexp), IFN_VEC_CONVERT,
+                                       type, 1, vec);
+}
+
+/* Expand a front-end intrinsic call to blendvector().  This expects to take
+   three arguments, the signature to which is:
+
+        vector(T) blendvector (vector(T) vec0, vector(U) vec1, vector(M) mask);
+
+   This builds a VEC_COND_EXPR if VEC0, VEC1, and MASK are vector types, VEC0
+   has the same type as VEC1, and VEC0, VEC1, and MASK all have the same number
+   of elements.  The original call expression is held in CALLEXP.  */
+
+static tree
+expand_intrinsic_vec_blend (tree callexp)
+{
+  tree vec0 = CALL_EXPR_ARG (callexp, 0);
+  tree vec1 = CALL_EXPR_ARG (callexp, 1);
+  tree mask = CALL_EXPR_ARG (callexp, 2);
+
+  tree cmp = fold_build2_loc (EXPR_LOCATION (callexp), NE_EXPR,
+                              truth_type_for (TREE_TYPE (mask)),
+                              mask, build_zero_cst (TREE_TYPE (mask)));
+
+  tree ret = fold_build3_loc (EXPR_LOCATION (callexp), VEC_COND_EXPR,
+                              TREE_TYPE (callexp), cmp, vec0, vec1);
+
+  if (!CONSTANT_CLASS_P (vec0) || !CONSTANT_CLASS_P (vec1))
+    ret = force_target_expr (ret);
+
+  return ret;
+}
+
+/* Expand a front-end intrinsic call to shuffle().  This expects to take three
+   arguments, the signature to which is:
+
+        vector(T) shuffle (vector(T) vec0, vector(T) vec1, vector(M) mask);
+
+   This builds a VEC_PERM_EXPR if VEC0, VEC1, and MASK are vector types, VEC0
+   has the same type as VEC1, and VEC0, VEC1, and MASK all have the same number
+   of elements.  The original call expression is held in CALLEXP.  */
+
+static tree
+expand_intrinsic_vec_shuffle (tree callexp)
+{
+  tree vec0 = CALL_EXPR_ARG (callexp, 0);
+  tree vec1 = CALL_EXPR_ARG (callexp, 1);
+  tree mask = CALL_EXPR_ARG (callexp, 2);
+
+  return build3_loc (EXPR_LOCATION (callexp), VEC_PERM_EXPR,
+                     TREE_TYPE (callexp), vec0, vec1, mask);
+}
+
+/* Expand a front-end intrinsic call to shufflevector().  This takes two
+   positional arguments and a variadic list, the signature to which is:
+
+        vector(TM) shufflevector (vector(T) vec0, vector(T) vec1, index...);
+
+   This builds a VEC_PERM_EXPR if VEC0 and VEC1 are vector types, VEC0 has the
+   same element type as VEC1, and the number of elements in INDEX is a valid
+   power of two.  The original call expression is held in CALLEXP.  */
+
+static tree
+expand_intrinsic_vec_shufflevector (tree callexp)
+{
+  tree vec0 = CALL_EXPR_ARG (callexp, 0);
+  tree vec1 = CALL_EXPR_ARG (callexp, 1);
+
+  unsigned HOST_WIDE_INT v0elems, v1elems;
+  TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec0)).is_constant (&v0elems);
+  TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec1)).is_constant (&v1elems);
+
+  unsigned HOST_WIDE_INT num_indices = call_expr_nargs (callexp) - 2;
+  unsigned HOST_WIDE_INT masklen = MAX (num_indices, MAX (v0elems, v1elems));
+  unsigned HOST_WIDE_INT pad_size = (v0elems < masklen ? masklen - v0elems : 0);
+  vec_perm_builder sel (masklen, masklen, 1);
+
+  unsigned n = 0;
+  for (; n < num_indices; ++n)
+    {
+      tree idx = CALL_EXPR_ARG (callexp, n + 2);
+      HOST_WIDE_INT iidx = tree_to_shwi (idx);
+      /* VEC_PERM_EXPR does not allow different sized inputs.  */
+      if ((unsigned HOST_WIDE_INT) iidx >= v0elems)
+        iidx += pad_size;
+
+      sel.quick_push (iidx);
+    }
+
+  /* VEC_PERM_EXPR does not support a result that is smaller than the inputs.  */
+  for (; n < masklen; ++n)
+    sel.quick_push (n);
+
+  vec_perm_indices indices (sel, 2, masklen);
+
+  /* Pad out arguments to the common vector size.  */
+  tree ret_type = build_vector_type (TREE_TYPE (TREE_TYPE (vec0)), masklen);
+  if (v0elems < masklen)
+    {
+      constructor_elt elt = { NULL_TREE, build_zero_cst (TREE_TYPE (vec0)) };
+      vec0 = build_constructor_single (ret_type, NULL_TREE, vec0);
+      for (unsigned i = 1; i < masklen / v0elems; ++i)
+        vec_safe_push (CONSTRUCTOR_ELTS (vec0), elt);
+    }
+
+  if (v1elems < masklen)
+    {
+      constructor_elt elt = { NULL_TREE, build_zero_cst (TREE_TYPE (vec1)) };
+      vec1 = build_constructor_single (ret_type, NULL_TREE, vec1);
+      for (unsigned i = 1; i < masklen / v1elems; ++i)
+        vec_safe_push (CONSTRUCTOR_ELTS (vec1), elt);
+    }
+
+  tree mask_type = build_vector_type (build_nonstandard_integer_type
+        (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (ret_type))), 1),
+        masklen);
+  tree ret = build3_loc (EXPR_LOCATION (callexp), VEC_PERM_EXPR, ret_type, vec0,
+                         vec1, vec_perm_indices_to_tree (mask_type, indices));
+
+  /* Get the low part we are interested in.  */
+  if (num_indices < masklen)
+    {
+      ret = build3_loc (EXPR_LOCATION (callexp), BIT_FIELD_REF,
+                        TREE_TYPE (callexp), ret,
+                        TYPE_SIZE (TREE_TYPE (callexp)), bitsize_zero_node);
+      /* Wrap the low part operation in a TARGET_EXPR so it gets a separate
+         temporary during gimplification.  */
+      ret = force_target_expr (ret);
+    }
+
+  return ret;
+}
+
+/* Expand a front-end intrinsic call to loadUnaligned().  This takes one
+   argument, the signature to which is:
+
+        vector(T) loadUnaligned (vector(T)* ptr)
+
+   This generates a load of a vector from an unaligned address PTR.
+   The original call expression is held in CALLEXP.  */
+
+static tree
+expand_intrinsic_vec_load_unaligned (tree callexp)
+{
+  tree ptr = CALL_EXPR_ARG (callexp, 0);
+
+  tree unaligned_type = build_variant_type_copy (TREE_TYPE (TREE_TYPE (ptr)));
+  SET_TYPE_ALIGN (unaligned_type, 1 * BITS_PER_UNIT);
+  TYPE_USER_ALIGN (unaligned_type) = 1;
+
+  tree load = indirect_ref (unaligned_type, ptr);
+  return convert (TREE_TYPE (callexp), load);
+}
+
+/* Expand a front-end intrinsic call to storeUnaligned().  This takes two
+   arguments, the signature to which is:
+
+        vector(T) storeUnaligned (vector(T)* ptr, vector(T) value)
+
+   This generates an assignment of a vector VALUE to an unaligned address PTR.
+   The original call expression is held in CALLEXP.  */
+
+static tree
+expand_intrinsic_vec_store_unaligned (tree callexp)
+{
+  tree ptr = CALL_EXPR_ARG (callexp, 0);
+  tree vec = CALL_EXPR_ARG (callexp, 1);
+
+  tree unaligned_type = build_variant_type_copy (TREE_TYPE (TREE_TYPE (ptr)));
+  SET_TYPE_ALIGN (unaligned_type, 1 * BITS_PER_UNIT);
+  TYPE_USER_ALIGN (unaligned_type) = 1;
+
+  tree load = indirect_ref (unaligned_type, ptr);
+  return build_assign (MODIFY_EXPR, load, vec);
+}
+
 /* If CALLEXP is for an intrinsic, expand and return inlined compiler
    generated instructions.  Most map directly to GCC builtins, others
    require a little extra work around them.  */
@@ -766,6 +1314,15 @@ maybe_expand_intrinsic (tree callexp)
   if (DECL_BUILT_IN_CTFE (callee) && !doing_semantic_analysis_p)
     return callexp;
 
+  /* Gate the expansion of the intrinsic with constraint checks; if any fail,
+     then bail out without any lowering.  */
+  if (maybe_warn_intrinsic_mismatch (callee, callexp))
+    {
+      /* Reset the built-in flag so that we don't trip fold_builtin.  */
+      set_decl_built_in_function (callee, NOT_BUILT_IN, 0);
+      return callexp;
+    }
+
   intrinsic_code intrinsic = DECL_INTRINSIC_CODE (callee);
   built_in_function code;
 
@@ -913,6 +1470,36 @@ maybe_expand_intrinsic (tree callexp)
     case INTRINSIC_VSTORE64:
       return expand_volatile_store (callexp);
 
+    case INTRINSIC_LOADUNALIGNED:
+      return expand_intrinsic_vec_load_unaligned (callexp);
+
+    case INTRINSIC_STOREUNALIGNED:
+      return expand_intrinsic_vec_store_unaligned (callexp);
+
+    case INTRINSIC_SHUFFLE:
+      return expand_intrinsic_vec_shuffle (callexp);
+
+    case INTRINSIC_SHUFFLEVECTOR:
+      return expand_intrinsic_vec_shufflevector (callexp);
+
+    case INTRINSIC_CONVERTVECTOR:
+      return expand_intrinsic_vec_convert (callexp);
+
+    case INTRINSIC_BLENDVECTOR:
+      return expand_intrinsic_vec_blend (callexp);
+
+    case INTRINSIC_EQUALMASK:
+      return expand_intrinsic_vec_cond (EQ_EXPR, callexp);
+
+    case INTRINSIC_NOTEQUALMASK:
+      return expand_intrinsic_vec_cond (NE_EXPR, callexp);
+
+    case INTRINSIC_GREATERMASK:
+      return expand_intrinsic_vec_cond (GT_EXPR, callexp);
+
+    case INTRINSIC_GREATEREQUALMASK:
+      return expand_intrinsic_vec_cond (GE_EXPR, callexp);
+
     default:
       gcc_unreachable ();
     }
diff --git a/gcc/d/intrinsics.def b/gcc/d/intrinsics.def
index 61c1737..b8d1ec5 100644
--- a/gcc/d/intrinsics.def
+++ b/gcc/d/intrinsics.def
@@ -252,5 +252,28 @@ DEF_D_BUILTIN (INTRINSIC_C_VA_ARG, BUILT_IN_NONE, "va_arg", "core.stdc.stdarg",
 DEF_D_BUILTIN (INTRINSIC_VASTART, BUILT_IN_NONE, "va_start", "core.stdc.stdarg",
                "FJ@7va_listK@1TZv")
 
+/* gcc.simd intrinsics.  */
+
+DEF_D_BUILTIN (INTRINSIC_LOADUNALIGNED, BUILT_IN_NONE, "loadUnaligned",
+               "gcc.simd", "FP@1VZ@1V")
+DEF_D_BUILTIN (INTRINSIC_STOREUNALIGNED, BUILT_IN_NONE, "storeUnaligned",
+               "gcc.simd", "FP@1V@1VZ@1V")
+DEF_D_BUILTIN (INTRINSIC_SHUFFLE, BUILT_IN_NONE, "shuffle", "gcc.simd",
+               "F@2V0@2V1@1MZ@2V0")
+DEF_D_BUILTIN (INTRINSIC_SHUFFLEVECTOR, BUILT_IN_NONE, "shufflevector",
+               "gcc.simd", "F@2V1@2V2@1MZNhH@1M@")
+DEF_D_BUILTIN (INTRINSIC_CONVERTVECTOR, BUILT_IN_NONE, "convertvector",
+               "gcc.simd", "F@1TZ@1V")
+DEF_D_BUILTIN (INTRINSIC_BLENDVECTOR, BUILT_IN_NONE, "blendvector", "gcc.simd",
+               "F@2V0@2V1@1MZ@2V0")
+DEF_D_BUILTIN (INTRINSIC_EQUALMASK, BUILT_IN_NONE, "equalMask", "gcc.simd",
+               "F@1V@1VZ@1V")
+DEF_D_BUILTIN (INTRINSIC_NOTEQUALMASK, BUILT_IN_NONE, "notEqualMask",
+               "gcc.simd", "F@1V@1VZ@1V")
+DEF_D_BUILTIN (INTRINSIC_GREATERMASK, BUILT_IN_NONE, "greaterMask", "gcc.simd",
+               "F@1V@1VZ@1V")
+DEF_D_BUILTIN (INTRINSIC_GREATEREQUALMASK, BUILT_IN_NONE,
+               "greaterOrEqualMask", "gcc.simd", "F@1V@1VZ@1V")
+
 #undef DEF_D_BUILTIN
 #undef DEF_CTFE_BUILTIN
diff --git a/gcc/testsuite/gdc.dg/Wbuiltin_declaration_mismatch.d b/gcc/testsuite/gdc.dg/Wbuiltin_declaration_mismatch1.d
index 5340647..5340647 100644
--- a/gcc/testsuite/gdc.dg/Wbuiltin_declaration_mismatch.d
+++ b/gcc/testsuite/gdc.dg/Wbuiltin_declaration_mismatch1.d
diff --git a/gcc/testsuite/gdc.dg/Wbuiltin_declaration_mismatch2.d b/gcc/testsuite/gdc.dg/Wbuiltin_declaration_mismatch2.d
new file mode 100644
index 0000000..9e90c15
--- /dev/null
+++ b/gcc/testsuite/gdc.dg/Wbuiltin_declaration_mismatch2.d
@@ -0,0 +1,250 @@
+// { dg-additional-options "-mavx" { target avx_runtime } }
+// { dg-do compile { target { avx_runtime || vect_sizes_16B_8B } } }
+module gcc.simd;
+
+alias int4 = __vector(int[4]);
+alias short8 = __vector(short[8]);
+alias float4 = __vector(float[4]);
+alias byte16 = __vector(byte[16]);
+struct fake4 { int[4] v; }
+enum f = fake4();
+
+void test_load_store()
+{
+    loadUnaligned!int(null); // { dg-warning "mismatch in return type" }
+    loadUnaligned!double(null); // { dg-warning "mismatch in return type" }
+    loadUnaligned!int4(null);
+    loadUnaligned!short8(null);
+    loadUnaligned!float4(null);
+    loadUnaligned!byte16(null);
+    loadUnaligned!fake4(null); // { dg-warning "mismatch in return type" }
+
+    storeUnaligned!int(null, 1); // { dg-warning "mismatch in return type" }
+    storeUnaligned!double(null, 1); // { dg-warning "mismatch in return type" }
+    storeUnaligned!int4(null, 1);
+    storeUnaligned!short8(null, 1);
+    storeUnaligned!float4(null, 1);
+    storeUnaligned!byte16(null, 1);
+    storeUnaligned!fake4(null, f); // { dg-warning "mismatch in return type" }
+}
+
+void test_shuffle()
+{
+    shuffle!(int, int, int)(0, 0, 0); // { dg-warning "mismatch in return type" }
+    shuffle!(double, int, int)(0, 0, 0); // { dg-warning "mismatch in return type" }
+    shuffle!(fake4, int, int)(f, 0, 0); // { dg-warning "mismatch in return type" }
+
+    shuffle!(int4, int, int)(0, 0, 0); // { dg-warning "mismatch in argument 2" }
+    shuffle!(int4, double, int)(0, 0, 0); // { dg-warning "mismatch in argument 2" }
+    shuffle!(int4, fake4, int)(0, f, 0); // { dg-warning "mismatch in argument 2" }
+
+    shuffle!(int4, int4, int)(0, 0, 0); // { dg-warning "mismatch in argument 3" }
+    shuffle!(int4, int4, double)(0, 0, 0); // { dg-warning "mismatch in argument 3" }
+    shuffle!(int4, int4, fake4)(0, 0, f); // { dg-warning "mismatch in argument 3" }
+
+    shuffle!(int4, int4, int4)(0, 0, 0);
+    shuffle!(int4, short8, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    shuffle!(int4, float4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    shuffle!(int4, byte16, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    shuffle!(int4, int4, short8)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    shuffle!(int4, int4, float4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    shuffle!(int4, int4, byte16)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+
+    shuffle!(float4, int4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    shuffle!(float4, short8, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    shuffle!(float4, float4, int4)(0, 0, 0);
+    shuffle!(float4, byte16, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    shuffle!(float4, float4, short8)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    shuffle!(float4, float4, float4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    shuffle!(float4, float4, byte16)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+
+    shuffle!(short8, int4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    shuffle!(short8, short8, int4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    shuffle!(short8, float4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    shuffle!(short8, byte16, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    shuffle!(short8, short8, short8)(0, 0, 0);
+    shuffle!(short8, short8, float4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    shuffle!(short8, short8, byte16)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+
+    shuffle!(byte16, int4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    shuffle!(byte16, short8, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    shuffle!(byte16, float4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    shuffle!(byte16, byte16, int4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    shuffle!(byte16, byte16, short8)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    shuffle!(byte16, byte16, float4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    shuffle!(byte16, byte16, byte16)(0, 0, 0);
+}
+
+void test_shufflevector()
+{
+    shufflevector!(int, int4, int)(0, 0, 0); // { dg-warning "mismatch in argument 1" }
+    shufflevector!(double, int4, int)(0, 0, 0); // { dg-warning "mismatch in argument 1" }
+    shufflevector!(fake4, int4, int)(f, 0, 0); // { dg-warning "mismatch in argument 1" }
+
+    shufflevector!(int4, int, int)(0, 0, 0); // { dg-warning "mismatch in argument 2" }
+    shufflevector!(int4, double, int)(0, 0, 0); // { dg-warning "mismatch in argument 2" }
+    shufflevector!(int4, int4, int)(0, 0, 0);
+    shufflevector!(int4, short8, int)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    shufflevector!(int4, float4, int)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    shufflevector!(int4, byte16, int)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    shufflevector!(int4, fake4, int)(0, f, 0); // { dg-warning "mismatch in argument 2" }
+
+    shufflevector!(int4, int4, double)(0, 0, 0); // { dg-warning "mismatch in argument 3" }
+    shufflevector!(int4, int4, int4)(0, 0, 0); // { dg-warning "mismatch in argument 3" }
+    shufflevector!(int4, int4, short8)(0, 0, 0); // { dg-warning "mismatch in argument 3" }
+    shufflevector!(int4, int4, float4)(0, 0, 0); // { dg-warning "mismatch in argument 3" }
+    shufflevector!(int4, int4, byte16)(0, 0, 0); // { dg-warning "mismatch in argument 3" }
+
+    shufflevector!(int4, int4, int, double)(0, 0, 0, 0); // { dg-warning "mismatch in argument 4" }
+    shufflevector!(int4, int4, int, int, double, int)(0, 0, 0, 0, 0, 0); // { dg-warning "mismatch in argument 5" }
+    shufflevector!(int4, int4, int, int, int, double)(0, 0, 0, 0, 0, 0); // { dg-warning "mismatch in argument 6" }
+
+    int i;
+    shufflevector!(int4, int4, int)(0, 0, i); // { dg-error "argument .i. cannot be read at compile time" }
+    shufflevector!(int4, int4, int)(0, 0, -1u); // { dg-error "element index .-1. is out of bounds" }
+    shufflevector!(int4, int4, int)(0, 0, 8); // { dg-error "element index .8. is out of bounds" }
+}
+
+void test_convertvector()
+{
+    convertvector!(int, int)(0); // { dg-warning "mismatch in return type" }
+    convertvector!(double, int)(0); // { dg-warning "mismatch in return type" }
+    convertvector!(fake4, int)(0); // { dg-warning "mismatch in return type" }
+
+    convertvector!(int4, int)(0); // { dg-warning "mismatch in argument 1" }
+    convertvector!(int4, double)(0); // { dg-warning "mismatch in argument 1" }
+    convertvector!(int4, int4)(0);
+    convertvector!(int4, short8)(0); // { dg-error "mismatch in argument 1" }
+    convertvector!(int4, float4)(0);
+    convertvector!(int4, byte16)(0); // { dg-error "mismatch in argument 1" }
+    convertvector!(int4, fake4)(f); // { dg-warning "mismatch in argument 1" }
+
+    convertvector!(short8, int)(0); // { dg-warning "mismatch in argument 1" }
+    convertvector!(short8, double)(0); // { dg-warning "mismatch in argument 1" }
+    convertvector!(short8, int4)(0); // { dg-error "mismatch in argument 1" }
+    convertvector!(short8, short8)(0);
+    convertvector!(short8, float4)(0); // { dg-error "mismatch in argument 1" }
+    convertvector!(short8, byte16)(0); // { dg-error "mismatch in argument 1" }
+    convertvector!(short8, fake4)(f); // { dg-warning "mismatch in argument 1" }
+
+    convertvector!(float4, int)(0); // { dg-warning "mismatch in argument 1" }
+    convertvector!(float4, double)(0); // { dg-warning "mismatch in argument 1" }
+    convertvector!(float4, int4)(0);
+    convertvector!(float4, short8)(0); // { dg-error "mismatch in argument 1" }
+    convertvector!(float4, float4)(0);
+    convertvector!(float4, byte16)(0); // { dg-error "mismatch in argument 1" }
+    convertvector!(float4, fake4)(f); // { dg-warning "mismatch in argument 1" }
+
+    convertvector!(byte16, int)(0); // { dg-warning "mismatch in argument 1" }
+    convertvector!(byte16, double)(0); // { dg-warning "mismatch in argument 1" }
+    convertvector!(byte16, int4)(0); // { dg-error "mismatch in argument 1" }
+    convertvector!(byte16, short8)(0); // { dg-error "mismatch in argument 1" }
+    convertvector!(byte16, float4)(0); // { dg-error "mismatch in argument 1" }
+    convertvector!(byte16, byte16)(0);
+    convertvector!(byte16, fake4)(f); // { dg-warning "mismatch in argument 1" }
+}
+
+void test_blendvector()
+{
+    blendvector!(int, int, int)(0, 0, 0); // { dg-warning "mismatch in return type" }
+    blendvector!(double, int, int)(0, 0, 0); // { dg-warning "mismatch in return type" }
+    blendvector!(fake4, int, int)(f, 0, 0); // { dg-warning "mismatch in return type" }
+
+    blendvector!(int4, int, int)(0, 0, 0); // { dg-warning "mismatch in argument 2" }
+    blendvector!(int4, double, int)(0, 0, 0); // { dg-warning "mismatch in argument 2" }
+    blendvector!(int4, fake4, int)(0, f, 0); // { dg-warning "mismatch in argument 2" }
+
+    blendvector!(int4, int4, int)(0, 0, 0); // { dg-warning "mismatch in argument 3" }
+    blendvector!(int4, int4, double)(0, 0, 0); // { dg-warning "mismatch in argument 3" }
+    blendvector!(int4, int4, fake4)(0, 0, f); // { dg-warning "mismatch in argument 3" }
+
+    blendvector!(int4, int4, int4)(0, 0, 0);
+    blendvector!(int4, short8, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    blendvector!(int4, float4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    blendvector!(int4, byte16, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    blendvector!(int4, int4, short8)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    blendvector!(int4, int4, float4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    blendvector!(int4, int4, byte16)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+
+    blendvector!(float4, int4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    blendvector!(float4, short8, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    blendvector!(float4, float4, int4)(0, 0, 0);
+    blendvector!(float4, byte16, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    blendvector!(float4, float4, short8)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    blendvector!(float4, float4, float4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    blendvector!(float4, float4, byte16)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+
+    blendvector!(short8, int4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    blendvector!(short8, short8, int4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    blendvector!(short8, float4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    blendvector!(short8, byte16, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    blendvector!(short8, short8, short8)(0, 0, 0);
+    blendvector!(short8, short8, float4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    blendvector!(short8, short8, byte16)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+
+    blendvector!(byte16, int4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    blendvector!(byte16, short8, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    blendvector!(byte16, float4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    blendvector!(byte16, byte16, int4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    blendvector!(byte16, byte16, short8)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    blendvector!(byte16, byte16, float4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    blendvector!(byte16, byte16, byte16)(0, 0, 0);
+}
+
+void test_comparison()
+{
+    equalMask!int(0, 0); // { dg-warning "mismatch in return type" }
+    equalMask!double(0, 0); // { dg-warning "mismatch in return type" }
+    equalMask!int4(0, 0);
+    equalMask!short8(0, 0);
+    equalMask!float4(0, 0);
+    equalMask!byte16(0, 0);
+    equalMask!fake4(f, f); // { dg-warning "mismatch in return type" }
+
+    notEqualMask!int(0, 0); // { dg-warning "mismatch in return type" }
+    notEqualMask!double(0, 0); // { dg-warning "mismatch in return type" }
+    notEqualMask!int4(0, 0);
+    notEqualMask!short8(0, 0);
+    notEqualMask!float4(0, 0);
+    notEqualMask!byte16(0, 0);
+    notEqualMask!fake4(f, f); // { dg-warning "mismatch in return type" }
+
+    greaterMask!int(0, 0); // { dg-warning "mismatch in return type" }
+    greaterMask!double(0, 0); // { dg-warning "mismatch in return type" }
+    greaterMask!int4(0, 0);
+    greaterMask!short8(0, 0);
+    greaterMask!float4(0, 0);
+    greaterMask!byte16(0, 0);
+    greaterMask!fake4(f, f); // { dg-warning "mismatch in return type" }
+
+    greaterOrEqualMask!int(0, 0); // { dg-warning "mismatch in return type" }
+    greaterOrEqualMask!double(0, 0); // { dg-warning "mismatch in return type" }
+    greaterOrEqualMask!int4(0, 0);
+    greaterOrEqualMask!short8(0, 0);
+    greaterOrEqualMask!float4(0, 0);
+    greaterOrEqualMask!byte16(0, 0);
+    greaterOrEqualMask!fake4(f, f); // { dg-warning "mismatch in return type" }
+}
+
+// The following declarations of the simd intrinsics are without any guards
+// to verify `d/intrinsics.cc` is doing checks to prevent invalid lowerings.
+V loadUnaligned(V)(const V*);
+V storeUnaligned(V)(V*, V);
+
+V0 shuffle(V0, V1, M)(V0, V1, M);
+
+// Use overloads to test different argument positions.
+template E(V) { alias typeof(V.array[0]) E; } +enum isV(T) = is(T : __vector(V[N]), V, size_t N); + +__vector(E!V1[M.length]) shufflevector(V1, V2, M...)(V1, V2, M) if (isV!V1 && !isV!V2); +__vector(E!V2[M.length]) shufflevector(V1, V2, M...)(V1, V2, M) if (isV!V2 && !isV!V1); +__vector(E!V1[M.length]) shufflevector(V1, V2, M...)(V1, V2, M) if (isV!V1 && isV!V2); + +V convertvector(V, T)(T); +V0 blendvector(V0, V1, M)(V0, V1, M); + +V equalMask(V)(V, V); +V notEqualMask(V)(V, V); +V greaterMask(V)(V, V); +V greaterOrEqualMask(V)(V, V); diff --git a/gcc/testsuite/gdc.dg/torture/simd_blendvector.d b/gcc/testsuite/gdc.dg/torture/simd_blendvector.d new file mode 100644 index 0000000..42459bd --- /dev/null +++ b/gcc/testsuite/gdc.dg/torture/simd_blendvector.d @@ -0,0 +1,345 @@ +// { dg-additional-options "-mavx" { target avx_runtime } } +// { dg-skip-if "needs gcc/config.d" { ! d_runtime } } +import gcc.simd; + +void testblendvector(V, VI = V)() +{ + alias E = typeof(V.array[0]); + enum numElements = V.sizeof / E.sizeof; + + static if (numElements == 16) + { + // Test fragment for vectors with 16 elements + immutable V[5] in1 = + [[ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ], + [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ], + [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ], + [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ], + [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ]]; + + immutable V in2 = + [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45 ]; + + immutable VI[5] mask1 = + [[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], + [ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 ], + [ 7, 6, 5, 4, 16, 17, 18, 19, 31, 30, 29, 28, 3, 2, 1, 0 ], + [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], + [ 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 ]]; + + immutable V[5] out1 = + [[30, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], + [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], + [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 45], + [30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], + [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]]; + } + else static if (numElements == 8) + { + // Test fragment for vectors with 8 elements + static if (is(E == uint)) + { + enum E A1 = 0x11121314; + enum E B1 = 0x21222324; + enum E C1 = 0x31323334; + enum E D1 = 0x41424344; + enum E E1 = 0x51525354; + enum E F1 = 0x61626364; + enum E G1 = 0x71727374; + enum E H1 = 0x81828384; + + enum E A2 = 0x91929394; + enum E B2 = 0xa1a2a3a4; + enum E C2 = 0xb1b2b3b4; + enum E D2 = 0xc1c2c3c4; + enum E E2 = 0xd1d2d3d4; + enum E F2 = 0xe1e2e3e4; + enum E G2 = 0xf1f2f3f4; + enum E H2 = 0x01020304; + } + else static if (is(E == ushort)) + { + enum E A1 = 0x1112; + enum E B1 = 0x2122; + enum E C1 = 0x3132; + enum E D1 = 0x4142; + enum E E1 = 0x5152; + enum E F1 = 0x6162; + enum E G1 = 0x7172; + enum E H1 = 0x8182; + + enum E A2 = 0x9192; + enum E B2 = 0xa1a2; + enum E C2 = 0xb1b2; + enum E D2 = 0xc1c2; + enum E E2 = 0xd1d2; + enum E F2 = 0xe1e2; + enum E G2 = 0xf1f2; + enum E H2 = 0x0102; + } + else static if (is(E == ubyte)) + { + enum E A1 = 0x11; + enum E B1 = 0x12; + enum E C1 = 0x13; + enum E D1 = 0x14; + enum E E1 = 0x15; + enum E F1 = 0x16; + enum E G1 = 0x17; + enum E H1 = 0x18; + + enum E A2 = 0xf1; + enum E B2 = 0xf2; + enum E C2 = 0xf3; + enum E D2 = 0xf4; + enum E E2 = 0xf5; + enum E F2 
= 0xf6; + enum E G2 = 0xf7; + enum E H2 = 0xf8; + } + else + enum unsupported = true; + + static if (!__traits(compiles, unsupported)) + { + immutable V[6] in1 = + [[ A1, B1, C1, D1, E1, F1, G1, H1 ], + [ A1, B1, C1, D1, E1, F1, G1, H1 ], + [ A1, B1, C1, D1, E1, F1, G1, H1 ], + [ A1, B1, C1, D1, E1, F1, G1, H1 ], + [ A1, B1, C1, D1, E1, F1, G1, H1 ], + [ A1, B1, C1, D1, E1, F1, G1, H1 ]]; + + + immutable V in2 = + [ A2, B2, C2, D2, E2, F2, G2, H2 ]; + + immutable VI[6] mask1 = + [[ 0, 1, 2 , 3 , 4 , 5 , 6 , 0 ], + [ 8, 9, 0, 11, 12, 13, 0, 15 ], + [ 0, 8, 1, 0, 2, 0, 3, 11 ], + [ 0, 15, 4, 11, 0, 3, 7, 8 ], + [ 0, 0, 0, 0, 0, 0, 0, 0 ], + [ 0x1e, 0x2e, 0x3e, 0x4e, 0x5e, 0x6e, 0x7e, 0x8e ]]; + + immutable V[6] out1 = + [[ A2, B1, C1, D1, E1, F1, G1, H2 ], + [ A1, B1, C2, D1, E1, F1, G2, H1 ], + [ A2, B1, C1, D2, E1, F2, G1, H1 ], + [ A2, B1, C1, D1, E2, F1, G1, H1 ], + [ A2, B2, C2, D2, E2, F2, G2, H2 ], + [ A1, B1, C1, D1, E1, F1, G1, H1 ]]; + } + } + else static if (numElements == 4) + { + // Test fragment for vectors with 4 elements + static if (is(E == double)) + { + enum E A = 0.69314718055994530942; + enum E B = 2.7182818284590452354; + enum E C = 2.30258509299404568402; + enum E D = 1.4426950408889634074; + + enum E W = 0.31830988618379067154; + enum E X = 3.14159265358979323846; + enum E Y = 1.41421356237309504880; + enum E Z = 0.70710678118654752440; + } + else static if (is(E == float)) + { + enum E A = 0.69314718055994530942f; + enum E B = 2.7182818284590452354f; + enum E C = 2.30258509299404568402f; + enum E D = 1.4426950408889634074f; + + enum E W = 0.31830988618379067154f; + enum E X = 3.14159265358979323846f; + enum E Y = 1.41421356237309504880f; + enum E Z = 0.70710678118654752440f; + } + else static if (is(E == ulong)) + { + enum E A = 0x1112131415161718; + enum E B = 0x2122232425262728; + enum E C = 0x3132333435363738; + enum E D = 0x4142434445464748; + + enum E W = 0xc1c2c3c4c5c6c7c8; + enum E X = 0xd1d2d3d4d5d6d7d8; + enum E Y = 0xe1e2e3e4e5e6e7e8; + enum E Z = 0xf1f2f3f4f5f6f7f8; + } + else static if (is(E == uint)) + { + enum E A = 0x11121314; + enum E B = 0x21222324; + enum E C = 0x31323334; + enum E D = 0x41424344; + + enum E W = 0xc1c2c3c4; + enum E X = 0xd1d2d3d4; + enum E Y = 0xe1e2e3e4; + enum E Z = 0xf1f2f3f4; + } + else + enum unsupported = true; + + static if (!__traits(compiles, unsupported)) + { + immutable V[6] in1 = + [[ A, B, C, D ], + [ A, B, C, D ], + [ A, B, C, D ], + [ A, B, C, D ], + [ A, B, C, D ], + [ A, B, C, D ]]; + + immutable V in2 = [ W, X, Y, Z ]; + + immutable VI[6] mask1 = + [[ 0, 1, 2, 3 ], + [ 4, 0, 6, 7 ], + [ 0, 4, 0, 5 ], + [ 0, 7, 4, 0 ], + [ 0, 0, 0, 0 ], + [ 7, 7, 7, 7 ]]; + + immutable V[6] out1 = + [[ W, B, C, D ], + [ A, X, C, D ], + [ W, B, Y, D ], + [ W, B, C, Z ], + [ W, X, Y, Z ], + [ A, B, C, D ]]; + } + } + else static if (numElements == 2) + { + // Test fragment for vectors with 2 elements + static if (is(E == double)) + { + enum E A = 0.69314718055994530942; + enum E B = 2.7182818284590452354; + + enum E X = 3.14159265358979323846; + enum E Y = 1.41421356237309504880; + } + else static if (is(E == float)) + { + enum E A = 0.69314718055994530942f; + enum E B = 2.7182818284590452354f; + + enum E X = 3.14159265358979323846f; + enum E Y = 1.41421356237309504880f; + } + else static if (is(E == ulong)) + { + enum E A = 0x1112131415161718; + enum E B = 0x2122232425262728; + + enum E X = 0xc1c2c3c4c5c6c7c8; + enum E Y = 0xd1d2d3d4d5d6d7d8; + } + else static if (is(E == uint)) + { + enum E A = 0x11121314; + enum E B = 0x21222324; 
+ + enum E X = 0xd1d2d3d4; + enum E Y = 0xe1e2e3e4; + } + else + enum unsupported = true; + + static if (!__traits(compiles, unsupported)) + { + immutable V[7] in1 = + [[ A, B ], + [ A, B ], + [ A, B ], + [ A, B ], + [ A, B ], + [ A, B ], + [ A, B ]]; + + immutable V in2 = [ X, Y ]; + + immutable VI[7] mask1 = + [[ 0, 1 ], + [ 2, 3 ], + [ 0, 2 ], + [ 2, 1 ], + [ 3, 0 ], + [ 0, 0 ], + [ 3, 3 ]]; + + immutable V[7] out1 = + [[ X, B ], + [ A, B ], + [ X, B ], + [ A, B ], + [ A, Y ], + [ X, Y ], + [ A, B ]]; + } + } + else + enum unsupported = true; + + static if (!__traits(compiles, unsupported)) + { + static foreach (i; 0 .. in1.length) + assert(blendvector(in1[i], in2, mask1[i]).array == out1[i].array); + } +} + +void main() +{ + static if (__traits(compiles, __vector(ubyte[16]))) + testblendvector!(__vector(ubyte[16]))(); + + static if (__traits(compiles, __vector(ushort[16]))) + testblendvector!(__vector(ushort[16]))(); + + static if (__traits(compiles, __vector(ubyte[8]))) + testblendvector!(__vector(ubyte[8]))(); + + static if (__traits(compiles, __vector(ushort[8]))) + testblendvector!(__vector(ushort[8]))(); + + static if (__traits(compiles, __vector(uint[8]))) + testblendvector!(__vector(uint[8]))(); + + static if (__traits(compiles, __vector(ulong[4]))) + { + testblendvector!(__vector(ulong[4])); + + static if (__traits(compiles, __vector(double[4]))) + testblendvector!(__vector(double[4]), __vector(ulong[4])); + } + + static if (__traits(compiles, __vector(uint[4]))) + { + testblendvector!(__vector(uint[4])); + + static if (__traits(compiles, __vector(float[4]))) + testblendvector!(__vector(float[4]), __vector(uint[4])); + } + + static if (__traits(compiles, __vector(ulong[2]))) + { + testblendvector!(__vector(ulong[2])); + + static if (__traits(compiles, __vector(double[2]))) + testblendvector!(__vector(double[2]), __vector(ulong[2])); + } + + static if (__traits(compiles, __vector(uint[2]))) + { + testblendvector!(__vector(uint[2])); + + static if (__traits(compiles, __vector(float[2]))) + testblendvector!(__vector(float[2]), __vector(uint[2])); + } +} diff --git a/gcc/testsuite/gdc.dg/torture/simd_cond.d b/gcc/testsuite/gdc.dg/torture/simd_cond.d new file mode 100644 index 0000000..1548956 --- /dev/null +++ b/gcc/testsuite/gdc.dg/torture/simd_cond.d @@ -0,0 +1,17 @@ +// { dg-skip-if "needs gcc/config.d" { ! d_runtime } } + +import gcc.simd; + +void main() +{ + static if (__traits(compiles, __vector(int[4]))) + { + __gshared __vector(int[4]) a = [1,3,5,7]; + __gshared __vector(int[4]) b = [2,3,4,5]; + + assert(equalMask(a, b).array == [0,-1,0,0]); + assert(notEqualMask(a, b).array == [-1,0,-1,-1]); + assert(greaterMask(a, b).array == [0,0,-1,-1]); + assert(greaterOrEqualMask(a, b).array == [0,-1,-1,-1]); + } +} diff --git a/gcc/testsuite/gdc.dg/torture/simd_convertvector.d b/gcc/testsuite/gdc.dg/torture/simd_convertvector.d new file mode 100644 index 0000000..0d6b18e --- /dev/null +++ b/gcc/testsuite/gdc.dg/torture/simd_convertvector.d @@ -0,0 +1,122 @@ +// { dg-additional-options "-mavx" { target avx_runtime } } +// { dg-skip-if "needs gcc/config.d" { ! 
d_runtime } } +import gcc.simd; + +void main () +{ + static if (__traits(compiles, __vector(int[4]))) + alias int4 = __vector(int[4]); + static if (__traits(compiles, __vector(uint[4]))) + alias uint4 = __vector(uint[4]); + static if (__traits(compiles, __vector(float[4]))) + alias float4 = __vector(float[4]); + static if (__traits(compiles, __vector(double[4]))) + alias double4 = __vector(double[4]); + + static if (__traits(compiles, int4)) + { + union U1 { int4 v; int[4] a; } + U1 u1; + } + static if (__traits(compiles, uint4)) + { + union U2 { uint4 v; uint[4] a; } + U2 u2; + } + static if (__traits(compiles, float4)) + { + union U3 { float4 v; float[4] a; } + U3 u3; + } + static if (__traits(compiles, double4)) + { + union U4 { double4 v; double[4] a; } + U4 u4; + } + + static if (__traits(compiles, u1) && __traits(compiles, u2)) + { + static void f1(ref uint4 x, out int4 y) + { + y = convertvector!int4(x); + } + static foreach (i; 0 .. 4) + u2.a[i] = i * 2; + f1(u2.v, u1.v); + static foreach (i; 0 .. 4) + assert(u1.a[i] == i * 2); + } + + static if (__traits(compiles, u1) && __traits(compiles, u3)) + { + static void f2(ref float4 x, out int4 y) + { + y = convertvector!int4(x); + } + + static void f3(ref int4 x, out float4 y) + { + y = convertvector!float4(x); + } + + static foreach (i; 0 .. 4) + u3.a[i] = i - 2.25f; + f2(u3.v, u1.v); + static foreach (i; 0 .. 4) + assert(u1.a[i] == (i == 3 ? 0 : i - 2)); + + static foreach (i; 0 .. 4) + u3.a[i] = i + 0.75f; + f2(u3.v, u1.v); + static foreach (i; 0 .. 4) + assert(u1.a[i] == i); + + static foreach (i; 0 .. 4) + u1.a[i] = 7 * i - 5; + f3(u1.v, u3.v); + static foreach (i; 0 .. 4) + assert(u3.a[i] == 7 * i - 5); + } + static if (__traits(compiles, u1) && __traits(compiles, u4)) + { + static void f4(ref double4 x, out int4 y) + { + y = convertvector!int4(x); + } + + static void f5(ref int4 x, out double4 y) + { + y = convertvector!double4(x); + } + + static foreach (i; 0 .. 4) + u4.a[i] = i - 2.25; + f4(u4.v, u1.v); + static foreach (i; 0 .. 4) + assert(u1.a[i] == (i == 3 ? 0 : i - 2)); + + static foreach (i; 0 .. 4) + u4.a[i] = i + 0.75; + f4(u4.v, u1.v); + static foreach (i; 0 .. 4) + assert(u1.a[i] == i); + + static foreach (i; 0 .. 4) + u1.a[i] = 7 * i - 5; + f5(u1.v, u4.v); + static foreach (i; 0 .. 4) + assert(u4.a[i] == 7 * i - 5); + } + static if (__traits(compiles, u4)) + { + static void f6(out double4 x) + { + int4 a = [1, 2, -3, -4]; + x = convertvector!double4(a); + } + + f6(u4.v); + static foreach (i; 0 .. 4) + assert(u4.a[i] == (i >= 2 ? -1 - i : i + 1)); + } +} diff --git a/gcc/testsuite/gdc.dg/torture/simd_load.d b/gcc/testsuite/gdc.dg/torture/simd_load.d new file mode 100644 index 0000000..188ffda --- /dev/null +++ b/gcc/testsuite/gdc.dg/torture/simd_load.d @@ -0,0 +1,52 @@ +// { dg-additional-options "-mavx" { target avx_runtime } } +// { dg-skip-if "needs gcc/config.d" { ! 
d_runtime } } +import gcc.simd; + +void main() +{ + ubyte[32] data; + foreach (i; 0..data.length) + { + data[i] = cast(ubyte)i; + } + + // to test all alignments from 1 ~ 16 + foreach (i; 0..16) + { + ubyte* d = &data[i]; + + void test(T)() + { + // load the data + T v = loadUnaligned(cast(T*)d); + + // check that the data was loaded correctly + ubyte* ptrToV = cast(ubyte*)&v; + foreach (j; 0..T.sizeof) + assert(ptrToV[j] == d[j]); + } + + static if (__traits(compiles, __vector(void[16]))) + test!(__vector(void[16]))(); + static if (__traits(compiles, __vector(byte[16]))) + test!(__vector(byte[16]))(); + static if (__traits(compiles, __vector(ubyte[16]))) + test!(__vector(ubyte[16]))(); + static if (__traits(compiles, __vector(short[8]))) + test!(__vector(short[8]))(); + static if (__traits(compiles, __vector(ushort[8]))) + test!(__vector(ushort[8]))(); + static if (__traits(compiles, __vector(int[4]))) + test!(__vector(int[4]))(); + static if (__traits(compiles, __vector(uint[4]))) + test!(__vector(uint[4]))(); + static if (__traits(compiles, __vector(long[2]))) + test!(__vector(long[2]))(); + static if (__traits(compiles, __vector(ulong[2]))) + test!(__vector(ulong[2]))(); + static if (__traits(compiles, __vector(double[2]))) + test!(__vector(double[2]))(); + static if (__traits(compiles, __vector(float[4]))) + test!(__vector(float[4]))(); + } +} diff --git a/gcc/testsuite/gdc.dg/torture/simd_logical.d b/gcc/testsuite/gdc.dg/torture/simd_logical.d new file mode 100644 index 0000000..e9c23f5 --- /dev/null +++ b/gcc/testsuite/gdc.dg/torture/simd_logical.d @@ -0,0 +1,19 @@ +// { dg-skip-if "needs gcc/config.d" { ! d_runtime } } + +import gcc.simd; + +void main() +{ + static if (__traits(compiles, __vector(int[4]))) + { + __gshared __vector(int[4]) a = [1,0,-1,2]; + + assert(notMask(a).array == [0,-1,0,0]); + + assert(andAndMask(a, 1).array == [-1,0,-1,-1]); + assert(andAndMask(a, 0).array == [0,0,0,0]); + + assert(orOrMask(a, 1).array == [-1,-1,-1,-1]); + assert(orOrMask(a, 0).array == [-1,0,-1,-1]); + } +} diff --git a/gcc/testsuite/gdc.dg/torture/simd_shuffle.d b/gcc/testsuite/gdc.dg/torture/simd_shuffle.d new file mode 100644 index 0000000..3629cee --- /dev/null +++ b/gcc/testsuite/gdc.dg/torture/simd_shuffle.d @@ -0,0 +1,454 @@ +// { dg-additional-options "-mavx" { target avx_runtime } } +// { dg-skip-if "needs gcc/config.d" { ! 
d_runtime } } +import gcc.simd; + +void testshuffle(V, VI = V)() +{ + alias E = typeof(V.array[0]); + enum numElements = V.sizeof / E.sizeof; + + static if (numElements == 16) + { + // Test fragment for vectors with 16 elements + immutable V[5] in1 = + [[ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ], + [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ], + [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ], + [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ], + [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ]]; + + immutable VI[5] mask1 = + [[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ], + [ 0x10, 0x21, 0x32, 0x43, 0x54, 0x65, 0x76, 0x87, + 0x98, 0xa9, 0xba, 0xcb, 0xdc, 0xed, 0xfe, 0xff ] , + [ 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 ], + [ 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 ], + [ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 ]]; + + immutable V[5] out1 = + [[ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ], + [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ], + [ 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 ], + [ 1, 3, 5, 7, 9, 11, 13, 15, 2, 4, 6, 8, 10, 12, 14, 16 ], + [ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 ]]; + + immutable V[5] in2 = + [[ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ], + [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ], + [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ], + [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ], + [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ]]; + + immutable V in3 = + [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45 ]; + + immutable VI[5] mask2 = + [[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], + [ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 ], + [ 7, 6, 5, 4, 16, 17, 18, 19, 31, 30, 29, 28, 3, 2, 1, 0 ], + [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], + [ 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 ]]; + + immutable V[5] out2 = + [[ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ], + [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45 ], + [ 17, 16, 15, 14, 30, 31, 32, 33, 45, 44, 43, 42, 13, 12, 11, 10 ], + [ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10 ], + [ 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45 ]]; + } + else static if (numElements == 8) + { + // Test fragment for vectors with 8 elements + static if (is(E == uint)) + { + enum E A1 = 0x11121314; + enum E B1 = 0x21222324; + enum E C1 = 0x31323334; + enum E D1 = 0x41424344; + enum E E1 = 0x51525354; + enum E F1 = 0x61626364; + enum E G1 = 0x71727374; + enum E H1 = 0x81828384; + + enum E A2 = 0x91929394; + enum E B2 = 0xa1a2a3a4; + enum E C2 = 0xb1b2b3b4; + enum E D2 = 0xc1c2c3c4; + enum E E2 = 0xd1d2d3d4; + enum E F2 = 0xe1e2e3e4; + enum E G2 = 0xf1f2f3f4; + enum E H2 = 0x01020304; + } + else static if (is(E == ushort)) + { + enum E A1 = 0x1112; + enum E B1 = 0x2122; + enum E C1 = 0x3132; + enum E D1 = 0x4142; + enum E E1 = 0x5152; + enum E F1 = 0x6162; + enum E G1 = 0x7172; + enum E H1 = 0x8182; + + enum E A2 = 0x9192; + enum E B2 = 0xa1a2; + enum E C2 = 0xb1b2; + enum E D2 = 0xc1c2; + enum E E2 = 0xd1d2; + enum E F2 = 0xe1e2; + enum E G2 = 0xf1f2; + enum E H2 = 0x0102; + } + else static if (is(E == ubyte)) + { + enum E A1 = 0x11; + enum E B1 = 0x12; + enum E C1 = 0x13; + enum E D1 = 0x14; + enum E E1 = 0x15; + enum E F1 = 0x16; + enum 
+            enum E G1 = 0x17;
+            enum E H1 = 0x18;
+
+            enum E A2 = 0xf1;
+            enum E B2 = 0xf2;
+            enum E C2 = 0xf3;
+            enum E D2 = 0xf4;
+            enum E E2 = 0xf5;
+            enum E F2 = 0xf6;
+            enum E G2 = 0xf7;
+            enum E H2 = 0xf8;
+        }
+        else
+            enum unsupported = true;
+
+        static if (!__traits(compiles, unsupported))
+        {
+            immutable V[8] in1 =
+                [[ A1, B1, C1, D1, E1, F1, G1, H1 ],
+                 [ A1, B1, C1, D1, E1, F1, G1, H1 ],
+                 [ A1, B1, C1, D1, E1, F1, G1, H1 ],
+                 [ A1, B1, C1, D1, E1, F1, G1, H1 ],
+                 [ A1, B1, C1, D1, E1, F1, G1, H1 ],
+                 [ A2, B2, C2, D2, E2, F2, G2, H2 ],
+                 [ A2, B2, C2, D2, E2, F2, G2, H2 ],
+                 [ A2, B2, C2, D2, E2, F2, G2, H2 ]];
+
+            immutable VI[8] mask1 =
+                [[ 0, 1, 2, 3, 4, 5, 6, 7 ],
+                 [ 0x10, 0x21, 0x32, 0x43, 0x54, 0x65, 0x76, 0x87 ],
+                 [ 7, 6, 5, 4, 3, 2, 1, 0 ],
+                 [ 7, 0, 5, 3, 2, 4, 1, 6 ],
+                 [ 0, 2, 1, 3, 4, 6, 5, 7 ],
+                 [ 3, 1, 2, 0, 7, 5, 6, 4 ],
+                 [ 0, 0, 0, 0, 0, 0, 0, 0 ],
+                 [ 1, 6, 1, 6, 1, 6, 1, 6 ]];
+
+            immutable V[8] out1 =
+                [[ A1, B1, C1, D1, E1, F1, G1, H1 ],
+                 [ A1, B1, C1, D1, E1, F1, G1, H1 ],
+                 [ H1, G1, F1, E1, D1, C1, B1, A1 ],
+                 [ H1, A1, F1, D1, C1, E1, B1, G1 ],
+                 [ A1, C1, B1, D1, E1, G1, F1, H1 ],
+                 [ D2, B2, C2, A2, H2, F2, G2, E2 ],
+                 [ A2, A2, A2, A2, A2, A2, A2, A2 ],
+                 [ B2, G2, B2, G2, B2, G2, B2, G2 ]];
+
+            immutable V[6] in2 =
+                [[ A1, B1, C1, D1, E1, F1, G1, H1 ],
+                 [ A1, B1, C1, D1, E1, F1, G1, H1 ],
+                 [ A1, B1, C1, D1, E1, F1, G1, H1 ],
+                 [ A1, B1, C1, D1, E1, F1, G1, H1 ],
+                 [ A1, B1, C1, D1, E1, F1, G1, H1 ],
+                 [ A1, B1, C1, D1, E1, F1, G1, H1 ]];
+
+            immutable V in3 =
+                [ A2, B2, C2, D2, E2, F2, G2, H2 ];
+
+            immutable VI[6] mask2 =
+                [[ 0, 1, 2, 3, 4, 5, 6, 7 ],
+                 [ 8, 9, 10, 11, 12, 13, 14, 15 ],
+                 [ 0, 8, 1, 9, 2, 10, 3, 11 ],
+                 [ 0, 15, 4, 11, 12, 3, 7, 8 ],
+                 [ 0, 0, 0, 0, 0, 0, 0, 0 ],
+                 [ 0x1e, 0x2e, 0x3e, 0x4e, 0x5e, 0x6e, 0x7e, 0x8e ]];
+
+            immutable V[6] out2 =
+                [[ A1, B1, C1, D1, E1, F1, G1, H1 ],
+                 [ A2, B2, C2, D2, E2, F2, G2, H2 ],
+                 [ A1, A2, B1, B2, C1, C2, D1, D2 ],
+                 [ A1, H2, E1, D2, E2, D1, H1, A2 ],
+                 [ A1, A1, A1, A1, A1, A1, A1, A1 ],
+                 [ G2, G2, G2, G2, G2, G2, G2, G2 ]];
+        }
+    }
+    else static if (numElements == 4)
+    {
+        // Test fragment for vectors with 4 elements
+        static if (is(E == double))
+        {
+            enum E A = 0.69314718055994530942;
+            enum E B = 2.7182818284590452354;
+            enum E C = 2.30258509299404568402;
+            enum E D = 1.4426950408889634074;
+
+            enum E W = 0.31830988618379067154;
+            enum E X = 3.14159265358979323846;
+            enum E Y = 1.41421356237309504880;
+            enum E Z = 0.70710678118654752440;
+        }
+        else static if (is(E == float))
+        {
+            enum E A = 0.69314718055994530942f;
+            enum E B = 2.7182818284590452354f;
+            enum E C = 2.30258509299404568402f;
+            enum E D = 1.4426950408889634074f;
+
+            enum E W = 0.31830988618379067154f;
+            enum E X = 3.14159265358979323846f;
+            enum E Y = 1.41421356237309504880f;
+            enum E Z = 0.70710678118654752440f;
+        }
+        else static if (is(E == ulong))
+        {
+            enum E A = 0x1112131415161718;
+            enum E B = 0x2122232425262728;
+            enum E C = 0x3132333435363738;
+            enum E D = 0x4142434445464748;
+
+            enum E W = 0xc1c2c3c4c5c6c7c8;
+            enum E X = 0xd1d2d3d4d5d6d7d8;
+            enum E Y = 0xe1e2e3e4e5e6e7e8;
+            enum E Z = 0xf1f2f3f4f5f6f7f8;
+        }
+        else static if (is(E == uint))
+        {
+            enum E A = 0x11121314;
+            enum E B = 0x21222324;
+            enum E C = 0x31323334;
+            enum E D = 0x41424344;
+
+            enum E W = 0xc1c2c3c4;
+            enum E X = 0xd1d2d3d4;
+            enum E Y = 0xe1e2e3e4;
+            enum E Z = 0xf1f2f3f4;
+        }
+        else
+            enum unsupported = true;
+
+        static if (!__traits(compiles, unsupported))
+        {
+            immutable V[8] in1 =
+                [[ A, B, C, D ],
+                 [ A, B, C, D ],
+                 [ A, B, C, D ],
+                 [ A, B, C, D ],
+                 [ A, B, C, D ],
+                 [ W, X, Y, Z ],
+                 [ W, X, Y, Z ],
+                 [ W, X, Y, Z ]];
+
+            immutable VI[8] mask1 =
+                [[ 0, 1, 2, 3 ],
+                 [ 0+1*4, 1+2*4, 2+3*4, 3+4*4 ],
+                 [ 3, 2, 1, 0 ],
+                 [ 0, 3, 2, 1 ],
+                 [ 0, 2, 1, 3 ],
+                 [ 3, 1, 2, 0 ],
+                 [ 0, 0, 0, 0 ],
+                 [ 1, 2, 1, 2 ]];
+
+            immutable V[8] out1 =
+                [[ A, B, C, D ],
+                 [ A, B, C, D ],
+                 [ D, C, B, A ],
+                 [ A, D, C, B ],
+                 [ A, C, B, D ],
+                 [ Z, X, Y, W ],
+                 [ W, W, W, W ],
+                 [ X, Y, X, Y ]];
+
+            immutable V[6] in2 =
+                [[ A, B, C, D ],
+                 [ A, B, C, D ],
+                 [ A, B, C, D ],
+                 [ A, B, C, D ],
+                 [ A, B, C, D ],
+                 [ A, B, C, D ]];
+
+            immutable V in3 = [ W, X, Y, Z ];
+
+            immutable VI[6] mask2 =
+                [[ 0, 1, 2, 3 ],
+                 [ 4, 5, 6, 7 ],
+                 [ 0, 4, 1, 5 ],
+                 [ 0, 7, 4, 3 ],
+                 [ 0, 0, 0, 0 ],
+                 [ 7, 7, 7, 7 ]];
+
+            immutable V[6] out2 =
+                [[ A, B, C, D ],
+                 [ W, X, Y, Z ],
+                 [ A, W, B, X ],
+                 [ A, Z, W, D ],
+                 [ A, A, A, A ],
+                 [ Z, Z, Z, Z ]];
+        }
+    }
+    else static if (numElements == 2)
+    {
+        // Test fragment for vectors with 2 elements
+        static if (is(E == double))
+        {
+            enum E A = 0.69314718055994530942;
+            enum E B = 2.7182818284590452354;
+
+            enum E X = 3.14159265358979323846;
+            enum E Y = 1.41421356237309504880;
+        }
+        else static if (is(E == float))
+        {
+            enum E A = 0.69314718055994530942f;
+            enum E B = 2.7182818284590452354f;
+
+            enum E X = 3.14159265358979323846f;
+            enum E Y = 1.41421356237309504880f;
+        }
+        else static if (is(E == ulong))
+        {
+            enum E A = 0x1112131415161718;
+            enum E B = 0x2122232425262728;
+
+            enum E X = 0xc1c2c3c4c5c6c7c8;
+            enum E Y = 0xd1d2d3d4d5d6d7d8;
+        }
+        else static if (is(E == uint))
+        {
+            enum E A = 0x11121314;
+            enum E B = 0x21222324;
+
+            enum E X = 0xd1d2d3d4;
+            enum E Y = 0xe1e2e3e4;
+        }
+        else
+            enum unsupported = true;
+
+        static if (!__traits(compiles, unsupported))
+        {
+            immutable V[6] in1 =
+                [[ A, B ],
+                 [ A, B ],
+                 [ A, B ],
+                 [ A, B ],
+                 [ X, Y ],
+                 [ X, Y ]];
+
+            immutable VI[6] mask1 =
+                [[ 0, 1 ],
+                 [ -16, 1 ],
+                 [ 1, 0 ],
+                 [ 0, 0 ],
+                 [ 1, 1 ],
+                 [ 1, 0 ]];
+
+            immutable V[6] out1 =
+                [[ A, B ],
+                 [ A, B ],
+                 [ B, A ],
+                 [ A, A ],
+                 [ Y, Y ],
+                 [ Y, X ]];
+
+            immutable V[7] in2 =
+                [[ A, B ],
+                 [ A, B ],
+                 [ A, B ],
+                 [ A, B ],
+                 [ A, B ],
+                 [ A, B ],
+                 [ A, B ]];
+
+            immutable V in3 = [ X, Y ];
+
+            immutable VI[7] mask2 =
+                [[ 0, 1 ],
+                 [ 2, 3 ],
+                 [ 0, 2 ],
+                 [ 2, 1 ],
+                 [ 3, 0 ],
+                 [ 0, 0 ],
+                 [ 3, 3 ]];
+
+            immutable V[7] out2 =
+                [[ A, B ],
+                 [ X, Y ],
+                 [ A, X ],
+                 [ X, B ],
+                 [ Y, A ],
+                 [ A, A ],
+                 [ Y, Y ]];
+        }
+    }
+    else
+        enum unsupported = true;
+
+    static if (!__traits(compiles, unsupported))
+    {
+        static foreach (i; 0 .. in1.length)
+            assert(shuffle(in1[i], mask1[i]).array == out1[i].array);
+        static foreach (i; 0 .. in2.length)
+            assert(shuffle(in2[i], in3, mask2[i]).array == out2[i].array);
+    }
+}
+
+void main()
+{
+    static if (__traits(compiles, __vector(ubyte[16])))
+        testshuffle!(__vector(ubyte[16]))();
+
+    static if (__traits(compiles, __vector(ushort[16])))
+        testshuffle!(__vector(ushort[16]))();
+
+    static if (__traits(compiles, __vector(ubyte[8])))
+        testshuffle!(__vector(ubyte[8]))();
+
+    static if (__traits(compiles, __vector(ushort[8])))
+        testshuffle!(__vector(ushort[8]))();
+
+    static if (__traits(compiles, __vector(uint[8])))
+        testshuffle!(__vector(uint[8]))();
+
+    static if (__traits(compiles, __vector(ulong[4])))
+    {
+        testshuffle!(__vector(ulong[4]))();
+
+        static if (__traits(compiles, __vector(double[4])))
+            testshuffle!(__vector(double[4]), __vector(ulong[4]))();
+    }
+
+    static if (__traits(compiles, __vector(uint[4])))
+    {
+        testshuffle!(__vector(uint[4]))();
+
+        static if (__traits(compiles, __vector(float[4])))
+            testshuffle!(__vector(float[4]), __vector(uint[4]))();
+    }
+
+    static if (__traits(compiles, __vector(ulong[2])))
+    {
+        testshuffle!(__vector(ulong[2]))();
+
+        static if (__traits(compiles, __vector(double[2])))
+            testshuffle!(__vector(double[2]), __vector(ulong[2]))();
+    }
+
+    static if (__traits(compiles, __vector(uint[2])))
+    {
+        testshuffle!(__vector(uint[2]))();
+
+        static if (__traits(compiles, __vector(float[2])))
+            testshuffle!(__vector(float[2]), __vector(uint[2]))();
+    }
+}
diff --git a/gcc/testsuite/gdc.dg/torture/simd_shufflevector.d b/gcc/testsuite/gdc.dg/torture/simd_shufflevector.d
new file mode 100644
index 0000000..cc55999
--- /dev/null
+++ b/gcc/testsuite/gdc.dg/torture/simd_shufflevector.d
@@ -0,0 +1,55 @@
+// { dg-additional-options "-mavx" { target avx_runtime } }
+// { dg-skip-if "needs gcc/config.d" { ! d_runtime } }
+import gcc.simd;
+import gcc.attributes;
+
+void main()
+{
+    static if (__traits(compiles, __vector(int[4])))
+        alias int4 = __vector(int[4]);
+    static if (__traits(compiles, __vector(int[8])))
+        alias int8 = __vector(int[8]);
+
+    static if (__traits(compiles, int4) && __traits(compiles, int8))
+    {
+        __gshared int4[5] res;
+        __gshared int4 a;
+        __gshared int4 b;
+        __gshared int8[3] res8;
+        __gshared int8 a8;
+        __gshared int8 b8;
+
+        @noipa static void foo()
+        {
+            res[0] = shufflevector(a, b, 0, 1, 4, 5);
+            res[1] = shufflevector(a, b, 0, 1, 2, 5);
+            res8[0] = shufflevector(a, b, 0, 1, 2, 2 + 1, 4, 5, 6, 7);
+            res[2] = shufflevector(a8, b8, 0, 8, 1, 9);
+            res[3] = shufflevector(a8, b, 0, 8, 1, 9);
+            res[4] = shufflevector(a, b8, 0, 4, 1, 5);
+            res8[1] = shufflevector(a8, b, 0, 8, 1, 9, 10, 11, 2, 3);
+            res8[2] = shufflevector(a, b8, 0, 4, 1, 5, 4, 5, 6, 7);
+        }
+
+        a = [0, 1, 2, 3];
+        b = [4, 5, 6, 7];
+        a8 = [0, 1, 2, 3, 4, 5, 6, 7];
+        b8 = [8, 9, 10, 11, 12, 13, 14, 15];
+        foo();
+        assert(res[0].array == [0, 1, 4, 5]);
+
+        res[1][2] = 9;
+        assert(res[1].array == [0, 1, 9, 5]);
+        assert(res8[0].array == [0, 1, 2, 3, 4, 5, 6, 7]);
+        assert(res[2].array == [0, 8, 1, 9]);
+        assert(res[3].array == [0, 4, 1, 5]);
+        assert(res[4].array == [0, 8, 1, 9]);
+        assert(res8[1].array == [0, 4, 1, 5, 6, 7, 2, 3]);
+
+        res8[2][4] = 42;
+        res8[2][5] = 42;
+        res8[2][6] = 42;
+        res8[2][7] = 42;
+        assert(res8[2].array == [0, 8, 1, 9, 42, 42, 42, 42]);
+    }
+}
diff --git a/gcc/testsuite/gdc.dg/torture/simd_store.d b/gcc/testsuite/gdc.dg/torture/simd_store.d
new file mode 100644
index 0000000..b96ed42
--- /dev/null
+++ b/gcc/testsuite/gdc.dg/torture/simd_store.d
@@ -0,0 +1,54 @@
+// { dg-additional-options "-mavx" { target avx_runtime } }
+// { dg-skip-if "needs gcc/config.d" { ! d_runtime } }
gcc/config.d" { ! d_runtime } } +import gcc.simd; + +void main() +{ + ubyte[32] data; + + // to test all alignments from 1 ~ 16 + foreach (i; 0..16) + { + ubyte* d = &data[i]; + + void test(T)() + { + T v; + + // populate v` with data + ubyte* ptrToV = cast(ubyte*)&v; + foreach (j; 0..T.sizeof) + ptrToV[j] = cast(ubyte)j; + + // store `v` to location pointed to by `d` + storeUnaligned(cast(T*)d, v); + + // check that the the data was stored correctly + foreach (j; 0..T.sizeof) + assert(ptrToV[j] == d[j]); + } + + static if (__traits(compiles, __vector(void[16]))) + test!(__vector(void[16]))(); + static if (__traits(compiles, __vector(byte[16]))) + test!(__vector(byte[16]))(); + static if (__traits(compiles, __vector(ubyte[16]))) + test!(__vector(ubyte[16]))(); + static if (__traits(compiles, __vector(short[8]))) + test!(__vector(short[8]))(); + static if (__traits(compiles, __vector(ushort[8]))) + test!(__vector(ushort[8]))(); + static if (__traits(compiles, __vector(int[4]))) + test!(__vector(int[4]))(); + static if (__traits(compiles, __vector(uint[4]))) + test!(__vector(uint[4]))(); + static if (__traits(compiles, __vector(long[2]))) + test!(__vector(long[2]))(); + static if (__traits(compiles, __vector(ulong[2]))) + test!(__vector(ulong[2]))(); + static if (__traits(compiles, __vector(double[2]))) + test!(__vector(double[2]))(); + static if (__traits(compiles, __vector(float[4]))) + test!(__vector(float[4]))(); + } +} diff --git a/libphobos/libdruntime/Makefile.am b/libphobos/libdruntime/Makefile.am index d963aa9..56b332d 100644 --- a/libphobos/libdruntime/Makefile.am +++ b/libphobos/libdruntime/Makefile.am @@ -207,14 +207,14 @@ DRUNTIME_DSOURCES = core/atomic.d core/attribute.d core/bitop.d \ core/vararg.d core/volatile.d gcc/attribute.d gcc/attributes.d \ gcc/backtrace.d gcc/builtins.d gcc/deh.d gcc/emutls.d gcc/gthread.d \ gcc/sections/common.d gcc/sections/elf.d gcc/sections/macho.d \ - gcc/sections/package.d gcc/sections/pecoff.d gcc/unwind/arm.d \ - gcc/unwind/arm_common.d gcc/unwind/c6x.d gcc/unwind/generic.d \ - gcc/unwind/package.d gcc/unwind/pe.d object.d rt/aApply.d rt/aApplyR.d \ - rt/aaA.d rt/adi.d rt/arrayassign.d rt/arraycat.d rt/cast_.d \ - rt/config.d rt/critical_.d rt/deh.d rt/dmain2.d rt/ehalloc.d \ - rt/invariant.d rt/lifetime.d rt/memory.d rt/minfo.d rt/monitor_.d \ - rt/profilegc.d rt/sections.d rt/tlsgc.d rt/util/typeinfo.d \ - rt/util/utility.d + gcc/sections/package.d gcc/sections/pecoff.d gcc/simd.d \ + gcc/unwind/arm.d gcc/unwind/arm_common.d gcc/unwind/c6x.d \ + gcc/unwind/generic.d gcc/unwind/package.d gcc/unwind/pe.d object.d \ + rt/aApply.d rt/aApplyR.d rt/aaA.d rt/adi.d rt/arrayassign.d \ + rt/arraycat.d rt/cast_.d rt/config.d rt/critical_.d rt/deh.d \ + rt/dmain2.d rt/ehalloc.d rt/invariant.d rt/lifetime.d rt/memory.d \ + rt/minfo.d rt/monitor_.d rt/profilegc.d rt/sections.d rt/tlsgc.d \ + rt/util/typeinfo.d rt/util/utility.d DRUNTIME_DSOURCES_STDCXX = core/stdcpp/allocator.d core/stdcpp/array.d \ core/stdcpp/exception.d core/stdcpp/memory.d core/stdcpp/new_.d \ diff --git a/libphobos/libdruntime/Makefile.in b/libphobos/libdruntime/Makefile.in index 45e086a..24865fb 100644 --- a/libphobos/libdruntime/Makefile.in +++ b/libphobos/libdruntime/Makefile.in @@ -236,7 +236,7 @@ am__objects_1 = core/atomic.lo core/attribute.lo core/bitop.lo \ gcc/backtrace.lo gcc/builtins.lo gcc/deh.lo gcc/emutls.lo \ gcc/gthread.lo gcc/sections/common.lo gcc/sections/elf.lo \ gcc/sections/macho.lo gcc/sections/package.lo \ - gcc/sections/pecoff.lo gcc/unwind/arm.lo \ + 
+	gcc/sections/pecoff.lo gcc/simd.lo gcc/unwind/arm.lo \
 	gcc/unwind/arm_common.lo gcc/unwind/c6x.lo \
 	gcc/unwind/generic.lo gcc/unwind/package.lo gcc/unwind/pe.lo \
 	object.lo rt/aApply.lo rt/aApplyR.lo rt/aaA.lo rt/adi.lo \
@@ -874,14 +874,14 @@ DRUNTIME_DSOURCES = core/atomic.d core/attribute.d core/bitop.d \
 	core/vararg.d core/volatile.d gcc/attribute.d gcc/attributes.d \
 	gcc/backtrace.d gcc/builtins.d gcc/deh.d gcc/emutls.d gcc/gthread.d \
 	gcc/sections/common.d gcc/sections/elf.d gcc/sections/macho.d \
-	gcc/sections/package.d gcc/sections/pecoff.d gcc/unwind/arm.d \
-	gcc/unwind/arm_common.d gcc/unwind/c6x.d gcc/unwind/generic.d \
-	gcc/unwind/package.d gcc/unwind/pe.d object.d rt/aApply.d rt/aApplyR.d \
-	rt/aaA.d rt/adi.d rt/arrayassign.d rt/arraycat.d rt/cast_.d \
-	rt/config.d rt/critical_.d rt/deh.d rt/dmain2.d rt/ehalloc.d \
-	rt/invariant.d rt/lifetime.d rt/memory.d rt/minfo.d rt/monitor_.d \
-	rt/profilegc.d rt/sections.d rt/tlsgc.d rt/util/typeinfo.d \
-	rt/util/utility.d
+	gcc/sections/package.d gcc/sections/pecoff.d gcc/simd.d \
+	gcc/unwind/arm.d gcc/unwind/arm_common.d gcc/unwind/c6x.d \
+	gcc/unwind/generic.d gcc/unwind/package.d gcc/unwind/pe.d object.d \
+	rt/aApply.d rt/aApplyR.d rt/aaA.d rt/adi.d rt/arrayassign.d \
+	rt/arraycat.d rt/cast_.d rt/config.d rt/critical_.d rt/deh.d \
+	rt/dmain2.d rt/ehalloc.d rt/invariant.d rt/lifetime.d rt/memory.d \
+	rt/minfo.d rt/monitor_.d rt/profilegc.d rt/sections.d rt/tlsgc.d \
+	rt/util/typeinfo.d rt/util/utility.d
 
 DRUNTIME_DSOURCES_STDCXX = core/stdcpp/allocator.d core/stdcpp/array.d \
 	core/stdcpp/exception.d core/stdcpp/memory.d core/stdcpp/new_.d \
@@ -1340,6 +1340,7 @@ gcc/sections/elf.lo: gcc/sections/$(am__dirstamp)
 gcc/sections/macho.lo: gcc/sections/$(am__dirstamp)
 gcc/sections/package.lo: gcc/sections/$(am__dirstamp)
 gcc/sections/pecoff.lo: gcc/sections/$(am__dirstamp)
+gcc/simd.lo: gcc/$(am__dirstamp)
 gcc/unwind/$(am__dirstamp):
 	@$(MKDIR_P) gcc/unwind
 	@: > gcc/unwind/$(am__dirstamp)
diff --git a/libphobos/libdruntime/gcc/simd.d b/libphobos/libdruntime/gcc/simd.d
new file mode 100644
index 0000000..ffca50f
--- /dev/null
+++ b/libphobos/libdruntime/gcc/simd.d
@@ -0,0 +1,359 @@
+// GNU D Compiler SIMD support functions and intrinsics.
+// Copyright (C) 2022 Free Software Foundation, Inc.
+
+// GCC is free software; you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3, or (at your option) any later
+// version.
+
+// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+// for more details.
+
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+// <http://www.gnu.org/licenses/>.
+
+module gcc.simd;
+
+pure:
+nothrow:
+@safe:
+@nogc:
+pragma(inline, true):
+
+/**
+ * Emit prefetch instruction.
+ * Params:
+ *   address = address to be prefetched
+ *   writeFetch = true for write fetch, false for read fetch
+ *   locality = 0..3 (0 meaning least local, 3 meaning most local)
+ */
+void prefetch(bool writeFetch, ubyte locality)(const(void)* address)
+{
+    static assert(locality < 4, "0..3 expected for locality");
+    import gcc.builtins : __builtin_prefetch;
+    __builtin_prefetch(address, writeFetch, locality);
+}
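+
+// For instance, a read fetch with moderate temporal locality might be
+// issued ahead of a loop; this is only a sketch, and the 64-byte prefetch
+// distance and the `process` helper are illustrative, not part of the API:
+//
+//     void process(const(ubyte)* p)
+//     {
+//         prefetch!(false, 2)(p + 64);
+//         // ... work on the bytes at p ...
+//     }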
+
+/**
+ * Load unaligned vector from address.
+ * This is a compiler intrinsic.
+ * Params:
+ *   p = pointer to vector
+ * Returns:
+ *   the loaded vector
+ */
+V loadUnaligned(V)(const V* p) if (isVectorType!V);
+
+/**
+ * Store vector to unaligned address.
+ * This is a compiler intrinsic.
+ * Params:
+ *   p = pointer to vector
+ *   value = value to store
+ * Returns:
+ *   the stored value
+ */
+V storeUnaligned(V)(V* p, V value) if (isVectorType!V);
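+
+// A minimal usage sketch for the two intrinsics above; it assumes the
+// target supports 16-byte vectors, and `buf` is a hypothetical buffer
+// whose alignment is unknown:
+//
+//     alias int4 = __vector(int[4]);
+//     ubyte[int4.sizeof + 1] buf;
+//     int4 v = loadUnaligned(cast(const(int4)*) &buf[1]);
+//     v = v + 1;
+//     storeUnaligned(cast(int4*) &buf[1], v);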
+
+/**
+ * Construct a permutation of elements from one or two vectors, returning a
+ * vector of the same type as the input vector(s). The `mask` is an integral
+ * vector with the same width and element count as the output vector.
+ * Params:
+ *   op1 = input vector
+ *   op2 = input vector
+ *   mask = integer vector mask
+ * Returns:
+ *   vector with the same type as `op1` and `op2`
+ * Example:
+ * ---
+ * int4 a = [1, 2, 3, 4];
+ * int4 b = [5, 6, 7, 8];
+ * int4 mask1 = [0, 1, 1, 3];
+ * int4 mask2 = [0, 4, 2, 5];
+ * assert(shuffle(a, mask1).array == [1, 2, 2, 4]);
+ * assert(shuffle(a, b, mask2).array == [1, 5, 3, 6]);
+ * ---
+ */
+template shuffle(V0, V1, M)
+{
+    static assert(isVectorType!V0, "first argument must be a vector");
+    static assert(isVectorType!V1, "second argument must be a vector");
+    static assert(is(BaseType!V0 == BaseType!V1),
+        "first and second argument vectors must have the same element type");
+    static assert(isVectorType!M && is(BaseType!M : long),
+        "last argument must be an integer vector");
+    static assert(numElements!V0 == numElements!M && numElements!V1 == numElements!M,
+        "argument vectors and mask vector should have the same number of elements");
+    static assert(BaseType!V0.sizeof == BaseType!M.sizeof,
+        "argument vectors and mask vector should have the same element type size");
+
+    V0 shuffle(V0 op1, V1 op2, M mask);
+}
+
+/// Ditto
+template shuffle(V, M)
+{
+    static assert(isVectorType!V, "first argument must be a vector");
+    static assert(isVectorType!M && is(BaseType!M : long),
+        "last argument must be an integer vector");
+    static assert(numElements!V == numElements!M,
+        "argument vector and mask vector should have the same number of elements");
+    static assert(BaseType!V.sizeof == BaseType!M.sizeof,
+        "argument vector and mask vector should have the same element type size");
+
+    V shuffle(V op1, M mask)
+    {
+        return shuffle(op1, op1, mask);
+    }
+}
+
+/**
+ * Construct a permutation of elements from two vectors, returning a vector
+ * with the same element type as the input vector(s), and with an element
+ * count equal to the number of `index` arguments given.
+ * Params:
+ *   op1 = input vector
+ *   op2 = input vector
+ *   index = indices of the elements that should be extracted and returned
+ * Returns:
+ *   vector with the same element type as `op1` and `op2`, but with an
+ *   element count equal to the number of indices in `index`
+ * Example:
+ * ---
+ * int8 a = [1, -2, 3, -4, 5, -6, 7, -8];
+ * int4 b = shufflevector(a, a, 0, 2, 4, 6);
+ * assert(b.array == [1, 3, 5, 7]);
+ * int4 c = [-2, -4, -6, -8];
+ * int8 d = shufflevector(c, b, 4, 0, 5, 1, 6, 2, 7, 3);
+ * assert(d.array == a.array);
+ * ---
+ */
+template shufflevector(V1, V2, M...)
+{
+    static assert(isVectorType!V1, "first argument must be a vector");
+    static assert(isVectorType!V2, "second argument must be a vector");
+    static assert(is(BaseType!V1 == BaseType!V2),
+        "first and second argument vectors must have the same element type");
+    static assert(isPowerOf2!(M.length),
+        "number of index arguments must be a power of 2");
+
+    __vector(BaseType!V1[M.length]) shufflevector(V1 op1, V2 op2, M index);
+}
+
+/// Ditto
+template shufflevector(V, index...)
+{
+    // Defined for compatibility with LDC.
+    static assert(isVectorType!V, "first argument must be a vector type");
+    static assert(numElements!V == index.length,
+        "number of index arguments must equal the number of vector elements");
+
+    private template ctfeConstants(m...)
+    {
+        static if (m.length == 0) enum ctfeConstants = 1;
+        else enum ctfeConstants = m[0] | ctfeConstants!(m[1 .. $]);
+    }
+    static assert(__traits(compiles, ctfeConstants!index),
+        "all index arguments must be compile time constants");
+
+    private template validIndexes(m...)
+    {
+        static if (m.length == 0) enum validIndexes = true;
+        else enum validIndexes = (cast(long)m[0] > -1) && validIndexes!(m[1 .. $]);
+    }
+    static assert(validIndexes!index,
+        "all index arguments must be greater than or equal to 0");
+
+    V shufflevector(V op1, V op2)
+    {
+        return shufflevector(op1, op2, index);
+    }
+}
+
+/**
+ * Extracts a single scalar element from a vector at a specified index.
+ * Defined for compatibility with LDC.
+ * Params:
+ *   val = vector to extract element from
+ *   idx = index indicating the position from which to extract the element
+ * Returns:
+ *   scalar of the same type as the element type of val
+ * Example:
+ * ---
+ * int4 a = [0, 10, 20, 30];
+ * int k = extractelement!(int4, 2)(a);
+ * assert(k == 20);
+ * ---
+ */
+BaseType!V extractelement(V, int idx)(V val)
+    if (isVectorType!V && idx < numElements!V)
+{
+    return val[idx];
+}
+
+/**
+ * Inserts a scalar element into a vector at a specified index.
+ * Defined for compatibility with LDC.
+ * Params:
+ *   val = vector to assign element to
+ *   elt = scalar whose type is the element type of val
+ *   idx = index indicating the position at which to insert the element
+ * Returns:
+ *   vector of the same type as val
+ * Example:
+ * ---
+ * int4 a = [0, 10, 20, 30];
+ * int4 b = insertelement!(int4, 2)(a, 50);
+ * assert(b.array == [0, 10, 50, 30]);
+ * ---
+ */
+V insertelement(V, int idx)(V val, BaseType!V elt)
+    if (isVectorType!V && idx < numElements!V)
+{
+    val[idx] = elt;
+    return val;
+}
+
+/**
+ * Convert a vector from one integral or floating vector type to another.
+ * The result is an integral or floating vector that has had every element
+ * cast to the element type of the return type.
+ * Params:
+ *   from = input vector
+ * Returns:
+ *   converted vector
+ * Example:
+ * ---
+ * int4 a = [1, -2, 3, -4];
+ * float4 b = [1.5, -2.5, 3, 7];
+ * assert(convertvector!float4(a).array == [1, -2, 3, -4]);
+ * assert(convertvector!double4(a).array == [1, -2, 3, -4]);
+ * assert(convertvector!double4(b).array == [1.5, -2.5, 3, 7]);
+ * assert(convertvector!int4(b).array == [1, -2, 3, 7]);
+ * ---
+ */
+template convertvector(V, T)
+{
+    static assert(isVectorType!V && (is(BaseType!V : long) || is(BaseType!V : real)),
+        "first argument must be an integer or floating vector type");
+    static assert(isVectorType!T && (is(BaseType!T : long) || is(BaseType!T : real)),
+        "second argument must be an integer or floating vector");
+    static assert(numElements!V == numElements!T,
+        "first and second argument vectors should have the same number of elements");
+
+    V convertvector(T);
+}
+
+/**
+ * Construct a conditional merge of elements from two vectors, returning a
+ * vector of the same type as the input vector(s). The `mask` is an integral
+ * vector with the same width and element count as the output vector.
+ * Params:
+ *   op1 = input vector
+ *   op2 = input vector
+ *   mask = integer vector mask
+ * Returns:
+ *   vector with the same type as `op1` and `op2`
+ * Example:
+ * ---
+ * int4 a = [1, 2, 3, 4];
+ * int4 b = [5, 6, 7, 8];
+ * int4 mask = [0, -1, 0, -1];
+ * int4 c = blendvector(a, b, mask);
+ * // Each element of `c` is taken from `a` or `b`, selected element-wise
+ * // by whether the corresponding mask element is zero or non-zero.
+ * ---
+ */
+template blendvector(V0, V1, M)
+{
+    static assert(isVectorType!V0, "first argument must be a vector");
+    static assert(isVectorType!V1, "second argument must be a vector");
+    static assert(is(BaseType!V0 == BaseType!V1),
+        "first and second argument vectors must have the same element type");
+    static assert(isVectorType!M && is(BaseType!M : long),
+        "last argument must be an integer vector");
+    static assert(numElements!V0 == numElements!M && numElements!V1 == numElements!M,
+        "argument vectors and mask vector should have the same number of elements");
+    static assert(BaseType!V0.sizeof == BaseType!M.sizeof,
+        "argument vectors and mask vector should have the same element type size");
+
+    V0 blendvector(V0 op1, V1 op2, M mask);
+}
+
+/**
+ * Perform an element-wise comparison between two vectors, producing `0` when
+ * the comparison is false and `-1` (all bits are set to 1) otherwise.
+ * Params:
+ *   op1 = input vector
+ *   op2 = input vector
+ * Returns:
+ *   vector of the same width and number of elements as the comparison
+ *   operands with a signed integral element type
+ * Example:
+ * ---
+ * float4 a = [1, 3, 5, 7];
+ * float4 b = [2, 3, 4, 5];
+ * int4 c = greaterMask!float4(a, b);
+ * assert(c.array == [0, 0, -1, -1]);
+ * ---
+ */
+V equalMask(V)(V op1, V op2) if (isVectorType!V);
+/// Ditto
+V notEqualMask(V)(V op1, V op2) if (isVectorType!V);
+/// Ditto
+V greaterMask(V)(V op1, V op2) if (isVectorType!V);
+/// Ditto
+V greaterOrEqualMask(V)(V op1, V op2) if (isVectorType!V);
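+
+// A small sketch of how the comparison masks compose; it assumes the
+// target supports 16-byte vectors:
+//
+//     alias int4 = __vector(int[4]);
+//     int4 a = [1, 3, 5, 7];
+//     int4 b = [2, 3, 4, 5];
+//     // -1 where a[i] > b[i] or a[i] == b[i], 0 elsewhere
+//     int4 ge = greaterMask(a, b) | equalMask(a, b);
+//     assert(ge.array == greaterOrEqualMask(a, b).array);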
+
+/**
+ * Perform an element-wise logical comparison of the vector operands, treating
+ * each element as a boolean that is false when `0`, and producing `0` when
+ * the comparison is false and `-1` (all bits are set to 1) otherwise.
+ * Params:
+ *   op1 = input vector
+ *   op2 = input vector (`notMask` takes a single operand)
+ * Returns:
+ *   vector of the same width and number of elements as the comparison
+ *   operands with a signed integral element type
+ */
+V notMask(V)(V op1) if (isVectorType!V)
+{
+    return equalMask(op1, 0);
+}
+
+/// Ditto
+V andAndMask(V)(V op1, V op2) if (isVectorType!V)
+{
+    return notEqualMask(op1, 0) & notEqualMask(op2, 0);
+}
+
+/// Ditto
+V orOrMask(V)(V op1, V op2) if (isVectorType!V)
+{
+    return notEqualMask(op1, 0) | notEqualMask(op2, 0);
+}
+
+// Private helper templates.
+private:
+
+enum bool isVectorType(T) = is(T : __vector(V[N]), V, size_t N);
+
+template BaseType(V)
+{
+    alias typeof(V.array[0]) BaseType;
+}
+
+template numElements(V)
+{
+    enum numElements = V.sizeof / BaseType!(V).sizeof;
+}
+
+enum bool isPowerOf2(int Y) = Y && (Y & -Y) == Y;
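+
+// A short sketch of the logical helpers defined above (assumes the target
+// supports 16-byte vectors); each element is treated as a boolean that is
+// false when zero:
+//
+//     alias int4 = __vector(int[4]);
+//     int4 a = [0, 1, 2, 0];
+//     int4 b = [4, 0, 6, 0];
+//     assert(andAndMask(a, b).array == [0, 0, -1, 0]);
+//     assert(orOrMask(a, b).array == [-1, -1, -1, 0]);
+//     assert(notMask(a).array == [-1, 0, 0, -1]);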