-rw-r--r--  gcc/d/intrinsics.cc                                    587
-rw-r--r--  gcc/d/intrinsics.def                                   23
-rw-r--r--  gcc/testsuite/gdc.dg/Wbuiltin_declaration_mismatch1.d (renamed from gcc/testsuite/gdc.dg/Wbuiltin_declaration_mismatch.d)  0
-rw-r--r--  gcc/testsuite/gdc.dg/Wbuiltin_declaration_mismatch2.d  250
-rw-r--r--  gcc/testsuite/gdc.dg/torture/simd_blendvector.d        345
-rw-r--r--  gcc/testsuite/gdc.dg/torture/simd_cond.d               17
-rw-r--r--  gcc/testsuite/gdc.dg/torture/simd_convertvector.d      122
-rw-r--r--  gcc/testsuite/gdc.dg/torture/simd_load.d               52
-rw-r--r--  gcc/testsuite/gdc.dg/torture/simd_logical.d            19
-rw-r--r--  gcc/testsuite/gdc.dg/torture/simd_shuffle.d            454
-rw-r--r--  gcc/testsuite/gdc.dg/torture/simd_shufflevector.d      55
-rw-r--r--  gcc/testsuite/gdc.dg/torture/simd_store.d              54
-rw-r--r--  libphobos/libdruntime/Makefile.am                      16
-rw-r--r--  libphobos/libdruntime/Makefile.in                      19
-rw-r--r--  libphobos/libdruntime/gcc/simd.d                       359
15 files changed, 2355 insertions, 17 deletions
diff --git a/gcc/d/intrinsics.cc b/gcc/d/intrinsics.cc
index 0dd5543..454d940 100644
--- a/gcc/d/intrinsics.cc
+++ b/gcc/d/intrinsics.cc
@@ -29,9 +29,12 @@ along with GCC; see the file COPYING3. If not see
#include "tm.h"
#include "function.h"
#include "tree.h"
+#include "diagnostic.h"
+#include "langhooks.h"
#include "fold-const.h"
#include "stringpool.h"
#include "builtins.h"
+#include "vec-perm-indices.h"
#include "d-tree.h"
@@ -161,6 +164,16 @@ maybe_set_intrinsic (FuncDeclaration *decl)
case INTRINSIC_MULUL:
case INTRINSIC_NEGS:
case INTRINSIC_NEGSL:
+ case INTRINSIC_LOADUNALIGNED:
+ case INTRINSIC_STOREUNALIGNED:
+ case INTRINSIC_SHUFFLE:
+ case INTRINSIC_SHUFFLEVECTOR:
+ case INTRINSIC_CONVERTVECTOR:
+ case INTRINSIC_BLENDVECTOR:
+ case INTRINSIC_EQUALMASK:
+ case INTRINSIC_NOTEQUALMASK:
+ case INTRINSIC_GREATERMASK:
+ case INTRINSIC_GREATEREQUALMASK:
case INTRINSIC_VLOAD8:
case INTRINSIC_VLOAD16:
case INTRINSIC_VLOAD32:
@@ -169,6 +182,8 @@ maybe_set_intrinsic (FuncDeclaration *decl)
case INTRINSIC_VSTORE16:
case INTRINSIC_VSTORE32:
case INTRINSIC_VSTORE64:
+ /* Cannot interpret function during CTFE. If the library
+ provides a definition, its body will be used instead. */
break;
case INTRINSIC_POW:
@@ -196,6 +211,314 @@ maybe_set_intrinsic (FuncDeclaration *decl)
}
}
+/* Helper function for maybe_warn_intrinsic_mismatch. Issue warning about
+ mismatch in the EXPECTED return type in call to the intrinsic function in
+ CALLEXP, and return TRUE. */
+
+static bool
+warn_mismatched_return_type (tree callexp, const char *expected)
+{
+ warning_at (EXPR_LOCATION (callexp), OPT_Wbuiltin_declaration_mismatch,
+ "mismatch in return type of intrinsic function %qD "
+ "(%qT, should be %qs)", get_callee_fndecl (callexp),
+ TREE_TYPE (callexp), expected);
+ return true;
+}
+
+/* Helper function for maybe_warn_intrinsic_mismatch. Issue warning or error
+ about mismatch in the EXPECTED argument type at ARGNO in call to the
+ intrinsic function in CALLEXP, and return TRUE. */
+
+static bool
+warn_mismatched_argument (tree callexp, unsigned argno, const char *expected)
+{
+ warning_at (EXPR_LOCATION (callexp), OPT_Wbuiltin_declaration_mismatch,
+ "mismatch in argument %u type of intrinsic function %qD "
+ "(%qT, should be %qs)", argno + 1, get_callee_fndecl (callexp),
+ TREE_TYPE (CALL_EXPR_ARG (callexp, argno)), expected);
+ return true;
+}
+
+static bool
+warn_mismatched_argument (tree callexp, unsigned argno, tree expected,
+ bool error_p = false)
+{
+ if (error_p)
+ error_at (EXPR_LOCATION (callexp),
+ "mismatch in argument %u type of intrinsic function %qD "
+ "(%qT, should be %qT)", argno + 1, get_callee_fndecl (callexp),
+ TREE_TYPE (CALL_EXPR_ARG (callexp, argno)), expected);
+ else
+ warning_at (EXPR_LOCATION (callexp), OPT_Wbuiltin_declaration_mismatch,
+ "mismatch in argument %u type of intrinsic function %qD "
+ "(%qT, should be %qT)", argno + 1, get_callee_fndecl (callexp),
+ TREE_TYPE (CALL_EXPR_ARG (callexp, argno)), expected);
+
+ return true;
+}
+
+/* Helper function for maybe_warn_intrinsic_mismatch. Builds a vector integer
+ type suitable for the mask argument of INTRINSIC_SHUFFLE from the given
+ input argument TYPE. */
+
+static tree
+build_shuffle_mask_type (tree type)
+{
+ const unsigned bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (type)));
+ const int unsignedp = TYPE_UNSIGNED (TREE_TYPE (type));
+ tree inner = lang_hooks.types.type_for_size (bits, unsignedp);
+ gcc_assert (inner && TREE_CODE (inner) == INTEGER_TYPE);
+
+ /* %% Get the front-end type for the vector so the D type will be
+ printed (this should really be handled by a D tree printer). */
+ Type *t = build_frontend_type (inner);
+ gcc_assert (t != NULL);
+ unsigned HOST_WIDE_INT nunits;
+ TYPE_VECTOR_SUBPARTS (type).is_constant (&nunits);
+
+ return build_ctype (TypeVector::create (t->sarrayOf (nunits)));
+}
+
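
A note on what this helper pairs up, in D terms (an illustrative sketch with assumed aliases, mirroring the diagnostics tested in Wbuiltin_declaration_mismatch2.d below): the suggested mask type has one signed integer lane per input lane, with matching element width.

    alias float4 = __vector(float[4]);
    alias int4 = __vector(int[4]);     // 32-bit integer lanes match float4
    alias short8 = __vector(short[8]); // wrong lane count and width for float4

    // shuffle!(float4, float4, int4) passes the mask check, while
    // shuffle!(float4, float4, short8) fails with "mismatch in argument 3".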
+/* Checks if call to intrinsic FUNCTION in CALLEXP matches the internal
+ type and value constraints that we expect from the library definitions.
+ Returns TRUE and issues a warning if there is a mismatch.
+
+ Note: The return type and parameters are encoded into the signature `deco'
+ string that we match on in maybe_set_intrinsic(), so if the deco mangle
+ string has 'i' in the part that specifies the return type, then the matched
+ intrinsic will always have the return type `int'.
+
+ For templated intrinsics however, we rely on template constraints to ensure
+ that the generic type matches what we expect it to be. There is still an
+ enforced relationship between a template argument and its instantiated type.
+ For example: `T func(T)(T*)' would have the generic return type `@1T' and
+   generic parameter type `P@1T', so it can be assumed that if the return type
+   matches what we expect, then all parameters are fine as well.  Otherwise
+   some internal error must have occurred for the types to disagree.
+ Where a templated intrinsic has multiple template arguments, each generic
+ type will need to be checked for its validity. */
+
+static bool
+maybe_warn_intrinsic_mismatch (tree function, tree callexp)
+{
+ switch (DECL_INTRINSIC_CODE (function))
+ {
+ case INTRINSIC_NONE:
+ default:
+ return false;
+
+ case INTRINSIC_LOADUNALIGNED:
+ {
+ /* Expects the signature:
+ vector(T) loadUnaligned (vector(T)*); */
+ gcc_assert (call_expr_nargs (callexp) == 1);
+
+ tree ptr = TREE_TYPE (CALL_EXPR_ARG (callexp, 0));
+ if (!VECTOR_TYPE_P (TREE_TYPE (callexp))
+ || !POINTER_TYPE_P (ptr) || !VECTOR_TYPE_P (TREE_TYPE (ptr)))
+ return warn_mismatched_return_type (callexp, "__vector(T)");
+
+ return false;
+ }
+
+ case INTRINSIC_STOREUNALIGNED:
+ {
+ /* Expects the signature:
+ vector(T) storeUnaligned (vector(T)*, vector(T)); */
+ gcc_assert (call_expr_nargs (callexp) == 2);
+
+ tree ptr = TREE_TYPE (CALL_EXPR_ARG (callexp, 0));
+ tree val = TREE_TYPE (CALL_EXPR_ARG (callexp, 1));
+ if (!VECTOR_TYPE_P (TREE_TYPE (callexp))
+ || !POINTER_TYPE_P (ptr) || !VECTOR_TYPE_P (TREE_TYPE (ptr))
+ || !VECTOR_TYPE_P (val))
+ return warn_mismatched_return_type (callexp, "__vector(T)");
+
+ return false;
+ }
+
+ case INTRINSIC_SHUFFLE:
+ case INTRINSIC_BLENDVECTOR:
+ {
+ /* Expects the signature:
+ vector(T) shuffle (vector(T), vector(U), vector(V));
+ vector(T) blendvector (vector(T), vector(U), vector(V)); */
+ gcc_assert (call_expr_nargs (callexp) == 3);
+
+ tree vec0 = TREE_TYPE (CALL_EXPR_ARG (callexp, 0));
+ if (!VECTOR_TYPE_P (TREE_TYPE (callexp))
+ || !VECTOR_TYPE_P (vec0))
+ return warn_mismatched_return_type (callexp, "__vector(T)");
+
+ tree vec1 = TREE_TYPE (CALL_EXPR_ARG (callexp, 1));
+ if (!VECTOR_TYPE_P (vec1))
+ return warn_mismatched_argument (callexp, 1, vec0);
+
+ tree mask = TREE_TYPE (CALL_EXPR_ARG (callexp, 2));
+ if (!VECTOR_TYPE_P (mask) || !VECTOR_INTEGER_TYPE_P (mask))
+ {
+ tree expected = build_shuffle_mask_type (vec0);
+ return warn_mismatched_argument (callexp, 2, expected,
+ VECTOR_TYPE_P (mask));
+ }
+
+ /* Types have been validated, now issue errors about violations on the
+ constraints of the intrinsic. */
+ if (TYPE_MAIN_VARIANT (vec0) != TYPE_MAIN_VARIANT (vec1))
+ return warn_mismatched_argument (callexp, 1, vec0, true);
+
+ /* Vector element sizes should be equal between arguments and mask. */
+ if (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vec0)))
+ != GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (mask)))
+ || maybe_ne (TYPE_VECTOR_SUBPARTS (vec0),
+ TYPE_VECTOR_SUBPARTS (mask))
+ || maybe_ne (TYPE_VECTOR_SUBPARTS (vec1),
+ TYPE_VECTOR_SUBPARTS (mask)))
+ {
+ tree expected = build_shuffle_mask_type (vec0);
+ return warn_mismatched_argument (callexp, 2, expected, true);
+ }
+
+ return false;
+ }
+
+ case INTRINSIC_SHUFFLEVECTOR:
+ {
+ /* Expects the signature:
+ vector(T[N]) shufflevector (vector(T), vector(U), N...); */
+ gcc_assert (call_expr_nargs (callexp) >= 3);
+ gcc_assert (VECTOR_TYPE_P (TREE_TYPE (callexp)));
+
+ tree vec0 = TREE_TYPE (CALL_EXPR_ARG (callexp, 0));
+ if (!VECTOR_TYPE_P (vec0))
+ return warn_mismatched_argument (callexp, 0, "__vector(T)");
+
+ tree vec1 = TREE_TYPE (CALL_EXPR_ARG (callexp, 1));
+ if (!VECTOR_TYPE_P (vec1))
+ return warn_mismatched_argument (callexp, 1, vec0);
+
+ for (int i = 2; i < call_expr_nargs (callexp); i++)
+ {
+ tree idx = TREE_TYPE (CALL_EXPR_ARG (callexp, i));
+ if (TREE_CODE (idx) != INTEGER_TYPE)
+ return warn_mismatched_argument (callexp, i, d_int_type);
+ }
+
+ /* Types have been validated, now issue errors about violations on the
+ constraints of the intrinsic. */
+ if (TYPE_MAIN_VARIANT (TREE_TYPE (vec0))
+ != TYPE_MAIN_VARIANT (TREE_TYPE (vec1)))
+ {
+ /* %% Get the front-end type for the vector so the D type will be
+ printed (this should really be handled by a D tree printer). */
+ unsigned HOST_WIDE_INT nunits;
+ if (!TYPE_VECTOR_SUBPARTS (vec1).is_constant (&nunits))
+ break;
+
+ Type *inner = build_frontend_type (TREE_TYPE (vec0));
+ Type *vector = TypeVector::create (inner->sarrayOf (nunits));
+ return warn_mismatched_argument (callexp, 1,
+ build_ctype (vector), true);
+ }
+
+ /* Vector sizes should be known, and number of indices a power of 2. */
+ unsigned HOST_WIDE_INT vec0_length;
+ unsigned HOST_WIDE_INT vec1_length;
+ if (!TYPE_VECTOR_SUBPARTS (vec0).is_constant (&vec0_length)
+ || !TYPE_VECTOR_SUBPARTS (vec1).is_constant (&vec1_length)
+ || !pow2p_hwi (call_expr_nargs (callexp) - 2))
+ break;
+
+ /* All index arguments must be valid constants as well. */
+ for (int i = 2; i < call_expr_nargs (callexp); i++)
+ {
+ tree idx = CALL_EXPR_ARG (callexp, i);
+ if (!tree_fits_shwi_p (idx))
+ {
+ error_at (EXPR_LOCATION (callexp),
+ "argument %qE cannot be read at compile time", idx);
+ return true;
+ }
+
+ HOST_WIDE_INT iidx = tree_to_shwi (idx);
+ if (iidx < 0
+ || (unsigned HOST_WIDE_INT) iidx >= vec0_length + vec1_length)
+ {
+ error_at (EXPR_LOCATION (callexp),
+ "element index %qE is out of bounds %<[0 .. %E]%>",
+ idx, build_integer_cst (vec0_length + vec1_length));
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ case INTRINSIC_CONVERTVECTOR:
+ {
+ /* Expects the signature:
+ vector(T) convertvector (vector(U)); */
+ gcc_assert (call_expr_nargs (callexp) == 1);
+
+ tree ret = TREE_TYPE (callexp);
+ if (!VECTOR_TYPE_P (ret)
+ || (!VECTOR_INTEGER_TYPE_P (ret) && !VECTOR_FLOAT_TYPE_P (ret)))
+ return warn_mismatched_return_type (callexp, "__vector(T)");
+
+ tree arg = TREE_TYPE (CALL_EXPR_ARG (callexp, 0));
+ if (!VECTOR_TYPE_P (arg)
+ || (!VECTOR_INTEGER_TYPE_P (arg) && !VECTOR_FLOAT_TYPE_P (arg)))
+ return warn_mismatched_argument (callexp, 0, "__vector(T)");
+
+ /* Types have been validated, now issue errors about violations on the
+ constraints of the intrinsic. */
+ if (maybe_ne (TYPE_VECTOR_SUBPARTS (ret), TYPE_VECTOR_SUBPARTS (arg)))
+ {
+ /* %% Get the front-end type for the vector so the D type will be
+ printed (this should really be handled by a D tree printer). */
+ unsigned HOST_WIDE_INT nunits;
+ if (!TYPE_VECTOR_SUBPARTS (ret).is_constant (&nunits))
+ break;
+
+ Type *inner = build_frontend_type (TREE_TYPE (arg));
+ Type *vector = TypeVector::create (inner->sarrayOf (nunits));
+ return warn_mismatched_argument (callexp, 0,
+ build_ctype (vector), true);
+ }
+
+ return false;
+ }
+
+ case INTRINSIC_EQUALMASK:
+ case INTRINSIC_NOTEQUALMASK:
+ case INTRINSIC_GREATERMASK:
+ case INTRINSIC_GREATEREQUALMASK:
+ {
+ /* Expects the signature:
+ vector(T) equalMask(vector(T), vector(T));
+ vector(T) notEqualMask(vector(T), vector(T));
+ vector(T) greaterMask(vector(T), vector(T));
+	 vector(T) greaterOrEqualMask(vector(T), vector(T)); */
+ gcc_assert (call_expr_nargs (callexp) == 2);
+
+ tree vec0 = TREE_TYPE (CALL_EXPR_ARG (callexp, 0));
+ tree vec1 = TREE_TYPE (CALL_EXPR_ARG (callexp, 1));
+ if (!VECTOR_TYPE_P (TREE_TYPE (callexp))
+ || !VECTOR_TYPE_P (vec0)
+ || !VECTOR_TYPE_P (vec1)
+ || TYPE_MAIN_VARIANT (vec0) != TYPE_MAIN_VARIANT (vec1))
+ return warn_mismatched_return_type (callexp, "__vector(T)");
+
+ return false;
+ }
+ }
+
+ /* Generic mismatch warning if it hasn't already been handled. */
+ warning_at (EXPR_LOCATION (callexp), OPT_Wbuiltin_declaration_mismatch,
+ "mismatch in call of intrinsic function %qD", function);
+ return true;
+}
+
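
For concreteness, a minimal D sketch of calls that trip these checks, assuming the unguarded declarations used at the end of the new Wbuiltin_declaration_mismatch2.d test:

    alias int4 = __vector(int[4]);
    V loadUnaligned(V)(const V*); // no template constraint, as in the test

    void f()
    {
        loadUnaligned!int(null);  // warning: mismatch in return type
        loadUnaligned!int4(null); // well-formed, gets lowered
    }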
/* Construct a function call to the built-in function CODE, N is the number of
arguments, and the `...' parameters are the argument expressions.
The original call expression is held in CALLEXP. */
@@ -750,6 +1073,231 @@ expand_volatile_store (tree callexp)
return modify_expr (result, value);
}
+/* Expand a front-end intrinsic call to a vector comparison intrinsic, which is
+ either a call to equalMask(), notEqualMask(), greaterMask(), or
+   greaterOrEqualMask().  These intrinsics take two arguments; their
+   signatures are:
+
+ vector(T) equalMask(vector(T) vec0, vector(T) vec1);
+ vector(T) notEqualMask(vector(T) vec0, vector(T) vec1);
+ vector(T) greaterMask(vector(T) vec0, vector(T) vec1);
+ vector(T) greaterOrEqualMask(vector(T) vec0, vector(T) vec1);
+
+ This performs an element-wise comparison between two vectors VEC0 and VEC1,
+ returning a vector with signed integral elements. */
+
+static tree
+expand_intrinsic_vec_cond (tree_code code, tree callexp)
+{
+ tree vec0 = CALL_EXPR_ARG (callexp, 0);
+ tree vec1 = CALL_EXPR_ARG (callexp, 1);
+ tree type = TREE_TYPE (callexp);
+
+ tree cmp = fold_build2_loc (EXPR_LOCATION (callexp), code,
+ truth_type_for (type), vec0, vec1);
+ return fold_build3_loc (EXPR_LOCATION (callexp), VEC_COND_EXPR, type, cmp,
+ build_minus_one_cst (type), build_zero_cst (type));
+}
+
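
In D source terms the lowering behaves as in the new simd_cond.d test; a minimal sketch:

    import gcc.simd;
    alias int4 = __vector(int[4]);

    void f()
    {
        int4 a = [1, 3, 5, 7];
        int4 b = [2, 3, 4, 5];
        // Lanes that satisfy the comparison become -1 (all bits set), others 0.
        assert(equalMask(a, b).array == [0, -1, 0, 0]);
        assert(greaterMask(a, b).array == [0, 0, -1, -1]);
    }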
+/* Expand a front-end intrinsic call to convertvector().  This takes one
+ argument, the signature to which is:
+
+ vector(T) convertvector (vector(F) vec);
+
+ This converts a vector VEC to TYPE by casting every element in VEC to the
+ element type of TYPE. The original call expression is held in CALLEXP. */
+
+static tree
+expand_intrinsic_vec_convert (tree callexp)
+{
+ tree vec = CALL_EXPR_ARG (callexp, 0);
+ tree type = TREE_TYPE (callexp);
+
+ /* Use VIEW_CONVERT for simple vector conversions. */
+ if ((TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (vec)))
+ == TYPE_MAIN_VARIANT (TREE_TYPE (type)))
+ || (VECTOR_INTEGER_TYPE_P (TREE_TYPE (vec))
+ && VECTOR_INTEGER_TYPE_P (type)
+ && (TYPE_PRECISION (TREE_TYPE (TREE_TYPE (vec)))
+ == TYPE_PRECISION (TREE_TYPE (type)))))
+ return build1_loc (EXPR_LOCATION (callexp), VIEW_CONVERT_EXPR, type, vec);
+
+ return build_call_expr_internal_loc (EXPR_LOCATION (callexp), IFN_VEC_CONVERT,
+ type, 1, vec);
+}
+
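
A short usage sketch (mirroring the new simd_convertvector.d test), with assumed vector aliases:

    import gcc.simd;
    alias int4 = __vector(int[4]);
    alias float4 = __vector(float[4]);

    void f()
    {
        int4 a = [1, 2, -3, -4];
        // Each lane is converted as if by an element-wise cast.
        float4 r = convertvector!float4(a); // [1.0f, 2.0f, -3.0f, -4.0f]
    }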
+/* Expand a front-end intrinsic call to blendvector().  This expects to take
+ three arguments, the signature to which is:
+
+ vector(T) blendvector (vector(T) vec0, vector(U) vec1, vector(M) mask);
+
+   This builds a VEC_COND_EXPR if VEC0, VEC1, and MASK are vector types, VEC0
+   has the same type as VEC1, and VEC0, VEC1, and MASK all have the same
+   number of elements.  The original call expression is held in CALLEXP. */
+
+static tree
+expand_intrinsic_vec_blend (tree callexp)
+{
+ tree vec0 = CALL_EXPR_ARG (callexp, 0);
+ tree vec1 = CALL_EXPR_ARG (callexp, 1);
+ tree mask = CALL_EXPR_ARG (callexp, 2);
+
+ tree cmp = fold_build2_loc (EXPR_LOCATION (callexp), NE_EXPR,
+ truth_type_for (TREE_TYPE (mask)),
+ mask, build_zero_cst (TREE_TYPE (mask)));
+
+ tree ret = fold_build3_loc (EXPR_LOCATION (callexp), VEC_COND_EXPR,
+ TREE_TYPE (callexp), cmp, vec0, vec1);
+
+ if (!CONSTANT_CLASS_P (vec0) || !CONSTANT_CLASS_P (vec1))
+ ret = force_target_expr (ret);
+
+ return ret;
+}
+
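
A usage sketch of the semantics this implements (nonzero mask lanes select from the first operand, zero lanes from the second); aliases assumed:

    import gcc.simd;
    alias int4 = __vector(int[4]);

    void f()
    {
        int4 a = [1, 2, 3, 4];
        int4 b = [5, 6, 7, 8];
        int4 m = [0, -1, 0, -1];
        assert(blendvector(a, b, m).array == [5, 2, 7, 4]);
    }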
+/* Expand a front-end intrinsic call to shuffle().  This expects to take three
+ arguments, the signature to which is:
+
+ vector(T) shuffle (vector(T) vec0, vector(T) vec1, vector(M) mask);
+
+   This builds a VEC_PERM_EXPR if VEC0, VEC1, and MASK are vector types, VEC0
+   has the same type as VEC1, and VEC0, VEC1, and MASK all have the same
+   number of elements.  The original call expression is held in CALLEXP. */
+
+static tree
+expand_intrinsic_vec_shuffle (tree callexp)
+{
+ tree vec0 = CALL_EXPR_ARG (callexp, 0);
+ tree vec1 = CALL_EXPR_ARG (callexp, 1);
+ tree mask = CALL_EXPR_ARG (callexp, 2);
+
+ return build3_loc (EXPR_LOCATION (callexp), VEC_PERM_EXPR,
+ TREE_TYPE (callexp), vec0, vec1, mask);
+}
+
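
A usage sketch of the VEC_PERM_EXPR semantics as seen from D (mask indices 0..N-1 select from the first vector, N..2N-1 from the second); aliases assumed:

    import gcc.simd;
    alias int4 = __vector(int[4]);

    void f()
    {
        int4 a = [1, 2, 3, 4];
        int4 b = [5, 6, 7, 8];
        int4 m = [0, 4, 1, 5];
        assert(shuffle(a, b, m).array == [1, 5, 2, 6]);
    }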
+/* Expand a front-end intrinsic call to shufflevector().  This takes two
+ positional arguments and a variadic list, the signature to which is:
+
+	vector(TM) shufflevector (vector(T) vec0, vector(T) vec1, index...);
+
+ This builds a VEC_PERM_EXPR if VEC0 and VEC1 are vector types, VEC0 has the
+   same element type as VEC1, and the number of INDEX arguments is a power of
+   two.  The original call expression is held in CALLEXP. */
+
+static tree
+expand_intrinsic_vec_shufflevector (tree callexp)
+{
+ tree vec0 = CALL_EXPR_ARG (callexp, 0);
+ tree vec1 = CALL_EXPR_ARG (callexp, 1);
+
+ unsigned HOST_WIDE_INT v0elems, v1elems;
+ TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec0)).is_constant (&v0elems);
+ TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec1)).is_constant (&v1elems);
+
+ unsigned HOST_WIDE_INT num_indices = call_expr_nargs (callexp) - 2;
+ unsigned HOST_WIDE_INT masklen = MAX (num_indices, MAX (v0elems, v1elems));
+ unsigned HOST_WIDE_INT pad_size = (v0elems < masklen ? masklen - v0elems : 0);
+ vec_perm_builder sel (masklen, masklen, 1);
+
+ unsigned n = 0;
+ for (; n < num_indices; ++n)
+ {
+ tree idx = CALL_EXPR_ARG (callexp, n + 2);
+ HOST_WIDE_INT iidx = tree_to_shwi (idx);
+ /* VEC_PERM_EXPR does not allow different sized inputs. */
+ if ((unsigned HOST_WIDE_INT) iidx >= v0elems)
+ iidx += pad_size;
+
+ sel.quick_push (iidx);
+ }
+
+ /* VEC_PERM_EXPR does not support a result that is smaller than the inputs. */
+ for (; n < masklen; ++n)
+ sel.quick_push (n);
+
+ vec_perm_indices indices (sel, 2, masklen);
+
+ /* Pad out arguments to the common vector size. */
+ tree ret_type = build_vector_type (TREE_TYPE (TREE_TYPE (vec0)), masklen);
+ if (v0elems < masklen)
+ {
+ constructor_elt elt = { NULL_TREE, build_zero_cst (TREE_TYPE (vec0)) };
+ vec0 = build_constructor_single (ret_type, NULL_TREE, vec0);
+ for (unsigned i = 1; i < masklen / v0elems; ++i)
+ vec_safe_push (CONSTRUCTOR_ELTS (vec0), elt);
+ }
+
+ if (v1elems < masklen)
+ {
+ constructor_elt elt = { NULL_TREE, build_zero_cst (TREE_TYPE (vec1)) };
+ vec1 = build_constructor_single (ret_type, NULL_TREE, vec1);
+ for (unsigned i = 1; i < masklen / v1elems; ++i)
+ vec_safe_push (CONSTRUCTOR_ELTS (vec1), elt);
+ }
+
+ tree mask_type = build_vector_type (build_nonstandard_integer_type
+ (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (ret_type))), 1),
+ masklen);
+ tree ret = build3_loc (EXPR_LOCATION (callexp), VEC_PERM_EXPR, ret_type, vec0,
+ vec1, vec_perm_indices_to_tree (mask_type, indices));
+
+ /* Get the low part we are interested in. */
+ if (num_indices < masklen)
+ {
+ ret = build3_loc (EXPR_LOCATION (callexp), BIT_FIELD_REF,
+ TREE_TYPE (callexp), ret,
+ TYPE_SIZE (TREE_TYPE (callexp)), bitsize_zero_node);
+ /* Wrap the low part operation in a TARGET_EXPR so it gets a separate
+ temporary during gimplification. */
+ ret = force_target_expr (ret);
+ }
+
+ return ret;
+}
+
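
A sketch of the smaller-result case that exercises the BIT_FIELD_REF path above, assuming the testsuite-style declaration `__vector(E!V1[M.length]) shufflevector(V1, V2, M...)(V1, V2, M)` and target support for two-lane int vectors:

    import gcc.simd;
    alias int4 = __vector(int[4]);

    void f()
    {
        int4 a = [1, 2, 3, 4];
        int4 b = [5, 6, 7, 8];
        // Two indices give a two-lane result; 0..3 pick from a, 4..7 from b.
        auto lo = shufflevector(a, b, 0, 4); // __vector(int[2]) holding [1, 5]
    }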
+/* Expand a front-end intrinsic call to loadUnaligned().  This takes one
+ argument, the signature to which is:
+
+ vector(T) loadUnaligned (vector(T)* ptr)
+
+ This generates a load of a vector from an unaligned address PTR.
+ The original call expression is held in CALLEXP. */
+
+static tree
+expand_intrinsic_vec_load_unaligned (tree callexp)
+{
+ tree ptr = CALL_EXPR_ARG (callexp, 0);
+
+ tree unaligned_type = build_variant_type_copy (TREE_TYPE (TREE_TYPE (ptr)));
+ SET_TYPE_ALIGN (unaligned_type, 1 * BITS_PER_UNIT);
+ TYPE_USER_ALIGN (unaligned_type) = 1;
+
+ tree load = indirect_ref (unaligned_type, ptr);
+ return convert (TREE_TYPE (callexp), load);
+}
+
+/* Expand a front-end intrinsic call to storeUnaligned().  This takes two
+ arguments, the signature to which is:
+
+ vector(T) storeUnaligned (vector(T)* ptr, vector(T) value)
+
+ This generates an assignment of a vector VALUE to an unaligned address PTR.
+ The original call expression is held in CALLEXP. */
+
+static tree
+expand_intrinsic_vec_store_unaligned (tree callexp)
+{
+ tree ptr = CALL_EXPR_ARG (callexp, 0);
+ tree vec = CALL_EXPR_ARG (callexp, 1);
+
+ tree unaligned_type = build_variant_type_copy (TREE_TYPE (TREE_TYPE (ptr)));
+ SET_TYPE_ALIGN (unaligned_type, 1 * BITS_PER_UNIT);
+ TYPE_USER_ALIGN (unaligned_type) = 1;
+
+ tree load = indirect_ref (unaligned_type, ptr);
+ return build_assign (MODIFY_EXPR, load, vec);
+}
+
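
A usage sketch for the unaligned access pair (mirroring the new simd_load.d and simd_store.d tests):

    import gcc.simd;
    alias int4 = __vector(int[4]);

    void f()
    {
        ubyte[32] buf;
        // The pointer need not be 16-byte aligned; the expansions above access
        // the memory through a 1-byte-aligned variant of the vector type.
        int4* p = cast(int4*)(buf.ptr + 1);
        int4 v = loadUnaligned(p);
        storeUnaligned(p, v);
    }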
/* If CALLEXP is for an intrinsic, expand and return inlined compiler
generated instructions. Most map directly to GCC builtins, others
require a little extra work around them. */
@@ -766,6 +1314,15 @@ maybe_expand_intrinsic (tree callexp)
if (DECL_BUILT_IN_CTFE (callee) && !doing_semantic_analysis_p)
return callexp;
+  /* Gate the expansion of the intrinsic with constraint checks; if any fail,
+     then bail out without any lowering.  */
+ if (maybe_warn_intrinsic_mismatch (callee, callexp))
+ {
+ /* Reset the built-in flag so that we don't trip fold_builtin. */
+ set_decl_built_in_function (callee, NOT_BUILT_IN, 0);
+ return callexp;
+ }
+
intrinsic_code intrinsic = DECL_INTRINSIC_CODE (callee);
built_in_function code;
@@ -913,6 +1470,36 @@ maybe_expand_intrinsic (tree callexp)
case INTRINSIC_VSTORE64:
return expand_volatile_store (callexp);
+ case INTRINSIC_LOADUNALIGNED:
+ return expand_intrinsic_vec_load_unaligned (callexp);
+
+ case INTRINSIC_STOREUNALIGNED:
+ return expand_intrinsic_vec_store_unaligned (callexp);
+
+ case INTRINSIC_SHUFFLE:
+ return expand_intrinsic_vec_shuffle (callexp);
+
+ case INTRINSIC_SHUFFLEVECTOR:
+ return expand_intrinsic_vec_shufflevector (callexp);
+
+ case INTRINSIC_CONVERTVECTOR:
+ return expand_intrinsic_vec_convert (callexp);
+
+ case INTRINSIC_BLENDVECTOR:
+ return expand_intrinsic_vec_blend (callexp);
+
+ case INTRINSIC_EQUALMASK:
+ return expand_intrinsic_vec_cond (EQ_EXPR, callexp);
+
+ case INTRINSIC_NOTEQUALMASK:
+ return expand_intrinsic_vec_cond (NE_EXPR, callexp);
+
+ case INTRINSIC_GREATERMASK:
+ return expand_intrinsic_vec_cond (GT_EXPR, callexp);
+
+ case INTRINSIC_GREATEREQUALMASK:
+ return expand_intrinsic_vec_cond (GE_EXPR, callexp);
+
default:
gcc_unreachable ();
}
diff --git a/gcc/d/intrinsics.def b/gcc/d/intrinsics.def
index 61c1737..b8d1ec5 100644
--- a/gcc/d/intrinsics.def
+++ b/gcc/d/intrinsics.def
@@ -252,5 +252,28 @@ DEF_D_BUILTIN (INTRINSIC_C_VA_ARG, BUILT_IN_NONE, "va_arg", "core.stdc.stdarg",
DEF_D_BUILTIN (INTRINSIC_VASTART, BUILT_IN_NONE, "va_start", "core.stdc.stdarg",
"FJ@7va_listK@1TZv")
+/* gcc.simd intrinsics. */
+
+DEF_D_BUILTIN (INTRINSIC_LOADUNALIGNED, BUILT_IN_NONE, "loadUnaligned",
+ "gcc.simd", "FP@1VZ@1V")
+DEF_D_BUILTIN (INTRINSIC_STOREUNALIGNED, BUILT_IN_NONE, "storeUnaligned",
+ "gcc.simd", "FP@1V@1VZ@1V")
+DEF_D_BUILTIN (INTRINSIC_SHUFFLE, BUILT_IN_NONE, "shuffle", "gcc.simd",
+ "F@2V0@2V1@1MZ@2V0")
+DEF_D_BUILTIN (INTRINSIC_SHUFFLEVECTOR, BUILT_IN_NONE, "shufflevector",
+ "gcc.simd", "F@2V1@2V2@1MZNhH@1M@")
+DEF_D_BUILTIN (INTRINSIC_CONVERTVECTOR, BUILT_IN_NONE, "convertvector",
+ "gcc.simd", "F@1TZ@1V")
+DEF_D_BUILTIN (INTRINSIC_BLENDVECTOR, BUILT_IN_NONE, "blendvector", "gcc.simd",
+ "F@2V0@2V1@1MZ@2V0")
+DEF_D_BUILTIN (INTRINSIC_EQUALMASK, BUILT_IN_NONE, "equalMask", "gcc.simd",
+ "F@1V@1VZ@1V")
+DEF_D_BUILTIN (INTRINSIC_NOTEQUALMASK, BUILT_IN_NONE, "notEqualMask",
+ "gcc.simd", "F@1V@1VZ@1V")
+DEF_D_BUILTIN (INTRINSIC_GREATERMASK, BUILT_IN_NONE, "greaterMask", "gcc.simd",
+ "F@1V@1VZ@1V")
+DEF_D_BUILTIN (INTRINSIC_GREATEREQUALMASK, BUILT_IN_NONE,
+ "greaterOrEqualMask", "gcc.simd", "F@1V@1VZ@1V")
+
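
As a reading aid (a hedged interpretation, not a specification of the front-end's placeholder scheme): in these `deco' strings, `F' opens the parameter list, `Z' introduces the return type, `P' marks a pointer, and `@<n><name>' stands for the n-th template parameter. A hypothetical pairing for loadUnaligned against its testsuite declaration:

    // "FP@1VZ@1V"  <->  V loadUnaligned(V)(const V*);
    //   F     - function type, parameters follow
    //   P@1V  - one parameter: pointer to template type V
    //   Z@1V  - end of parameters; return type is V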
#undef DEF_D_BUILTIN
#undef DEF_CTFE_BUILTIN
diff --git a/gcc/testsuite/gdc.dg/Wbuiltin_declaration_mismatch.d b/gcc/testsuite/gdc.dg/Wbuiltin_declaration_mismatch1.d
index 5340647..5340647 100644
--- a/gcc/testsuite/gdc.dg/Wbuiltin_declaration_mismatch.d
+++ b/gcc/testsuite/gdc.dg/Wbuiltin_declaration_mismatch1.d
diff --git a/gcc/testsuite/gdc.dg/Wbuiltin_declaration_mismatch2.d b/gcc/testsuite/gdc.dg/Wbuiltin_declaration_mismatch2.d
new file mode 100644
index 0000000..9e90c15
--- /dev/null
+++ b/gcc/testsuite/gdc.dg/Wbuiltin_declaration_mismatch2.d
@@ -0,0 +1,250 @@
+// { dg-additional-options "-mavx" { target avx_runtime } }
+// { dg-do compile { target { avx_runtime || vect_sizes_16B_8B } } }
+module gcc.simd;
+
+alias int4 = __vector(int[4]);
+alias short8 = __vector(short[8]);
+alias float4 = __vector(float[4]);
+alias byte16 = __vector(byte[16]);
+struct fake4 { int[4] v; }
+enum f = fake4();
+
+void test_load_store()
+{
+ loadUnaligned!int(null); // { dg-warning "mismatch in return type" }
+ loadUnaligned!double(null); // { dg-warning "mismatch in return type" }
+ loadUnaligned!int4(null);
+ loadUnaligned!short8(null);
+ loadUnaligned!float4(null);
+ loadUnaligned!byte16(null);
+ loadUnaligned!fake4(null); // { dg-warning "mismatch in return type" }
+
+ storeUnaligned!int(null, 1); // { dg-warning "mismatch in return type" }
+ storeUnaligned!double(null, 1); // { dg-warning "mismatch in return type" }
+ storeUnaligned!int4(null, 1);
+ storeUnaligned!short8(null, 1);
+ storeUnaligned!float4(null, 1);
+ storeUnaligned!byte16(null, 1);
+ storeUnaligned!fake4(null, f); // { dg-warning "mismatch in return type" }
+}
+
+void test_shuffle()
+{
+ shuffle!(int, int, int)(0, 0, 0); // { dg-warning "mismatch in return type" }
+ shuffle!(double, int, int)(0, 0, 0); // { dg-warning "mismatch in return type" }
+ shuffle!(fake4, int, int)(f, 0, 0); // { dg-warning "mismatch in return type" }
+
+ shuffle!(int4, int, int)(0, 0, 0); // { dg-warning "mismatch in argument 2" }
+ shuffle!(int4, double, int)(0, 0, 0); // { dg-warning "mismatch in argument 2" }
+ shuffle!(int4, fake4, int)(0, f, 0); // { dg-warning "mismatch in argument 2" }
+
+ shuffle!(int4, int4, int)(0, 0, 0); // { dg-warning "mismatch in argument 3" }
+ shuffle!(int4, int4, double)(0, 0, 0); // { dg-warning "mismatch in argument 3" }
+ shuffle!(int4, int4, fake4)(0, 0, f); // { dg-warning "mismatch in argument 3" }
+
+ shuffle!(int4, int4, int4)(0, 0, 0);
+ shuffle!(int4, short8, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+ shuffle!(int4, float4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+ shuffle!(int4, byte16, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+ shuffle!(int4, int4, short8)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+ shuffle!(int4, int4, float4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+ shuffle!(int4, int4, byte16)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+
+ shuffle!(float4, int4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+ shuffle!(float4, short8, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+ shuffle!(float4, float4, int4)(0, 0, 0);
+ shuffle!(float4, byte16, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+ shuffle!(float4, float4, short8)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+ shuffle!(float4, float4, float4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+ shuffle!(float4, float4, byte16)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+
+ shuffle!(short8, int4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+ shuffle!(short8, short8, int4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+ shuffle!(short8, float4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+ shuffle!(short8, byte16, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+ shuffle!(short8, short8, short8)(0, 0, 0);
+ shuffle!(short8, short8, float4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+ shuffle!(short8, short8, byte16)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+
+ shuffle!(byte16, int4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+ shuffle!(byte16, short8, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+ shuffle!(byte16, float4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+ shuffle!(byte16, byte16, int4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+ shuffle!(byte16, byte16, short8)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+ shuffle!(byte16, byte16, float4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+ shuffle!(byte16, byte16, byte16)(0, 0, 0);
+}
+
+void test_shufflevector()
+{
+ shufflevector!(int, int4, int)(0, 0, 0); // { dg-warning "mismatch in argument 1" }
+ shufflevector!(double, int4, int)(0, 0, 0); // { dg-warning "mismatch in argument 1" }
+ shufflevector!(fake4, int4, int)(f, 0, 0); // { dg-warning "mismatch in argument 1" }
+
+ shufflevector!(int4, int, int)(0, 0, 0); // { dg-warning "mismatch in argument 2" }
+ shufflevector!(int4, double, int)(0, 0, 0); // { dg-warning "mismatch in argument 2" }
+ shufflevector!(int4, int4, int)(0, 0, 0);
+ shufflevector!(int4, short8, int)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+ shufflevector!(int4, float4, int)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+ shufflevector!(int4, byte16, int)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+ shufflevector!(int4, fake4, int)(0, f, 0); // { dg-warning "mismatch in argument 2" }
+
+ shufflevector!(int4, int4, double)(0, 0, 0); // { dg-warning "mismatch in argument 3" }
+ shufflevector!(int4, int4, int4)(0, 0, 0); // { dg-warning "mismatch in argument 3" }
+ shufflevector!(int4, int4, short8)(0, 0, 0); // { dg-warning "mismatch in argument 3" }
+ shufflevector!(int4, int4, float4)(0, 0, 0); // { dg-warning "mismatch in argument 3" }
+ shufflevector!(int4, int4, byte16)(0, 0, 0); // { dg-warning "mismatch in argument 3" }
+
+ shufflevector!(int4, int4, int, double)(0, 0, 0, 0); // { dg-warning "mismatch in argument 4" }
+ shufflevector!(int4, int4, int, int, double, int)(0, 0, 0, 0, 0, 0); // { dg-warning "mismatch in argument 5" }
+ shufflevector!(int4, int4, int, int, int, double)(0, 0, 0, 0, 0, 0); // { dg-warning "mismatch in argument 6" }
+
+ int i;
+ shufflevector!(int4, int4, int)(0, 0, i); // { dg-error "argument .i. cannot be read at compile time" }
+ shufflevector!(int4, int4, int)(0, 0, -1u); // { dg-error "element index .-1. is out of bounds" }
+ shufflevector!(int4, int4, int)(0, 0, 8); // { dg-error "element index .8. is out of bounds" }
+}
+
+void test_convertvector()
+{
+ convertvector!(int, int)(0); // { dg-warning "mismatch in return type" }
+ convertvector!(double, int)(0); // { dg-warning "mismatch in return type" }
+ convertvector!(fake4, int)(0); // { dg-warning "mismatch in return type" }
+
+ convertvector!(int4, int)(0); // { dg-warning "mismatch in argument 1" }
+ convertvector!(int4, double)(0); // { dg-warning "mismatch in argument 1" }
+ convertvector!(int4, int4)(0);
+ convertvector!(int4, short8)(0); // { dg-error "mismatch in argument 1" }
+ convertvector!(int4, float4)(0);
+ convertvector!(int4, byte16)(0); // { dg-error "mismatch in argument 1" }
+ convertvector!(int4, fake4)(f); // { dg-warning "mismatch in argument 1" }
+
+ convertvector!(short8, int)(0); // { dg-warning "mismatch in argument 1" }
+ convertvector!(short8, double)(0); // { dg-warning "mismatch in argument 1" }
+ convertvector!(short8, int4)(0); // { dg-error "mismatch in argument 1" }
+ convertvector!(short8, short8)(0);
+ convertvector!(short8, float4)(0); // { dg-error "mismatch in argument 1" }
+ convertvector!(short8, byte16)(0); // { dg-error "mismatch in argument 1" }
+ convertvector!(short8, fake4)(f); // { dg-warning "mismatch in argument 1" }
+
+ convertvector!(float4, int)(0); // { dg-warning "mismatch in argument 1" }
+ convertvector!(float4, double)(0); // { dg-warning "mismatch in argument 1" }
+ convertvector!(float4, int4)(0);
+ convertvector!(float4, short8)(0); // { dg-error "mismatch in argument 1" }
+ convertvector!(float4, float4)(0);
+ convertvector!(float4, byte16)(0); // { dg-error "mismatch in argument 1" }
+ convertvector!(float4, fake4)(f); // { dg-warning "mismatch in argument 1" }
+
+ convertvector!(byte16, int)(0); // { dg-warning "mismatch in argument 1" }
+ convertvector!(byte16, double)(0); // { dg-warning "mismatch in argument 1" }
+ convertvector!(byte16, int4)(0); // { dg-error "mismatch in argument 1" }
+ convertvector!(byte16, short8)(0); // { dg-error "mismatch in argument 1" }
+ convertvector!(byte16, float4)(0); // { dg-error "mismatch in argument 1" }
+ convertvector!(byte16, byte16)(0);
+ convertvector!(byte16, fake4)(f); // { dg-warning "mismatch in argument 1" }
+}
+
+void test_blendvector()
+{
+ blendvector!(int, int, int)(0, 0, 0); // { dg-warning "mismatch in return type" }
+ blendvector!(double, int, int)(0, 0, 0); // { dg-warning "mismatch in return type" }
+ blendvector!(fake4, int, int)(f, 0, 0); // { dg-warning "mismatch in return type" }
+
+ blendvector!(int4, int, int)(0, 0, 0); // { dg-warning "mismatch in argument 2" }
+ blendvector!(int4, double, int)(0, 0, 0); // { dg-warning "mismatch in argument 2" }
+ blendvector!(int4, fake4, int)(0, f, 0); // { dg-warning "mismatch in argument 2" }
+
+ blendvector!(int4, int4, int)(0, 0, 0); // { dg-warning "mismatch in argument 3" }
+ blendvector!(int4, int4, double)(0, 0, 0); // { dg-warning "mismatch in argument 3" }
+ blendvector!(int4, int4, fake4)(0, 0, f); // { dg-warning "mismatch in argument 3" }
+
+ blendvector!(int4, int4, int4)(0, 0, 0);
+ blendvector!(int4, short8, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+ blendvector!(int4, float4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+ blendvector!(int4, byte16, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+ blendvector!(int4, int4, short8)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+ blendvector!(int4, int4, float4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+ blendvector!(int4, int4, byte16)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+
+ blendvector!(float4, int4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+ blendvector!(float4, short8, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+ blendvector!(float4, float4, int4)(0, 0, 0);
+ blendvector!(float4, byte16, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+ blendvector!(float4, float4, short8)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+ blendvector!(float4, float4, float4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+ blendvector!(float4, float4, byte16)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+
+ blendvector!(short8, int4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+ blendvector!(short8, short8, int4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+ blendvector!(short8, float4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+ blendvector!(short8, byte16, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+ blendvector!(short8, short8, short8)(0, 0, 0);
+ blendvector!(short8, short8, float4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+ blendvector!(short8, short8, byte16)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+
+ blendvector!(byte16, int4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+ blendvector!(byte16, short8, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+ blendvector!(byte16, float4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+ blendvector!(byte16, byte16, int4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+ blendvector!(byte16, byte16, short8)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+ blendvector!(byte16, byte16, float4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+ blendvector!(byte16, byte16, byte16)(0, 0, 0);
+}
+
+void test_comparison()
+{
+ equalMask!int(0, 0); // { dg-warning "mismatch in return type" }
+ equalMask!double(0, 0); // { dg-warning "mismatch in return type" }
+ equalMask!int4(0, 0);
+ equalMask!short8(0, 0);
+ equalMask!float4(0, 0);
+ equalMask!byte16(0, 0);
+ equalMask!fake4(f, f); // { dg-warning "mismatch in return type" }
+
+ notEqualMask!int(0, 0); // { dg-warning "mismatch in return type" }
+ notEqualMask!double(0, 0); // { dg-warning "mismatch in return type" }
+ notEqualMask!int4(0, 0);
+ notEqualMask!short8(0, 0);
+ notEqualMask!float4(0, 0);
+ notEqualMask!byte16(0, 0);
+ notEqualMask!fake4(f, f); // { dg-warning "mismatch in return type" }
+
+ greaterMask!int(0, 0); // { dg-warning "mismatch in return type" }
+ greaterMask!double(0, 0); // { dg-warning "mismatch in return type" }
+ greaterMask!int4(0, 0);
+ greaterMask!short8(0, 0);
+ greaterMask!float4(0, 0);
+ greaterMask!byte16(0, 0);
+ greaterMask!fake4(f, f); // { dg-warning "mismatch in return type" }
+
+ greaterOrEqualMask!int(0, 0); // { dg-warning "mismatch in return type" }
+ greaterOrEqualMask!double(0, 0); // { dg-warning "mismatch in return type" }
+ greaterOrEqualMask!int4(0, 0);
+ greaterOrEqualMask!short8(0, 0);
+ greaterOrEqualMask!float4(0, 0);
+ greaterOrEqualMask!byte16(0, 0);
+ greaterOrEqualMask!fake4(f, f); // { dg-warning "mismatch in return type" }
+}
+
+// The following declarations of the simd intrinsics have no template guards,
+// to verify that `d/intrinsics.cc` checks them and prevents invalid lowerings.
+V loadUnaligned(V)(const V*);
+V storeUnaligned(V)(V*, V);
+
+V0 shuffle(V0, V1, M)(V0, V1, M);
+
+// Use overloads to test different argument positions.
+template E(V) { alias typeof(V.array[0]) E; }
+enum isV(T) = is(T : __vector(V[N]), V, size_t N);
+
+__vector(E!V1[M.length]) shufflevector(V1, V2, M...)(V1, V2, M) if (isV!V1 && !isV!V2);
+__vector(E!V2[M.length]) shufflevector(V1, V2, M...)(V1, V2, M) if (isV!V2 && !isV!V1);
+__vector(E!V1[M.length]) shufflevector(V1, V2, M...)(V1, V2, M) if (isV!V1 && isV!V2);
+
+V convertvector(V, T)(T);
+V0 blendvector(V0, V1, M)(V0, V1, M);
+
+V equalMask(V)(V, V);
+V notEqualMask(V)(V, V);
+V greaterMask(V)(V, V);
+V greaterOrEqualMask(V)(V, V);
diff --git a/gcc/testsuite/gdc.dg/torture/simd_blendvector.d b/gcc/testsuite/gdc.dg/torture/simd_blendvector.d
new file mode 100644
index 0000000..42459bd
--- /dev/null
+++ b/gcc/testsuite/gdc.dg/torture/simd_blendvector.d
@@ -0,0 +1,345 @@
+// { dg-additional-options "-mavx" { target avx_runtime } }
+// { dg-skip-if "needs gcc/config.d" { ! d_runtime } }
+import gcc.simd;
+
+void testblendvector(V, VI = V)()
+{
+ alias E = typeof(V.array[0]);
+ enum numElements = V.sizeof / E.sizeof;
+
+ static if (numElements == 16)
+ {
+ // Test fragment for vectors with 16 elements
+ immutable V[5] in1 =
+ [[ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ],
+ [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ],
+ [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ],
+ [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ],
+ [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ]];
+
+ immutable V in2 =
+ [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45 ];
+
+ immutable VI[5] mask1 =
+ [[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ],
+ [ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 ],
+ [ 7, 6, 5, 4, 16, 17, 18, 19, 31, 30, 29, 28, 3, 2, 1, 0 ],
+ [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ],
+ [ 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 ]];
+
+ immutable V[5] out1 =
+ [[30, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25],
+ [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25],
+ [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 45],
+ [30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45],
+ [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]];
+ }
+ else static if (numElements == 8)
+ {
+ // Test fragment for vectors with 8 elements
+ static if (is(E == uint))
+ {
+ enum E A1 = 0x11121314;
+ enum E B1 = 0x21222324;
+ enum E C1 = 0x31323334;
+ enum E D1 = 0x41424344;
+ enum E E1 = 0x51525354;
+ enum E F1 = 0x61626364;
+ enum E G1 = 0x71727374;
+ enum E H1 = 0x81828384;
+
+ enum E A2 = 0x91929394;
+ enum E B2 = 0xa1a2a3a4;
+ enum E C2 = 0xb1b2b3b4;
+ enum E D2 = 0xc1c2c3c4;
+ enum E E2 = 0xd1d2d3d4;
+ enum E F2 = 0xe1e2e3e4;
+ enum E G2 = 0xf1f2f3f4;
+ enum E H2 = 0x01020304;
+ }
+ else static if (is(E == ushort))
+ {
+ enum E A1 = 0x1112;
+ enum E B1 = 0x2122;
+ enum E C1 = 0x3132;
+ enum E D1 = 0x4142;
+ enum E E1 = 0x5152;
+ enum E F1 = 0x6162;
+ enum E G1 = 0x7172;
+ enum E H1 = 0x8182;
+
+ enum E A2 = 0x9192;
+ enum E B2 = 0xa1a2;
+ enum E C2 = 0xb1b2;
+ enum E D2 = 0xc1c2;
+ enum E E2 = 0xd1d2;
+ enum E F2 = 0xe1e2;
+ enum E G2 = 0xf1f2;
+ enum E H2 = 0x0102;
+ }
+ else static if (is(E == ubyte))
+ {
+ enum E A1 = 0x11;
+ enum E B1 = 0x12;
+ enum E C1 = 0x13;
+ enum E D1 = 0x14;
+ enum E E1 = 0x15;
+ enum E F1 = 0x16;
+ enum E G1 = 0x17;
+ enum E H1 = 0x18;
+
+ enum E A2 = 0xf1;
+ enum E B2 = 0xf2;
+ enum E C2 = 0xf3;
+ enum E D2 = 0xf4;
+ enum E E2 = 0xf5;
+ enum E F2 = 0xf6;
+ enum E G2 = 0xf7;
+ enum E H2 = 0xf8;
+ }
+ else
+ enum unsupported = true;
+
+ static if (!__traits(compiles, unsupported))
+ {
+ immutable V[6] in1 =
+ [[ A1, B1, C1, D1, E1, F1, G1, H1 ],
+ [ A1, B1, C1, D1, E1, F1, G1, H1 ],
+ [ A1, B1, C1, D1, E1, F1, G1, H1 ],
+ [ A1, B1, C1, D1, E1, F1, G1, H1 ],
+ [ A1, B1, C1, D1, E1, F1, G1, H1 ],
+ [ A1, B1, C1, D1, E1, F1, G1, H1 ]];
+
+
+ immutable V in2 =
+ [ A2, B2, C2, D2, E2, F2, G2, H2 ];
+
+ immutable VI[6] mask1 =
+            [[ 0, 1, 2, 3, 4, 5, 6, 0 ],
+ [ 8, 9, 0, 11, 12, 13, 0, 15 ],
+ [ 0, 8, 1, 0, 2, 0, 3, 11 ],
+ [ 0, 15, 4, 11, 0, 3, 7, 8 ],
+ [ 0, 0, 0, 0, 0, 0, 0, 0 ],
+ [ 0x1e, 0x2e, 0x3e, 0x4e, 0x5e, 0x6e, 0x7e, 0x8e ]];
+
+ immutable V[6] out1 =
+ [[ A2, B1, C1, D1, E1, F1, G1, H2 ],
+ [ A1, B1, C2, D1, E1, F1, G2, H1 ],
+ [ A2, B1, C1, D2, E1, F2, G1, H1 ],
+ [ A2, B1, C1, D1, E2, F1, G1, H1 ],
+ [ A2, B2, C2, D2, E2, F2, G2, H2 ],
+ [ A1, B1, C1, D1, E1, F1, G1, H1 ]];
+ }
+ }
+ else static if (numElements == 4)
+ {
+ // Test fragment for vectors with 4 elements
+ static if (is(E == double))
+ {
+ enum E A = 0.69314718055994530942;
+ enum E B = 2.7182818284590452354;
+ enum E C = 2.30258509299404568402;
+ enum E D = 1.4426950408889634074;
+
+ enum E W = 0.31830988618379067154;
+ enum E X = 3.14159265358979323846;
+ enum E Y = 1.41421356237309504880;
+ enum E Z = 0.70710678118654752440;
+ }
+ else static if (is(E == float))
+ {
+ enum E A = 0.69314718055994530942f;
+ enum E B = 2.7182818284590452354f;
+ enum E C = 2.30258509299404568402f;
+ enum E D = 1.4426950408889634074f;
+
+ enum E W = 0.31830988618379067154f;
+ enum E X = 3.14159265358979323846f;
+ enum E Y = 1.41421356237309504880f;
+ enum E Z = 0.70710678118654752440f;
+ }
+ else static if (is(E == ulong))
+ {
+ enum E A = 0x1112131415161718;
+ enum E B = 0x2122232425262728;
+ enum E C = 0x3132333435363738;
+ enum E D = 0x4142434445464748;
+
+ enum E W = 0xc1c2c3c4c5c6c7c8;
+ enum E X = 0xd1d2d3d4d5d6d7d8;
+ enum E Y = 0xe1e2e3e4e5e6e7e8;
+ enum E Z = 0xf1f2f3f4f5f6f7f8;
+ }
+ else static if (is(E == uint))
+ {
+ enum E A = 0x11121314;
+ enum E B = 0x21222324;
+ enum E C = 0x31323334;
+ enum E D = 0x41424344;
+
+ enum E W = 0xc1c2c3c4;
+ enum E X = 0xd1d2d3d4;
+ enum E Y = 0xe1e2e3e4;
+ enum E Z = 0xf1f2f3f4;
+ }
+ else
+ enum unsupported = true;
+
+ static if (!__traits(compiles, unsupported))
+ {
+ immutable V[6] in1 =
+ [[ A, B, C, D ],
+ [ A, B, C, D ],
+ [ A, B, C, D ],
+ [ A, B, C, D ],
+ [ A, B, C, D ],
+ [ A, B, C, D ]];
+
+ immutable V in2 = [ W, X, Y, Z ];
+
+ immutable VI[6] mask1 =
+ [[ 0, 1, 2, 3 ],
+ [ 4, 0, 6, 7 ],
+ [ 0, 4, 0, 5 ],
+ [ 0, 7, 4, 0 ],
+ [ 0, 0, 0, 0 ],
+ [ 7, 7, 7, 7 ]];
+
+ immutable V[6] out1 =
+ [[ W, B, C, D ],
+ [ A, X, C, D ],
+ [ W, B, Y, D ],
+ [ W, B, C, Z ],
+ [ W, X, Y, Z ],
+ [ A, B, C, D ]];
+ }
+ }
+ else static if (numElements == 2)
+ {
+ // Test fragment for vectors with 2 elements
+ static if (is(E == double))
+ {
+ enum E A = 0.69314718055994530942;
+ enum E B = 2.7182818284590452354;
+
+ enum E X = 3.14159265358979323846;
+ enum E Y = 1.41421356237309504880;
+ }
+ else static if (is(E == float))
+ {
+ enum E A = 0.69314718055994530942f;
+ enum E B = 2.7182818284590452354f;
+
+ enum E X = 3.14159265358979323846f;
+ enum E Y = 1.41421356237309504880f;
+ }
+ else static if (is(E == ulong))
+ {
+ enum E A = 0x1112131415161718;
+ enum E B = 0x2122232425262728;
+
+ enum E X = 0xc1c2c3c4c5c6c7c8;
+ enum E Y = 0xd1d2d3d4d5d6d7d8;
+ }
+ else static if (is(E == uint))
+ {
+ enum E A = 0x11121314;
+ enum E B = 0x21222324;
+
+ enum E X = 0xd1d2d3d4;
+ enum E Y = 0xe1e2e3e4;
+ }
+ else
+ enum unsupported = true;
+
+ static if (!__traits(compiles, unsupported))
+ {
+ immutable V[7] in1 =
+ [[ A, B ],
+ [ A, B ],
+ [ A, B ],
+ [ A, B ],
+ [ A, B ],
+ [ A, B ],
+ [ A, B ]];
+
+ immutable V in2 = [ X, Y ];
+
+ immutable VI[7] mask1 =
+ [[ 0, 1 ],
+ [ 2, 3 ],
+ [ 0, 2 ],
+ [ 2, 1 ],
+ [ 3, 0 ],
+ [ 0, 0 ],
+ [ 3, 3 ]];
+
+ immutable V[7] out1 =
+ [[ X, B ],
+ [ A, B ],
+ [ X, B ],
+ [ A, B ],
+ [ A, Y ],
+ [ X, Y ],
+ [ A, B ]];
+ }
+ }
+ else
+ enum unsupported = true;
+
+ static if (!__traits(compiles, unsupported))
+ {
+ static foreach (i; 0 .. in1.length)
+ assert(blendvector(in1[i], in2, mask1[i]).array == out1[i].array);
+ }
+}
+
+void main()
+{
+ static if (__traits(compiles, __vector(ubyte[16])))
+ testblendvector!(__vector(ubyte[16]))();
+
+ static if (__traits(compiles, __vector(ushort[16])))
+ testblendvector!(__vector(ushort[16]))();
+
+ static if (__traits(compiles, __vector(ubyte[8])))
+ testblendvector!(__vector(ubyte[8]))();
+
+ static if (__traits(compiles, __vector(ushort[8])))
+ testblendvector!(__vector(ushort[8]))();
+
+ static if (__traits(compiles, __vector(uint[8])))
+ testblendvector!(__vector(uint[8]))();
+
+ static if (__traits(compiles, __vector(ulong[4])))
+ {
+ testblendvector!(__vector(ulong[4]));
+
+ static if (__traits(compiles, __vector(double[4])))
+ testblendvector!(__vector(double[4]), __vector(ulong[4]));
+ }
+
+ static if (__traits(compiles, __vector(uint[4])))
+ {
+ testblendvector!(__vector(uint[4]));
+
+ static if (__traits(compiles, __vector(float[4])))
+ testblendvector!(__vector(float[4]), __vector(uint[4]));
+ }
+
+ static if (__traits(compiles, __vector(ulong[2])))
+ {
+ testblendvector!(__vector(ulong[2]));
+
+ static if (__traits(compiles, __vector(double[2])))
+ testblendvector!(__vector(double[2]), __vector(ulong[2]));
+ }
+
+ static if (__traits(compiles, __vector(uint[2])))
+ {
+ testblendvector!(__vector(uint[2]));
+
+ static if (__traits(compiles, __vector(float[2])))
+ testblendvector!(__vector(float[2]), __vector(uint[2]));
+ }
+}
diff --git a/gcc/testsuite/gdc.dg/torture/simd_cond.d b/gcc/testsuite/gdc.dg/torture/simd_cond.d
new file mode 100644
index 0000000..1548956
--- /dev/null
+++ b/gcc/testsuite/gdc.dg/torture/simd_cond.d
@@ -0,0 +1,17 @@
+// { dg-skip-if "needs gcc/config.d" { ! d_runtime } }
+
+import gcc.simd;
+
+void main()
+{
+ static if (__traits(compiles, __vector(int[4])))
+ {
+ __gshared __vector(int[4]) a = [1,3,5,7];
+ __gshared __vector(int[4]) b = [2,3,4,5];
+
+ assert(equalMask(a, b).array == [0,-1,0,0]);
+ assert(notEqualMask(a, b).array == [-1,0,-1,-1]);
+ assert(greaterMask(a, b).array == [0,0,-1,-1]);
+ assert(greaterOrEqualMask(a, b).array == [0,-1,-1,-1]);
+ }
+}
diff --git a/gcc/testsuite/gdc.dg/torture/simd_convertvector.d b/gcc/testsuite/gdc.dg/torture/simd_convertvector.d
new file mode 100644
index 0000000..0d6b18e
--- /dev/null
+++ b/gcc/testsuite/gdc.dg/torture/simd_convertvector.d
@@ -0,0 +1,122 @@
+// { dg-additional-options "-mavx" { target avx_runtime } }
+// { dg-skip-if "needs gcc/config.d" { ! d_runtime } }
+import gcc.simd;
+
+void main ()
+{
+ static if (__traits(compiles, __vector(int[4])))
+ alias int4 = __vector(int[4]);
+ static if (__traits(compiles, __vector(uint[4])))
+ alias uint4 = __vector(uint[4]);
+ static if (__traits(compiles, __vector(float[4])))
+ alias float4 = __vector(float[4]);
+ static if (__traits(compiles, __vector(double[4])))
+ alias double4 = __vector(double[4]);
+
+ static if (__traits(compiles, int4))
+ {
+ union U1 { int4 v; int[4] a; }
+ U1 u1;
+ }
+ static if (__traits(compiles, uint4))
+ {
+ union U2 { uint4 v; uint[4] a; }
+ U2 u2;
+ }
+ static if (__traits(compiles, float4))
+ {
+ union U3 { float4 v; float[4] a; }
+ U3 u3;
+ }
+ static if (__traits(compiles, double4))
+ {
+ union U4 { double4 v; double[4] a; }
+ U4 u4;
+ }
+
+ static if (__traits(compiles, u1) && __traits(compiles, u2))
+ {
+ static void f1(ref uint4 x, out int4 y)
+ {
+ y = convertvector!int4(x);
+ }
+ static foreach (i; 0 .. 4)
+ u2.a[i] = i * 2;
+ f1(u2.v, u1.v);
+ static foreach (i; 0 .. 4)
+ assert(u1.a[i] == i * 2);
+ }
+
+ static if (__traits(compiles, u1) && __traits(compiles, u3))
+ {
+ static void f2(ref float4 x, out int4 y)
+ {
+ y = convertvector!int4(x);
+ }
+
+ static void f3(ref int4 x, out float4 y)
+ {
+ y = convertvector!float4(x);
+ }
+
+ static foreach (i; 0 .. 4)
+ u3.a[i] = i - 2.25f;
+ f2(u3.v, u1.v);
+ static foreach (i; 0 .. 4)
+ assert(u1.a[i] == (i == 3 ? 0 : i - 2));
+
+ static foreach (i; 0 .. 4)
+ u3.a[i] = i + 0.75f;
+ f2(u3.v, u1.v);
+ static foreach (i; 0 .. 4)
+ assert(u1.a[i] == i);
+
+ static foreach (i; 0 .. 4)
+ u1.a[i] = 7 * i - 5;
+ f3(u1.v, u3.v);
+ static foreach (i; 0 .. 4)
+ assert(u3.a[i] == 7 * i - 5);
+ }
+ static if (__traits(compiles, u1) && __traits(compiles, u4))
+ {
+ static void f4(ref double4 x, out int4 y)
+ {
+ y = convertvector!int4(x);
+ }
+
+ static void f5(ref int4 x, out double4 y)
+ {
+ y = convertvector!double4(x);
+ }
+
+ static foreach (i; 0 .. 4)
+ u4.a[i] = i - 2.25;
+ f4(u4.v, u1.v);
+ static foreach (i; 0 .. 4)
+ assert(u1.a[i] == (i == 3 ? 0 : i - 2));
+
+ static foreach (i; 0 .. 4)
+ u4.a[i] = i + 0.75;
+ f4(u4.v, u1.v);
+ static foreach (i; 0 .. 4)
+ assert(u1.a[i] == i);
+
+ static foreach (i; 0 .. 4)
+ u1.a[i] = 7 * i - 5;
+ f5(u1.v, u4.v);
+ static foreach (i; 0 .. 4)
+ assert(u4.a[i] == 7 * i - 5);
+ }
+ static if (__traits(compiles, u4))
+ {
+ static void f6(out double4 x)
+ {
+ int4 a = [1, 2, -3, -4];
+ x = convertvector!double4(a);
+ }
+
+ f6(u4.v);
+ static foreach (i; 0 .. 4)
+ assert(u4.a[i] == (i >= 2 ? -1 - i : i + 1));
+ }
+}
diff --git a/gcc/testsuite/gdc.dg/torture/simd_load.d b/gcc/testsuite/gdc.dg/torture/simd_load.d
new file mode 100644
index 0000000..188ffda
--- /dev/null
+++ b/gcc/testsuite/gdc.dg/torture/simd_load.d
@@ -0,0 +1,52 @@
+// { dg-additional-options "-mavx" { target avx_runtime } }
+// { dg-skip-if "needs gcc/config.d" { ! d_runtime } }
+import gcc.simd;
+
+void main()
+{
+ ubyte[32] data;
+ foreach (i; 0..data.length)
+ {
+ data[i] = cast(ubyte)i;
+ }
+
+ // to test all alignments from 1 ~ 16
+ foreach (i; 0..16)
+ {
+ ubyte* d = &data[i];
+
+ void test(T)()
+ {
+ // load the data
+ T v = loadUnaligned(cast(T*)d);
+
+ // check that the data was loaded correctly
+ ubyte* ptrToV = cast(ubyte*)&v;
+ foreach (j; 0..T.sizeof)
+ assert(ptrToV[j] == d[j]);
+ }
+
+ static if (__traits(compiles, __vector(void[16])))
+ test!(__vector(void[16]))();
+ static if (__traits(compiles, __vector(byte[16])))
+ test!(__vector(byte[16]))();
+ static if (__traits(compiles, __vector(ubyte[16])))
+ test!(__vector(ubyte[16]))();
+ static if (__traits(compiles, __vector(short[8])))
+ test!(__vector(short[8]))();
+ static if (__traits(compiles, __vector(ushort[8])))
+ test!(__vector(ushort[8]))();
+ static if (__traits(compiles, __vector(int[4])))
+ test!(__vector(int[4]))();
+ static if (__traits(compiles, __vector(uint[4])))
+ test!(__vector(uint[4]))();
+ static if (__traits(compiles, __vector(long[2])))
+ test!(__vector(long[2]))();
+ static if (__traits(compiles, __vector(ulong[2])))
+ test!(__vector(ulong[2]))();
+ static if (__traits(compiles, __vector(double[2])))
+ test!(__vector(double[2]))();
+ static if (__traits(compiles, __vector(float[4])))
+ test!(__vector(float[4]))();
+ }
+}
diff --git a/gcc/testsuite/gdc.dg/torture/simd_logical.d b/gcc/testsuite/gdc.dg/torture/simd_logical.d
new file mode 100644
index 0000000..e9c23f5
--- /dev/null
+++ b/gcc/testsuite/gdc.dg/torture/simd_logical.d
@@ -0,0 +1,19 @@
+// { dg-skip-if "needs gcc/config.d" { ! d_runtime } }
+
+import gcc.simd;
+
+void main()
+{
+ static if (__traits(compiles, __vector(int[4])))
+ {
+ __gshared __vector(int[4]) a = [1,0,-1,2];
+
+ assert(notMask(a).array == [0,-1,0,0]);
+
+ assert(andAndMask(a, 1).array == [-1,0,-1,-1]);
+ assert(andAndMask(a, 0).array == [0,0,0,0]);
+
+ assert(orOrMask(a, 1).array == [-1,-1,-1,-1]);
+ assert(orOrMask(a, 0).array == [-1,0,-1,-1]);
+ }
+}
diff --git a/gcc/testsuite/gdc.dg/torture/simd_shuffle.d b/gcc/testsuite/gdc.dg/torture/simd_shuffle.d
new file mode 100644
index 0000000..3629cee
--- /dev/null
+++ b/gcc/testsuite/gdc.dg/torture/simd_shuffle.d
@@ -0,0 +1,454 @@
+// { dg-additional-options "-mavx" { target avx_runtime } }
+// { dg-skip-if "needs gcc/config.d" { ! d_runtime } }
+import gcc.simd;
+
+void testshuffle(V, VI = V)()
+{
+ alias E = typeof(V.array[0]);
+ enum numElements = V.sizeof / E.sizeof;
+
+ static if (numElements == 16)
+ {
+ // Test fragment for vectors with 16 elements
+ immutable V[5] in1 =
+ [[ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ],
+ [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ],
+ [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ],
+ [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ],
+ [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ]];
+
+ immutable VI[5] mask1 =
+ [[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ],
+ [ 0x10, 0x21, 0x32, 0x43, 0x54, 0x65, 0x76, 0x87,
+              0x98, 0xa9, 0xba, 0xcb, 0xdc, 0xed, 0xfe, 0xff ],
+ [ 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 ],
+ [ 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 ],
+ [ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 ]];
+
+ immutable V[5] out1 =
+ [[ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ],
+ [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ],
+ [ 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 ],
+ [ 1, 3, 5, 7, 9, 11, 13, 15, 2, 4, 6, 8, 10, 12, 14, 16 ],
+ [ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 ]];
+
+ immutable V[5] in2 =
+ [[ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ],
+ [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ],
+ [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ],
+ [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ],
+ [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ]];
+
+ immutable V in3 =
+ [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45 ];
+
+ immutable VI[5] mask2 =
+ [[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ],
+ [ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 ],
+ [ 7, 6, 5, 4, 16, 17, 18, 19, 31, 30, 29, 28, 3, 2, 1, 0 ],
+ [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ],
+ [ 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 ]];
+
+ immutable V[5] out2 =
+ [[ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ],
+ [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45 ],
+ [ 17, 16, 15, 14, 30, 31, 32, 33, 45, 44, 43, 42, 13, 12, 11, 10 ],
+ [ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10 ],
+ [ 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45 ]];
+ }
+ else static if (numElements == 8)
+ {
+ // Test fragment for vectors with 8 elements
+ static if (is(E == uint))
+ {
+ enum E A1 = 0x11121314;
+ enum E B1 = 0x21222324;
+ enum E C1 = 0x31323334;
+ enum E D1 = 0x41424344;
+ enum E E1 = 0x51525354;
+ enum E F1 = 0x61626364;
+ enum E G1 = 0x71727374;
+ enum E H1 = 0x81828384;
+
+ enum E A2 = 0x91929394;
+ enum E B2 = 0xa1a2a3a4;
+ enum E C2 = 0xb1b2b3b4;
+ enum E D2 = 0xc1c2c3c4;
+ enum E E2 = 0xd1d2d3d4;
+ enum E F2 = 0xe1e2e3e4;
+ enum E G2 = 0xf1f2f3f4;
+ enum E H2 = 0x01020304;
+ }
+ else static if (is(E == ushort))
+ {
+ enum E A1 = 0x1112;
+ enum E B1 = 0x2122;
+ enum E C1 = 0x3132;
+ enum E D1 = 0x4142;
+ enum E E1 = 0x5152;
+ enum E F1 = 0x6162;
+ enum E G1 = 0x7172;
+ enum E H1 = 0x8182;
+
+ enum E A2 = 0x9192;
+ enum E B2 = 0xa1a2;
+ enum E C2 = 0xb1b2;
+ enum E D2 = 0xc1c2;
+ enum E E2 = 0xd1d2;
+ enum E F2 = 0xe1e2;
+ enum E G2 = 0xf1f2;
+ enum E H2 = 0x0102;
+ }
+ else static if (is(E == ubyte))
+ {
+ enum E A1 = 0x11;
+ enum E B1 = 0x12;
+ enum E C1 = 0x13;
+ enum E D1 = 0x14;
+ enum E E1 = 0x15;
+ enum E F1 = 0x16;
+ enum E G1 = 0x17;
+ enum E H1 = 0x18;
+
+ enum E A2 = 0xf1;
+ enum E B2 = 0xf2;
+ enum E C2 = 0xf3;
+ enum E D2 = 0xf4;
+ enum E E2 = 0xf5;
+ enum E F2 = 0xf6;
+ enum E G2 = 0xf7;
+ enum E H2 = 0xf8;
+ }
+ else
+ enum unsupported = true;
+
+ static if (!__traits(compiles, unsupported))
+ {
+ immutable V[8] in1 =
+ [[ A1, B1, C1, D1, E1, F1, G1, H1 ],
+ [ A1, B1, C1, D1, E1, F1, G1, H1 ],
+ [ A1, B1, C1, D1, E1, F1, G1, H1 ],
+ [ A1, B1, C1, D1, E1, F1, G1, H1 ],
+ [ A1, B1, C1, D1, E1, F1, G1, H1 ],
+ [ A2, B2, C2, D2, E2, F2, G2, H2 ],
+ [ A2, B2, C2, D2, E2, F2, G2, H2 ],
+ [ A2, B2, C2, D2, E2, F2, G2, H2 ]];
+
+ immutable VI[8] mask1 =
+ [[ 0, 1, 2, 3, 4, 5, 6, 7 ],
+ [ 0x10, 0x21, 0x32, 0x43, 0x54, 0x65, 0x76, 0x87 ],
+ [ 7, 6, 5, 4, 3, 2, 1, 0 ],
+ [ 7, 0, 5, 3, 2, 4, 1, 6 ],
+ [ 0, 2, 1, 3, 4, 6, 5, 7 ],
+ [ 3, 1, 2, 0, 7, 5, 6, 4 ],
+ [ 0, 0, 0, 0, 0, 0, 0, 0 ],
+ [ 1, 6, 1, 6, 1, 6, 1, 6 ]];
+
+ immutable V[8] out1 =
+ [[ A1, B1, C1, D1, E1, F1, G1, H1 ],
+ [ A1, B1, C1, D1, E1, F1, G1, H1 ],
+ [ H1, G1, F1, E1, D1, C1, B1, A1 ],
+ [ H1, A1, F1, D1, C1, E1, B1, G1 ],
+ [ A1, C1, B1, D1, E1, G1, F1, H1 ],
+ [ D2, B2, C2, A2, H2, F2, G2, E2 ],
+ [ A2, A2, A2, A2, A2, A2, A2, A2 ],
+ [ B2, G2, B2, G2, B2, G2, B2, G2 ]];
+
+ immutable V[6] in2 =
+ [[ A1, B1, C1, D1, E1, F1, G1, H1 ],
+ [ A1, B1, C1, D1, E1, F1, G1, H1 ],
+ [ A1, B1, C1, D1, E1, F1, G1, H1 ],
+ [ A1, B1, C1, D1, E1, F1, G1, H1 ],
+ [ A1, B1, C1, D1, E1, F1, G1, H1 ],
+ [ A1, B1, C1, D1, E1, F1, G1, H1 ]];
+
+ immutable V in3 =
+ [ A2, B2, C2, D2, E2, F2, G2, H2 ];
+
+ immutable VI[6] mask2 =
+ [[ 0, 1, 2, 3, 4, 5, 6, 7 ],
+ [ 8, 9, 10, 11, 12, 13, 14, 15 ],
+ [ 0, 8, 1, 9, 2, 10, 3, 11 ],
+ [ 0, 15, 4, 11, 12, 3, 7, 8 ],
+ [ 0, 0, 0, 0, 0, 0, 0, 0 ],
+ [ 0x1e, 0x2e, 0x3e, 0x4e, 0x5e, 0x6e, 0x7e, 0x8e ]];
+
+ immutable V[6] out2 =
+ [[ A1, B1, C1, D1, E1, F1, G1, H1 ],
+ [ A2, B2, C2, D2, E2, F2, G2, H2 ],
+ [ A1, A2, B1, B2, C1, C2, D1, D2 ],
+ [ A1, H2, E1, D2, E2, D1, H1, A2 ],
+ [ A1, A1, A1, A1, A1, A1, A1, A1 ],
+ [ G2, G2, G2, G2, G2, G2, G2, G2 ]];
+ }
+ }
+ else static if (numElements == 4)
+ {
+ // Test fragment for vectors with 4 elements
+ static if (is(E == double))
+ {
+ enum E A = 0.69314718055994530942;
+ enum E B = 2.7182818284590452354;
+ enum E C = 2.30258509299404568402;
+ enum E D = 1.4426950408889634074;
+
+ enum E W = 0.31830988618379067154;
+ enum E X = 3.14159265358979323846;
+ enum E Y = 1.41421356237309504880;
+ enum E Z = 0.70710678118654752440;
+ }
+ else static if (is(E == float))
+ {
+ enum E A = 0.69314718055994530942f;
+ enum E B = 2.7182818284590452354f;
+ enum E C = 2.30258509299404568402f;
+ enum E D = 1.4426950408889634074f;
+
+ enum E W = 0.31830988618379067154f;
+ enum E X = 3.14159265358979323846f;
+ enum E Y = 1.41421356237309504880f;
+ enum E Z = 0.70710678118654752440f;
+ }
+ else static if (is(E == ulong))
+ {
+ enum E A = 0x1112131415161718;
+ enum E B = 0x2122232425262728;
+ enum E C = 0x3132333435363738;
+ enum E D = 0x4142434445464748;
+
+ enum E W = 0xc1c2c3c4c5c6c7c8;
+ enum E X = 0xd1d2d3d4d5d6d7d8;
+ enum E Y = 0xe1e2e3e4e5e6e7e8;
+ enum E Z = 0xf1f2f3f4f5f6f7f8;
+ }
+ else static if (is(E == uint))
+ {
+ enum E A = 0x11121314;
+ enum E B = 0x21222324;
+ enum E C = 0x31323334;
+ enum E D = 0x41424344;
+
+ enum E W = 0xc1c2c3c4;
+ enum E X = 0xd1d2d3d4;
+ enum E Y = 0xe1e2e3e4;
+ enum E Z = 0xf1f2f3f4;
+ }
+ else
+ enum unsupported = true;
+
+ static if (!__traits(compiles, unsupported))
+ {
+ immutable V[8] in1 =
+ [[ A, B, C, D ],
+ [ A, B, C, D ],
+ [ A, B, C, D ],
+ [ A, B, C, D ],
+ [ A, B, C, D ],
+ [ W, X, Y, Z ],
+ [ W, X, Y, Z ],
+ [ W, X, Y, Z ]];
+
+ immutable VI[8] mask1 =
+ [[ 0, 1, 2, 3 ],
+ [ 0+1*4, 1+2*4, 2+3*4, 3+4*4 ],
+ [ 3, 2, 1, 0 ],
+ [ 0, 3, 2, 1 ],
+ [ 0, 2, 1, 3 ],
+ [ 3, 1, 2, 0 ],
+ [ 0, 0, 0, 0 ],
+ [ 1, 2, 1, 2 ]];
+
+ immutable V[8] out1 =
+ [[ A, B, C, D ],
+ [ A, B, C, D ],
+ [ D, C, B, A ],
+ [ A, D, C, B ],
+ [ A, C, B, D ],
+ [ Z, X, Y, W ],
+ [ W, W, W, W ],
+ [ X, Y, X, Y ]];
+
+ immutable V[6] in2 =
+ [[ A, B, C, D ],
+ [ A, B, C, D ],
+ [ A, B, C, D ],
+ [ A, B, C, D ],
+ [ A, B, C, D ],
+ [ A, B, C, D ]];
+
+ immutable V in3 = [ W, X, Y, Z ];
+
+ immutable VI[6] mask2 =
+ [[ 0, 1, 2, 3 ],
+ [ 4, 5, 6, 7 ],
+ [ 0, 4, 1, 5 ],
+ [ 0, 7, 4, 3 ],
+ [ 0, 0, 0, 0 ],
+ [ 7, 7, 7, 7 ]];
+
+ immutable V[6] out2 =
+ [[ A, B, C, D ],
+ [ W, X, Y, Z ],
+ [ A, W, B, X ],
+ [ A, Z, W, D ],
+ [ A, A, A, A ],
+ [ Z, Z, Z, Z ]];
+ }
+ }
+ else static if (numElements == 2)
+ {
+ // Test fragment for vectors with 2 elements
+ static if (is(E == double))
+ {
+ enum E A = 0.69314718055994530942;
+ enum E B = 2.7182818284590452354;
+
+ enum E X = 3.14159265358979323846;
+ enum E Y = 1.41421356237309504880;
+ }
+ else static if (is(E == float))
+ {
+ enum E A = 0.69314718055994530942f;
+ enum E B = 2.7182818284590452354f;
+
+ enum E X = 3.14159265358979323846f;
+ enum E Y = 1.41421356237309504880f;
+ }
+ else static if (is(E == ulong))
+ {
+ enum E A = 0x1112131415161718;
+ enum E B = 0x2122232425262728;
+
+ enum E X = 0xc1c2c3c4c5c6c7c8;
+ enum E Y = 0xd1d2d3d4d5d6d7d8;
+ }
+ else static if (is(E == uint))
+ {
+ enum E A = 0x11121314;
+ enum E B = 0x21222324;
+
+ enum E X = 0xd1d2d3d4;
+ enum E Y = 0xe1e2e3e4;
+ }
+ else
+ enum unsupported = true;
+
+ static if (!__traits(compiles, unsupported))
+ {
+ immutable V[6] in1 =
+ [[ A, B ],
+ [ A, B ],
+ [ A, B ],
+ [ A, B ],
+ [ X, Y ],
+ [ X, Y ]];
+
+ immutable VI[6] mask1 =
+ [[ 0, 1 ],
+ [ -16, 1 ],
+ [ 1, 0 ],
+ [ 0, 0 ],
+ [ 1, 1 ],
+ [ 1, 0 ]];
+
+ immutable V[6] out1 =
+ [[ A, B ],
+ [ A, B ],
+ [ B, A ],
+ [ A, A ],
+ [ Y, Y ],
+ [ Y, X ]];
+
+ immutable V[7] in2 =
+ [[ A, B ],
+ [ A, B ],
+ [ A, B ],
+ [ A, B ],
+ [ A, B ],
+ [ A, B ],
+ [ A, B ]];
+
+ immutable V in3 = [ X, Y ];
+
+ immutable VI[7] mask2 =
+ [[ 0, 1 ],
+ [ 2, 3 ],
+ [ 0, 2 ],
+ [ 2, 1 ],
+ [ 3, 0 ],
+ [ 0, 0 ],
+ [ 3, 3 ]];
+
+ immutable V[7] out2 =
+ [[ A, B ],
+ [ X, Y ],
+ [ A, X ],
+ [ X, B ],
+ [ Y, A ],
+ [ A, A ],
+ [ Y, Y ]];
+ }
+ }
+ else
+ enum unsupported = true;
+
+ static if (!__traits(compiles, unsupported))
+ {
+ static foreach (i; 0 .. in1.length)
+ assert(shuffle(in1[i], mask1[i]).array == out1[i].array);
+ static foreach (i; 0 .. in2.length)
+ assert(shuffle(in2[i], in3, mask2[i]).array == out2[i].array);
+ }
+}
+
+void main()
+{
+ static if (__traits(compiles, __vector(ubyte[16])))
+ testshuffle!(__vector(ubyte[16]))();
+
+ static if (__traits(compiles, __vector(ushort[16])))
+ testshuffle!(__vector(ushort[16]))();
+
+ static if (__traits(compiles, __vector(ubyte[8])))
+ testshuffle!(__vector(ubyte[8]))();
+
+ static if (__traits(compiles, __vector(ushort[8])))
+ testshuffle!(__vector(ushort[8]))();
+
+ static if (__traits(compiles, __vector(uint[8])))
+ testshuffle!(__vector(uint[8]))();
+
+ static if (__traits(compiles, __vector(ulong[4])))
+ {
+ testshuffle!(__vector(ulong[4]));
+
+ static if (__traits(compiles, __vector(double[4])))
+ testshuffle!(__vector(double[4]), __vector(ulong[4]));
+ }
+
+ static if (__traits(compiles, __vector(uint[4])))
+ {
+ testshuffle!(__vector(uint[4]));
+
+ static if (__traits(compiles, __vector(float[4])))
+ testshuffle!(__vector(float[4]), __vector(uint[4]));
+ }
+
+ static if (__traits(compiles, __vector(ulong[2])))
+ {
+ testshuffle!(__vector(ulong[2]));
+
+ static if (__traits(compiles, __vector(double[2])))
+ testshuffle!(__vector(double[2]), __vector(ulong[2]));
+ }
+
+ static if (__traits(compiles, __vector(uint[2])))
+ {
+ testshuffle!(__vector(uint[2]));
+
+ static if (__traits(compiles, __vector(float[2])))
+ testshuffle!(__vector(float[2]), __vector(uint[2]));
+ }
+}
diff --git a/gcc/testsuite/gdc.dg/torture/simd_shufflevector.d b/gcc/testsuite/gdc.dg/torture/simd_shufflevector.d
new file mode 100644
index 0000000..cc55999
--- /dev/null
+++ b/gcc/testsuite/gdc.dg/torture/simd_shufflevector.d
@@ -0,0 +1,55 @@
+// { dg-additional-options "-mavx" { target avx_runtime } }
+// { dg-skip-if "needs gcc/config.d" { ! d_runtime } }
+import gcc.simd;
+import gcc.attributes;
+
+void main()
+{
+ static if (__traits(compiles, __vector(int[4])))
+ alias int4 = __vector(int[4]);
+ static if (__traits(compiles, __vector(int[8])))
+ alias int8 = __vector(int[8]);
+
+ static if (__traits(compiles, int4) && __traits(compiles, int8))
+ {
+ __gshared int4[5] res;
+ __gshared int4 a;
+ __gshared int4 b;
+ __gshared int8[3] res8;
+ __gshared int8 a8;
+ __gshared int8 b8;
+
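+ // `@noipa` stops interprocedural optimisations from folding these
+ // shuffles against the known values of the global inputs.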
+ @noipa static void foo()
+ {
+ res[0] = shufflevector(a, b, 0, 1, 4, 5);
+ res[1] = shufflevector(a, b, 0, 1, 2, 5);
+ res8[0] = shufflevector(a, b, 0, 1, 2, 2 + 1, 4, 5, 6, 7);
+ res[2] = shufflevector(a8, b8, 0, 8, 1, 9);
+ res[3] = shufflevector(a8, b, 0, 8, 1, 9);
+ res[4] = shufflevector(a, b8, 0, 4, 1, 5);
+ res8[1] = shufflevector(a8, b, 0, 8, 1, 9, 10, 11, 2, 3);
+ res8[2] = shufflevector(a, b8, 0, 4, 1, 5, 4, 5, 6, 7);
+ }
+
+ a = [0, 1, 2, 3];
+ b = [4, 5, 6, 7];
+ a8 = [0, 1, 2, 3, 4, 5, 6, 7];
+ b8 = [8, 9, 10, 11, 12, 13, 14, 15];
+ foo();
+ assert(res[0].array == [0, 1, 4, 5]);
+
+ res[1][2] = 9;
+ assert(res[1].array == [0, 1, 9, 5]);
+ assert(res8[0].array == [0, 1, 2, 3, 4, 5, 6, 7]);
+ assert(res[2].array == [0, 8, 1, 9]);
+ assert(res[3].array == [0, 4, 1, 5]);
+ assert(res[4].array == [0, 8, 1, 9]);
+ assert(res8[1].array == [0, 4, 1, 5, 6, 7, 2, 3]);
+
+ res8[2][4] = 42;
+ res8[2][5] = 42;
+ res8[2][6] = 42;
+ res8[2][7] = 42;
+ assert(res8[2].array == [0, 8, 1, 9, 42, 42, 42, 42]);
+ }
+}
diff --git a/gcc/testsuite/gdc.dg/torture/simd_store.d b/gcc/testsuite/gdc.dg/torture/simd_store.d
new file mode 100644
index 0000000..b96ed42
--- /dev/null
+++ b/gcc/testsuite/gdc.dg/torture/simd_store.d
@@ -0,0 +1,54 @@
+// { dg-additional-options "-mavx" { target avx_runtime } }
+// { dg-skip-if "needs gcc/config.d" { ! d_runtime } }
+import gcc.simd;
+
+void main()
+{
+ ubyte[32] data;
+
+ // test all alignment offsets from 0 to 15
+ foreach (i; 0..16)
+ {
+ ubyte* d = &data[i];
+
+ void test(T)()
+ {
+ T v;
+
+ // populate `v` with data
+ ubyte* ptrToV = cast(ubyte*)&v;
+ foreach (j; 0..T.sizeof)
+ ptrToV[j] = cast(ubyte)j;
+
+ // store `v` to location pointed to by `d`
+ storeUnaligned(cast(T*)d, v);
+
+ // check that the data was stored correctly
+ foreach (j; 0..T.sizeof)
+ assert(ptrToV[j] == d[j]);
+ }
+
+ static if (__traits(compiles, __vector(void[16])))
+ test!(__vector(void[16]))();
+ static if (__traits(compiles, __vector(byte[16])))
+ test!(__vector(byte[16]))();
+ static if (__traits(compiles, __vector(ubyte[16])))
+ test!(__vector(ubyte[16]))();
+ static if (__traits(compiles, __vector(short[8])))
+ test!(__vector(short[8]))();
+ static if (__traits(compiles, __vector(ushort[8])))
+ test!(__vector(ushort[8]))();
+ static if (__traits(compiles, __vector(int[4])))
+ test!(__vector(int[4]))();
+ static if (__traits(compiles, __vector(uint[4])))
+ test!(__vector(uint[4]))();
+ static if (__traits(compiles, __vector(long[2])))
+ test!(__vector(long[2]))();
+ static if (__traits(compiles, __vector(ulong[2])))
+ test!(__vector(ulong[2]))();
+ static if (__traits(compiles, __vector(double[2])))
+ test!(__vector(double[2]))();
+ static if (__traits(compiles, __vector(float[4])))
+ test!(__vector(float[4]))();
+ }
+}
diff --git a/libphobos/libdruntime/Makefile.am b/libphobos/libdruntime/Makefile.am
index d963aa9..56b332d 100644
--- a/libphobos/libdruntime/Makefile.am
+++ b/libphobos/libdruntime/Makefile.am
@@ -207,14 +207,14 @@ DRUNTIME_DSOURCES = core/atomic.d core/attribute.d core/bitop.d \
core/vararg.d core/volatile.d gcc/attribute.d gcc/attributes.d \
gcc/backtrace.d gcc/builtins.d gcc/deh.d gcc/emutls.d gcc/gthread.d \
gcc/sections/common.d gcc/sections/elf.d gcc/sections/macho.d \
- gcc/sections/package.d gcc/sections/pecoff.d gcc/unwind/arm.d \
- gcc/unwind/arm_common.d gcc/unwind/c6x.d gcc/unwind/generic.d \
- gcc/unwind/package.d gcc/unwind/pe.d object.d rt/aApply.d rt/aApplyR.d \
- rt/aaA.d rt/adi.d rt/arrayassign.d rt/arraycat.d rt/cast_.d \
- rt/config.d rt/critical_.d rt/deh.d rt/dmain2.d rt/ehalloc.d \
- rt/invariant.d rt/lifetime.d rt/memory.d rt/minfo.d rt/monitor_.d \
- rt/profilegc.d rt/sections.d rt/tlsgc.d rt/util/typeinfo.d \
- rt/util/utility.d
+ gcc/sections/package.d gcc/sections/pecoff.d gcc/simd.d \
+ gcc/unwind/arm.d gcc/unwind/arm_common.d gcc/unwind/c6x.d \
+ gcc/unwind/generic.d gcc/unwind/package.d gcc/unwind/pe.d object.d \
+ rt/aApply.d rt/aApplyR.d rt/aaA.d rt/adi.d rt/arrayassign.d \
+ rt/arraycat.d rt/cast_.d rt/config.d rt/critical_.d rt/deh.d \
+ rt/dmain2.d rt/ehalloc.d rt/invariant.d rt/lifetime.d rt/memory.d \
+ rt/minfo.d rt/monitor_.d rt/profilegc.d rt/sections.d rt/tlsgc.d \
+ rt/util/typeinfo.d rt/util/utility.d
DRUNTIME_DSOURCES_STDCXX = core/stdcpp/allocator.d core/stdcpp/array.d \
core/stdcpp/exception.d core/stdcpp/memory.d core/stdcpp/new_.d \
diff --git a/libphobos/libdruntime/Makefile.in b/libphobos/libdruntime/Makefile.in
index 45e086a..24865fb 100644
--- a/libphobos/libdruntime/Makefile.in
+++ b/libphobos/libdruntime/Makefile.in
@@ -236,7 +236,7 @@ am__objects_1 = core/atomic.lo core/attribute.lo core/bitop.lo \
gcc/backtrace.lo gcc/builtins.lo gcc/deh.lo gcc/emutls.lo \
gcc/gthread.lo gcc/sections/common.lo gcc/sections/elf.lo \
gcc/sections/macho.lo gcc/sections/package.lo \
- gcc/sections/pecoff.lo gcc/unwind/arm.lo \
+ gcc/sections/pecoff.lo gcc/simd.lo gcc/unwind/arm.lo \
gcc/unwind/arm_common.lo gcc/unwind/c6x.lo \
gcc/unwind/generic.lo gcc/unwind/package.lo gcc/unwind/pe.lo \
object.lo rt/aApply.lo rt/aApplyR.lo rt/aaA.lo rt/adi.lo \
@@ -874,14 +874,14 @@ DRUNTIME_DSOURCES = core/atomic.d core/attribute.d core/bitop.d \
core/vararg.d core/volatile.d gcc/attribute.d gcc/attributes.d \
gcc/backtrace.d gcc/builtins.d gcc/deh.d gcc/emutls.d gcc/gthread.d \
gcc/sections/common.d gcc/sections/elf.d gcc/sections/macho.d \
- gcc/sections/package.d gcc/sections/pecoff.d gcc/unwind/arm.d \
- gcc/unwind/arm_common.d gcc/unwind/c6x.d gcc/unwind/generic.d \
- gcc/unwind/package.d gcc/unwind/pe.d object.d rt/aApply.d rt/aApplyR.d \
- rt/aaA.d rt/adi.d rt/arrayassign.d rt/arraycat.d rt/cast_.d \
- rt/config.d rt/critical_.d rt/deh.d rt/dmain2.d rt/ehalloc.d \
- rt/invariant.d rt/lifetime.d rt/memory.d rt/minfo.d rt/monitor_.d \
- rt/profilegc.d rt/sections.d rt/tlsgc.d rt/util/typeinfo.d \
- rt/util/utility.d
+ gcc/sections/package.d gcc/sections/pecoff.d gcc/simd.d \
+ gcc/unwind/arm.d gcc/unwind/arm_common.d gcc/unwind/c6x.d \
+ gcc/unwind/generic.d gcc/unwind/package.d gcc/unwind/pe.d object.d \
+ rt/aApply.d rt/aApplyR.d rt/aaA.d rt/adi.d rt/arrayassign.d \
+ rt/arraycat.d rt/cast_.d rt/config.d rt/critical_.d rt/deh.d \
+ rt/dmain2.d rt/ehalloc.d rt/invariant.d rt/lifetime.d rt/memory.d \
+ rt/minfo.d rt/monitor_.d rt/profilegc.d rt/sections.d rt/tlsgc.d \
+ rt/util/typeinfo.d rt/util/utility.d
DRUNTIME_DSOURCES_STDCXX = core/stdcpp/allocator.d core/stdcpp/array.d \
core/stdcpp/exception.d core/stdcpp/memory.d core/stdcpp/new_.d \
@@ -1340,6 +1340,7 @@ gcc/sections/elf.lo: gcc/sections/$(am__dirstamp)
gcc/sections/macho.lo: gcc/sections/$(am__dirstamp)
gcc/sections/package.lo: gcc/sections/$(am__dirstamp)
gcc/sections/pecoff.lo: gcc/sections/$(am__dirstamp)
+gcc/simd.lo: gcc/$(am__dirstamp)
gcc/unwind/$(am__dirstamp):
@$(MKDIR_P) gcc/unwind
@: > gcc/unwind/$(am__dirstamp)
diff --git a/libphobos/libdruntime/gcc/simd.d b/libphobos/libdruntime/gcc/simd.d
new file mode 100644
index 0000000..ffca50f
--- /dev/null
+++ b/libphobos/libdruntime/gcc/simd.d
@@ -0,0 +1,359 @@
+// GNU D Compiler SIMD support functions and intrinsics.
+// Copyright (C) 2022 Free Software Foundation, Inc.
+
+// GCC is free software; you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3, or (at your option) any later
+// version.
+
+// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// for more details.
+
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+
+module gcc.simd;
+
+pure:
+nothrow:
+@safe:
+@nogc:
+pragma(inline, true):
+
+/**
+ * Emit a prefetch instruction.
+ * Params:
+ * address = address to be prefetched
+ * writeFetch = true for write fetch, false for read fetch
+ * locality = 0..3 (0 meaning least local, 3 meaning most local)
+ */
+void prefetch(bool writeFetch, ubyte locality)(const(void)* address)
+{
+ static assert(locality < 4, "0..3 expected for locality");
+ import gcc.builtins : __builtin_prefetch;
+ __builtin_prefetch(address, writeFetch, locality);
+}
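+
+// Illustrative use only (not part of this patch): prefetch one cache line
+// ahead of a read loop with maximum temporal locality.  `buf` and `process`
+// are hypothetical.
+//
+//     foreach (i; 0 .. buf.length)
+//     {
+//         prefetch!(false, 3)(buf.ptr + i + 64);
+//         process(buf[i]);
+//     }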
+
+/**
+ * Load unaligned vector from address.
+ * This is a compiler intrinsic.
+ * Params:
+ * p = pointer to vector
+ * Returns:
+ * vector loaded from `p`
+ */
+V loadUnaligned(V)(const V* p) if (isVectorType!V);
+
+/**
+ * Store vector to unaligned address.
+ * This is a compiler intrinsic.
+ * Params:
+ * p = pointer to vector
+ * value = value to store
+ * Returns:
+ * value
+ */
+V storeUnaligned(V)(V* p, V value) if (isVectorType!V);
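+
+// Illustrative round trip (not part of this patch), assuming `int4` is
+// defined as __vector(int[4]):
+//
+//     ubyte[int4.sizeof + 1] buf;
+//     int4* p = cast(int4*)(buf.ptr + 1);  // deliberately misaligned
+//     int4 v = [1, 2, 3, 4];
+//     storeUnaligned(p, v);
+//     assert(loadUnaligned(p).array == [1, 2, 3, 4]);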
+
+/**
+ * Construct a permutation of elements from one or two vectors, returning a
+ * vector of the same type as the input vector(s). The `mask` is an integral
+ * vector with the same width and element count as the output vector.
+ * Params:
+ * op1 = input vector
+ * op2 = input vector
+ * mask = integer vector mask
+ * Returns:
+ * vector with the same type as `op1` and `op2`
+ * Example:
+ * ---
+ * int4 a = [1, 2, 3, 4];
+ * int4 b = [5, 6, 7, 8];
+ * int4 mask1 = [0, 1, 1, 3];
+ * int4 mask2 = [0, 4, 2, 5];
+ * assert(shuffle(a, mask1).array == [1, 2, 2, 4]);
+ * assert(shuffle(a, b, mask2).array == [1, 5, 3, 6]);
+ * ---
+ */
+template shuffle(V0, V1, M)
+{
+ static assert(isVectorType!V0, "first argument must be vector");
+ static assert(isVectorType!V1, "second argument must be vector");
+ static assert(is(BaseType!V0 == BaseType!V1),
+ "first and second argument vectors must have the same element type");
+ static assert(isVectorType!M && is(BaseType!M : long),
+ "last argument must be an integer vector");
+ static assert(numElements!V0 == numElements!M && numElements!V1 == numElements!M,
+ "argument vectors and mask vector should have the same number of elements");
+ static assert(BaseType!V0.sizeof == BaseType!M.sizeof,
+ "argument vectors and mask vector should have the same element type size");
+
+ V0 shuffle(V0 op1, V1 op2, M mask);
+}
+
+/// Ditto
+template shuffle(V, M)
+{
+ static assert(isVectorType!V, "first argument must be a vector");
+ static assert(isVectorType!M && is(BaseType!M : long),
+ "last argument must be an integer vector");
+ static assert(numElements!V == numElements!M,
+ "argument vector and mask vector should have the same number of elements");
+ static assert(BaseType!V.sizeof == BaseType!M.sizeof,
+ "argument vector and mask vector should have the same element type size");
+
+ V shuffle(V op1, M mask)
+ {
+ return shuffle(op1, op1, mask);
+ }
+}
+
+/**
+ * Construct a permutation of elements from two vectors, returning a vector
+ * with the same element type as the input vector(s), and a length equal to
+ * the number of `index` arguments.
+ * Params:
+ * op1 = input vector
+ * op2 = input vector
+ * index = indices of the elements that should be extracted and returned
+ * Returns:
+ * vector with the same element type as `op1` and `op2`, but with an element
+ * count equal to the number of indices in `index`.
+ * Example:
+ * ---
+ * int8 a = [1, -2, 3, -4, 5, -6, 7, -8];
+ * int4 b = shufflevector(a, a, 0, 2, 4, 6);
+ * assert(b.array == [1, 3, 5, 7]);
+ * int4 c = [-2, -4, -6, -8];
+ * int8 d = shufflevector(c, b, 4, 0, 5, 1, 6, 2, 7, 3);
+ * assert(d.array == a.array);
+ * ---
+ */
+template shufflevector(V1, V2, M...)
+{
+ static assert(isVectorType!V1, "first argument must be vector");
+ static assert(isVectorType!V2, "second argument must be vector");
+ static assert(is(BaseType!V1 == BaseType!V2),
+ "first and second argument vectors must have the same element type");
+ static assert(isPowerOf2!(M.length),
+ "number of index arguments must be a power of 2");
+
+ __vector(BaseType!V1[M.length]) shufflevector(V1 op1, V2 op2, M index);
+}
+
+/// Ditto
+template shufflevector(V, index...)
+{
+ // Defined for compatibility with LDC.
+ static assert(isVectorType!V, "first argument must be a vector type");
+ static assert(numElements!V == index.length,
+ "number of index arguments must be the same number of vector elements");
+
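+ // Computing the bitwise OR of all indices forces each one to be read as a
+ // compile-time integral constant; a non-constant index makes this template
+ // fail to instantiate, tripping the static assert below.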
+ private template ctfeConstants(m...)
+ {
+ static if (m.length == 0) enum ctfeConstants = 1;
+ else enum ctfeConstants = m[0] | ctfeConstants!(m[1 .. $]);
+ }
+ static assert(__traits(compiles, ctfeConstants!index),
+ "all index arguments must be compile time constants");
+
+ private template validIndexes(m...)
+ {
+ static if (m.length == 0) enum validIndexes = true;
+ else enum validIndexes = (cast(long)m[0] > -1) && validIndexes!(m[1 .. $]);
+ }
+ static assert(validIndexes!index,
+ "all index arguments must be greater than or equal to 0");
+
+ V shufflevector(V op1, V op2)
+ {
+ return shufflevector(op1, op2, index);
+ }
+}
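+
+// Illustrative use of the LDC-compatible form (not part of this patch):
+//
+//     int4 a = [1, 2, 3, 4];
+//     int4 b = [5, 6, 7, 8];
+//     int4 r = shufflevector!(int4, 0, 4, 1, 5)(a, b);
+//     assert(r.array == [1, 5, 2, 6]);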
+
+/**
+ * Extracts a single scalar element from a vector at a specified index.
+ * Defined for compatibility with LDC.
+ * Params:
+ * val = vector to extract element from
+ * idx = index indicating the position from which to extract the element
+ * Returns:
+ * scalar of the same type as the element type of val
+ * Example:
+ * ---
+ * int4 a = [0, 10, 20, 30];
+ * int k = extractelement!(int4, 2)(a);
+ * assert(k == 20);
+ * ---
+ */
+BaseType!V extractelement(V, int idx)(V val)
+ if (isVectorType!V && idx < numElements!V)
+{
+ return val[idx];
+}
+
+/**
+ * Inserts a scalar element into a vector at a specified index.
+ * Defined for compatibility with LDC.
+ * Params:
+ * val = vector to assign element to
+ * elt = scalar whose type is the element type of val
+ * idx = index indicating the position at which to insert the element
+ * Returns:
+ * vector of the same type as val
+ * Example:
+ * ---
+ * int4 a = [0, 10, 20, 30];
+ * int4 b = insertelement!(int4, 2)(a, 50);
+ * assert(b.array == [0, 10, 50, 30]);
+ * ---
+ */
+V insertelement(V, int idx)(V val, BaseType!V elt)
+ if (isVectorType!V && idx < numElements!V)
+{
+ val[idx] = elt;
+ return val;
+}
+
+/**
+ * Convert a vector from one integral or floating vector type to another.
+ * The result is an integral or floating vector that has had every element
+ * cast to the element type of the return type.
+ * Params:
+ * from = input vector
+ * Returns:
+ * converted vector
+ * Example:
+ * ---
+ * int4 a = [1, -2, 3, -4];
+ * float4 b = [1.5, -2.5, 3, 7];
+ * assert(convertvector!float4(a).array == [1, -2, 3, -4]);
+ * assert(convertvector!double4(a).array == [1, -2, 3, -4]);
+ * assert(convertvector!double4(b).array == [1.5, -2.5, 3, 7]);
+ * assert(convertvector!int4(b).array == [1, -2, 3, 7]);
+ * ---
+ */
+template convertvector(V, T)
+{
+ static assert(isVectorType!V && (is(BaseType!V : long) || is(BaseType!V : real)),
+ "first argument must be an integer or floating vector type");
+ static assert(isVectorType!T && (is(BaseType!T : long) || is(BaseType!T : real)),
+ "second argument must be an integer or floating vector");
+ static assert(numElements!V == numElements!T,
+ "first and second argument vectors should have the same number of elements");
+
+ V convertvector(T from);
+}
+
+/**
+ * Construct a conditional merge of elements from two vectors, returning a
+ * vector of the same type as the input vector(s).  The `mask` is an integral
+ * vector with the same width and element count as the output vector; each
+ * result element is taken from `op1` where the corresponding mask element is
+ * non-zero, and from `op2` where it is zero.
+ * Params:
+ * op1 = input vector
+ * op2 = input vector
+ * mask = integer vector mask
+ * Returns:
+ * vector with the same type as `op1` and `op2`
+ * Example:
+ * ---
+ * int4 a = [1, 2, 3, 4];
+ * int4 b = [5, 6, 7, 8];
+ * int4 mask1 = [0, 1, 1, 3];
+ * int4 mask2 = [1, 0, 2, 0];
+ * assert(blendvector(a, b, mask1).array == [5, 2, 3, 4]);
+ * assert(blendvector(a, b, mask2).array == [1, 6, 3, 8]);
+ * ---
+ */
+template blendvector(V0, V1, M)
+{
+ static assert(isVectorType!V0, "first argument must be vector");
+ static assert(isVectorType!V1, "second argument must be vector");
+ static assert(is(BaseType!V0 == BaseType!V1),
+ "first and second argument vectors must have the same element type");
+ static assert(isVectorType!M && is(BaseType!M : long),
+ "last argument must be an integer vector");
+ static assert(numElements!V0 == numElements!M && numElements!V1 == numElements!M,
+ "argument vectors and mask vector should have the same number of elements");
+ static assert(BaseType!V0.sizeof == BaseType!M.sizeof,
+ "argument vectors and mask vector should have the same element type size");
+
+ V0 blendvector(V0 op1, V1 op2, M mask);
+}
+
+/**
+ * Perform an element-wise comparison between two vectors, producing `0` when
+ * the comparison is false and `-1` (all bits are set to 1) otherwise.
+ * Params:
+ * op1 = input vector
+ * op2 = input vector
+ * Returns:
+ * vector of the same width and number of elements as the comparison
+ * operands with a signed integral element type
+ * Example:
+ * ---
+ * float4 a = [1, 3, 5, 7];
+ * float4 b = [2, 3, 4, 5];
+ * int4 c = greaterMask!float4(a, b);
+ * assert(c.array == [0, 0, -1, -1]);
+ * ---
+ */
+V equalMask(V)(V op1, V op2) if (isVectorType!V);
+/// Ditto
+V notEqualMask(V)(V op1, V op2) if (isVectorType!V);
+/// Ditto
+V greaterMask(V)(V op1, V op2) if (isVectorType!V);
+/// Ditto
+V greaterOrEqualMask(V)(V op1, V op2) if (isVectorType!V);
+
+/**
+ * Perform an element-wise logical comparison on one or two vectors, producing
+ * `0` when the comparison is false and `-1` (all bits are set to 1) otherwise.
+ * Params:
+ * op1 = input vector
+ * op2 = input vector (`notMask` takes only `op1`, comparing it against zero)
+ * Returns:
+ * vector of the same width and number of elements as the comparison
+ * operands with a signed integral element type
+ */
+V notMask(V)(V op1) if (isVectorType!V)
+{
+ return equalMask(op1, 0);
+}
+
+/// Ditto
+V andAndMask(V)(V op1, V op2) if (isVectorType!V)
+{
+ return notEqualMask(op1, 0) & notEqualMask(op2, 0);
+}
+
+/// Ditto
+V orOrMask(V)(V op1, V op2) if (isVectorType!V)
+{
+ return notEqualMask(op1, 0) | notEqualMask(op2, 0);
+}
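+
+// For example, given int4 a = [1, 0, 2, 0] and int4 b = [1, 1, 0, 0]:
+// notMask(a) is [0, -1, 0, -1], andAndMask(a, b) is [-1, 0, 0, 0],
+// and orOrMask(a, b) is [-1, -1, -1, 0].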
+
+// Private helper templates.
+private:
+
+enum bool isVectorType(T) = is(T : __vector(V[N]), V, size_t N);
+
+template BaseType(V)
+{
+ alias typeof(V.array[0]) BaseType;
+}
+
+template numElements(V)
+{
+ enum numElements = V.sizeof / BaseType!(V).sizeof;
+}
+
+enum bool isPowerOf2(int Y) = Y && (Y & -Y) == Y;