-rw-r--r-- | gcc/d/intrinsics.cc | 587
-rw-r--r-- | gcc/d/intrinsics.def | 23
-rw-r--r-- | gcc/testsuite/gdc.dg/Wbuiltin_declaration_mismatch1.d (renamed from gcc/testsuite/gdc.dg/Wbuiltin_declaration_mismatch.d) | 0
-rw-r--r-- | gcc/testsuite/gdc.dg/Wbuiltin_declaration_mismatch2.d | 250
-rw-r--r-- | gcc/testsuite/gdc.dg/torture/simd_blendvector.d | 345
-rw-r--r-- | gcc/testsuite/gdc.dg/torture/simd_cond.d | 17
-rw-r--r-- | gcc/testsuite/gdc.dg/torture/simd_convertvector.d | 122
-rw-r--r-- | gcc/testsuite/gdc.dg/torture/simd_load.d | 52
-rw-r--r-- | gcc/testsuite/gdc.dg/torture/simd_logical.d | 19
-rw-r--r-- | gcc/testsuite/gdc.dg/torture/simd_shuffle.d | 454
-rw-r--r-- | gcc/testsuite/gdc.dg/torture/simd_shufflevector.d | 55
-rw-r--r-- | gcc/testsuite/gdc.dg/torture/simd_store.d | 54
-rw-r--r-- | libphobos/libdruntime/Makefile.am | 16
-rw-r--r-- | libphobos/libdruntime/Makefile.in | 19
-rw-r--r-- | libphobos/libdruntime/gcc/simd.d | 359
15 files changed, 2355 insertions, 17 deletions
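
Editorial note, not part of the commit: the new gcc.simd intrinsics below lower to GCC's generic vector codes (VEC_PERM_EXPR for shuffle/shufflevector, VEC_COND_EXPR for blendvector and the comparison masks, IFN_VEC_CONVERT for convertvector). The following minimal D sketch shows how the declarations are meant to be called, loosely mirroring the new torture tests. It assumes a target where __vector(int[4]) and __vector(float[4]) are supported; the lane values in comments follow the semantics described by the patch comments, and the call forms follow the template declarations added in the tests.

    import gcc.simd;

    alias int4 = __vector(int[4]);
    alias float4 = __vector(float[4]);

    void example()
    {
        int4 a = [1, 3, 5, 7];
        int4 b = [2, 3, 4, 5];

        // Unaligned load/store: the pointer need not have vector alignment.
        int4 v = loadUnaligned(&a);
        int4 buf;
        storeUnaligned(&buf, v);

        // shuffle: each mask element indexes into the concatenation of the
        // two inputs (0..3 select from `a', 4..7 select from `b').
        int4 m = [0, 4, 1, 5];
        int4 s = shuffle(a, b, m);              // [1, 2, 3, 3]

        // shufflevector: indices must be compile-time constants; the result
        // has one lane per index, and the index count is a power of two.
        auto pair = shufflevector(a, b, 0, 4);  // __vector(int[2]): [1, 2]

        // convertvector: lane-wise conversion between equal-length vectors.
        float4 f = convertvector!float4(a);     // [1.0f, 3.0f, 5.0f, 7.0f]

        // blendvector: non-zero mask lanes select from the first operand,
        // zero lanes from the second.
        int4 sel = [-1, 0, -1, 0];
        int4 blend = blendvector(a, b, sel);    // [1, 3, 5, 5]

        // Comparison masks: lanes are -1 where the predicate holds, else 0.
        int4 eq = equalMask(a, b);              // [0, -1, 0, 0]
        int4 ge = greaterOrEqualMask(a, b);     // [0, -1, -1, -1]
    }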
diff --git a/gcc/d/intrinsics.cc b/gcc/d/intrinsics.cc
index 0dd5543..454d940 100644
--- a/gcc/d/intrinsics.cc
+++ b/gcc/d/intrinsics.cc
@@ -29,9 +29,12 @@ along with GCC; see the file COPYING3.  If not see
 #include "tm.h"
 #include "function.h"
 #include "tree.h"
+#include "diagnostic.h"
+#include "langhooks.h"
 #include "fold-const.h"
 #include "stringpool.h"
 #include "builtins.h"
+#include "vec-perm-indices.h"
 
 #include "d-tree.h"
 
@@ -161,6 +164,16 @@ maybe_set_intrinsic (FuncDeclaration *decl)
     case INTRINSIC_MULUL:
     case INTRINSIC_NEGS:
     case INTRINSIC_NEGSL:
+    case INTRINSIC_LOADUNALIGNED:
+    case INTRINSIC_STOREUNALIGNED:
+    case INTRINSIC_SHUFFLE:
+    case INTRINSIC_SHUFFLEVECTOR:
+    case INTRINSIC_CONVERTVECTOR:
+    case INTRINSIC_BLENDVECTOR:
+    case INTRINSIC_EQUALMASK:
+    case INTRINSIC_NOTEQUALMASK:
+    case INTRINSIC_GREATERMASK:
+    case INTRINSIC_GREATEREQUALMASK:
     case INTRINSIC_VLOAD8:
     case INTRINSIC_VLOAD16:
     case INTRINSIC_VLOAD32:
@@ -169,6 +182,8 @@ maybe_set_intrinsic (FuncDeclaration *decl)
     case INTRINSIC_VSTORE16:
     case INTRINSIC_VSTORE32:
     case INTRINSIC_VSTORE64:
+      /* Cannot interpret function during CTFE.  If the library
+         provides a definition, its body will be used instead.  */
       break;
 
     case INTRINSIC_POW:
@@ -196,6 +211,314 @@ maybe_set_intrinsic (FuncDeclaration *decl)
     }
 }
 
+/* Helper function for maybe_warn_intrinsic_mismatch.  Issue warning about
+   mismatch in the EXPECTED return type in call to the intrinsic function in
+   CALLEXP, and return TRUE.  */
+
+static bool
+warn_mismatched_return_type (tree callexp, const char *expected)
+{
+  warning_at (EXPR_LOCATION (callexp), OPT_Wbuiltin_declaration_mismatch,
+              "mismatch in return type of intrinsic function %qD "
+              "(%qT, should be %qs)", get_callee_fndecl (callexp),
+              TREE_TYPE (callexp), expected);
+  return true;
+}
+
+/* Helper function for maybe_warn_intrinsic_mismatch.  Issue warning or error
+   about mismatch in the EXPECTED argument type at ARGNO in call to the
+   intrinsic function in CALLEXP, and return TRUE.  */
+
+static bool
+warn_mismatched_argument (tree callexp, unsigned argno, const char *expected)
+{
+  warning_at (EXPR_LOCATION (callexp), OPT_Wbuiltin_declaration_mismatch,
+              "mismatch in argument %u type of intrinsic function %qD "
+              "(%qT, should be %qs)", argno + 1, get_callee_fndecl (callexp),
+              TREE_TYPE (CALL_EXPR_ARG (callexp, argno)), expected);
+  return true;
+}
+
+static bool
+warn_mismatched_argument (tree callexp, unsigned argno, tree expected,
+                          bool error_p = false)
+{
+  if (error_p)
+    error_at (EXPR_LOCATION (callexp),
+              "mismatch in argument %u type of intrinsic function %qD "
+              "(%qT, should be %qT)", argno + 1, get_callee_fndecl (callexp),
+              TREE_TYPE (CALL_EXPR_ARG (callexp, argno)), expected);
+  else
+    warning_at (EXPR_LOCATION (callexp), OPT_Wbuiltin_declaration_mismatch,
+                "mismatch in argument %u type of intrinsic function %qD "
+                "(%qT, should be %qT)", argno + 1, get_callee_fndecl (callexp),
+                TREE_TYPE (CALL_EXPR_ARG (callexp, argno)), expected);
+
+  return true;
+}
+
+/* Helper function for maybe_warn_intrinsic_mismatch.  Builds a vector integer
+   type suitable for the mask argument of INTRINSIC_SHUFFLE from the given
+   input argument TYPE.  */
+
+static tree
+build_shuffle_mask_type (tree type)
+{
+  const unsigned bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (type)));
+  const int unsignedp = TYPE_UNSIGNED (TREE_TYPE (type));
+  tree inner = lang_hooks.types.type_for_size (bits, unsignedp);
+  gcc_assert (inner && TREE_CODE (inner) == INTEGER_TYPE);
+
+  /* %% Get the front-end type for the vector so the D type will be
+     printed (this should really be handled by a D tree printer).  */
+  Type *t = build_frontend_type (inner);
+  gcc_assert (t != NULL);
+  unsigned HOST_WIDE_INT nunits;
+  TYPE_VECTOR_SUBPARTS (type).is_constant (&nunits);
+
+  return build_ctype (TypeVector::create (t->sarrayOf (nunits)));
+}
+
+/* Checks if call to intrinsic FUNCTION in CALLEXP matches the internal
+   type and value constraints that we expect from the library definitions.
+   Returns TRUE and issues a warning if there is a mismatch.
+
+   Note: The return type and parameters are encoded into the signature `deco'
+   string that we match on in maybe_set_intrinsic(), so if the deco mangle
+   string has 'i' in the part that specifies the return type, then the matched
+   intrinsic will always have the return type `int'.
+
+   For templated intrinsics however, we rely on template constraints to ensure
+   that the generic type matches what we expect it to be.  There is still an
+   enforced relationship between a template argument and its instantiated type.
+   For example: `T func(T)(T*)' would have the generic return type `@1T' and
+   generic parameter type `@1PT', so it can be assumed that if the return type
+   matches what we expect, then all parameters are fine as well; if they do
+   not, some internal compiler error has occurred.  Where a templated
+   intrinsic has multiple template arguments, each generic type will need to
+   be checked for its validity.  */
+
+static bool
+maybe_warn_intrinsic_mismatch (tree function, tree callexp)
+{
+  switch (DECL_INTRINSIC_CODE (function))
+    {
+    case INTRINSIC_NONE:
+    default:
+      return false;
+
+    case INTRINSIC_LOADUNALIGNED:
+      {
+        /* Expects the signature:
+           vector(T) loadUnaligned (vector(T)*);  */
+        gcc_assert (call_expr_nargs (callexp) == 1);
+
+        tree ptr = TREE_TYPE (CALL_EXPR_ARG (callexp, 0));
+        if (!VECTOR_TYPE_P (TREE_TYPE (callexp))
+            || !POINTER_TYPE_P (ptr) || !VECTOR_TYPE_P (TREE_TYPE (ptr)))
+          return warn_mismatched_return_type (callexp, "__vector(T)");
+
+        return false;
+      }
+
+    case INTRINSIC_STOREUNALIGNED:
+      {
+        /* Expects the signature:
+           vector(T) storeUnaligned (vector(T)*, vector(T));  */
+        gcc_assert (call_expr_nargs (callexp) == 2);
+
+        tree ptr = TREE_TYPE (CALL_EXPR_ARG (callexp, 0));
+        tree val = TREE_TYPE (CALL_EXPR_ARG (callexp, 1));
+        if (!VECTOR_TYPE_P (TREE_TYPE (callexp))
+            || !POINTER_TYPE_P (ptr) || !VECTOR_TYPE_P (TREE_TYPE (ptr))
+            || !VECTOR_TYPE_P (val))
+          return warn_mismatched_return_type (callexp, "__vector(T)");
+
+        return false;
+      }
+
+    case INTRINSIC_SHUFFLE:
+    case INTRINSIC_BLENDVECTOR:
+      {
+        /* Expects the signature:
+           vector(T) shuffle (vector(T), vector(U), vector(V));
+           vector(T) blendvector (vector(T), vector(U), vector(V));  */
+        gcc_assert (call_expr_nargs (callexp) == 3);
+
+        tree vec0 = TREE_TYPE (CALL_EXPR_ARG (callexp, 0));
+        if (!VECTOR_TYPE_P (TREE_TYPE (callexp))
+            || !VECTOR_TYPE_P (vec0))
+          return warn_mismatched_return_type (callexp, "__vector(T)");
+
+        tree vec1 = TREE_TYPE (CALL_EXPR_ARG (callexp, 1));
+        if (!VECTOR_TYPE_P (vec1))
+          return warn_mismatched_argument (callexp, 1, vec0);
+
+        tree mask = TREE_TYPE (CALL_EXPR_ARG (callexp, 2));
+        if (!VECTOR_TYPE_P (mask) || !VECTOR_INTEGER_TYPE_P (mask))
+          {
+            tree expected = build_shuffle_mask_type (vec0);
+            return warn_mismatched_argument (callexp, 2, expected,
+                                             VECTOR_TYPE_P (mask));
+          }
+
+        /* Types have been validated, now issue errors about violations on
+           the constraints of the intrinsic.  */
+        if (TYPE_MAIN_VARIANT (vec0) != TYPE_MAIN_VARIANT (vec1))
+          return warn_mismatched_argument (callexp, 1, vec0, true);
+
+        /* Vector element sizes should be equal between arguments and mask.  */
+        if (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vec0)))
+            != GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (mask)))
+            || maybe_ne (TYPE_VECTOR_SUBPARTS (vec0),
+                         TYPE_VECTOR_SUBPARTS (mask))
+            || maybe_ne (TYPE_VECTOR_SUBPARTS (vec1),
+                         TYPE_VECTOR_SUBPARTS (mask)))
+          {
+            tree expected = build_shuffle_mask_type (vec0);
+            return warn_mismatched_argument (callexp, 2, expected, true);
+          }
+
+        return false;
+      }
+
+    case INTRINSIC_SHUFFLEVECTOR:
+      {
+        /* Expects the signature:
+           vector(T[N]) shufflevector (vector(T), vector(U), N...);  */
+        gcc_assert (call_expr_nargs (callexp) >= 3);
+        gcc_assert (VECTOR_TYPE_P (TREE_TYPE (callexp)));
+
+        tree vec0 = TREE_TYPE (CALL_EXPR_ARG (callexp, 0));
+        if (!VECTOR_TYPE_P (vec0))
+          return warn_mismatched_argument (callexp, 0, "__vector(T)");
+
+        tree vec1 = TREE_TYPE (CALL_EXPR_ARG (callexp, 1));
+        if (!VECTOR_TYPE_P (vec1))
+          return warn_mismatched_argument (callexp, 1, vec0);
+
+        for (int i = 2; i < call_expr_nargs (callexp); i++)
+          {
+            tree idx = TREE_TYPE (CALL_EXPR_ARG (callexp, i));
+            if (TREE_CODE (idx) != INTEGER_TYPE)
+              return warn_mismatched_argument (callexp, i, d_int_type);
+          }
+
+        /* Types have been validated, now issue errors about violations on
+           the constraints of the intrinsic.  */
+        if (TYPE_MAIN_VARIANT (TREE_TYPE (vec0))
+            != TYPE_MAIN_VARIANT (TREE_TYPE (vec1)))
+          {
+            /* %% Get the front-end type for the vector so the D type will be
+               printed (this should really be handled by a D tree printer).  */
+            unsigned HOST_WIDE_INT nunits;
+            if (!TYPE_VECTOR_SUBPARTS (vec1).is_constant (&nunits))
+              break;
+
+            Type *inner = build_frontend_type (TREE_TYPE (vec0));
+            Type *vector = TypeVector::create (inner->sarrayOf (nunits));
+            return warn_mismatched_argument (callexp, 1,
+                                             build_ctype (vector), true);
+          }
+
+        /* Vector sizes should be known, and number of indices a power of 2.  */
+        unsigned HOST_WIDE_INT vec0_length;
+        unsigned HOST_WIDE_INT vec1_length;
+        if (!TYPE_VECTOR_SUBPARTS (vec0).is_constant (&vec0_length)
+            || !TYPE_VECTOR_SUBPARTS (vec1).is_constant (&vec1_length)
+            || !pow2p_hwi (call_expr_nargs (callexp) - 2))
+          break;
+
+        /* All index arguments must be valid constants as well.  */
+        for (int i = 2; i < call_expr_nargs (callexp); i++)
+          {
+            tree idx = CALL_EXPR_ARG (callexp, i);
+            if (!tree_fits_shwi_p (idx))
+              {
+                error_at (EXPR_LOCATION (callexp),
+                          "argument %qE cannot be read at compile time", idx);
+                return true;
+              }
+
+            HOST_WIDE_INT iidx = tree_to_shwi (idx);
+            if (iidx < 0
+                || (unsigned HOST_WIDE_INT) iidx >= vec0_length + vec1_length)
+              {
+                error_at (EXPR_LOCATION (callexp),
+                          "element index %qE is out of bounds %<[0 .. %E]%>",
+                          idx, build_integer_cst (vec0_length + vec1_length));
+                return true;
+              }
+          }
+
+        return false;
+      }
+
+    case INTRINSIC_CONVERTVECTOR:
+      {
+        /* Expects the signature:
+           vector(T) convertvector (vector(U));  */
+        gcc_assert (call_expr_nargs (callexp) == 1);
+
+        tree ret = TREE_TYPE (callexp);
+        if (!VECTOR_TYPE_P (ret)
+            || (!VECTOR_INTEGER_TYPE_P (ret) && !VECTOR_FLOAT_TYPE_P (ret)))
+          return warn_mismatched_return_type (callexp, "__vector(T)");
+
+        tree arg = TREE_TYPE (CALL_EXPR_ARG (callexp, 0));
+        if (!VECTOR_TYPE_P (arg)
+            || (!VECTOR_INTEGER_TYPE_P (arg) && !VECTOR_FLOAT_TYPE_P (arg)))
+          return warn_mismatched_argument (callexp, 0, "__vector(T)");
+
+        /* Types have been validated, now issue errors about violations on
+           the constraints of the intrinsic.  */
+        if (maybe_ne (TYPE_VECTOR_SUBPARTS (ret), TYPE_VECTOR_SUBPARTS (arg)))
+          {
+            /* %% Get the front-end type for the vector so the D type will be
+               printed (this should really be handled by a D tree printer).  */
+            unsigned HOST_WIDE_INT nunits;
+            if (!TYPE_VECTOR_SUBPARTS (ret).is_constant (&nunits))
+              break;
+
+            Type *inner = build_frontend_type (TREE_TYPE (arg));
+            Type *vector = TypeVector::create (inner->sarrayOf (nunits));
+            return warn_mismatched_argument (callexp, 0,
+                                             build_ctype (vector), true);
+          }
+
+        return false;
+      }
+
+    case INTRINSIC_EQUALMASK:
+    case INTRINSIC_NOTEQUALMASK:
+    case INTRINSIC_GREATERMASK:
+    case INTRINSIC_GREATEREQUALMASK:
+      {
+        /* Expects the signature:
+           vector(T) equalMask(vector(T), vector(T));
+           vector(T) notEqualMask(vector(T), vector(T));
+           vector(T) greaterMask(vector(T), vector(T));
+           vector(T) greaterOrEqualMask(vector(T), vector(T));  */
+        gcc_assert (call_expr_nargs (callexp) == 2);
+
+        tree vec0 = TREE_TYPE (CALL_EXPR_ARG (callexp, 0));
+        tree vec1 = TREE_TYPE (CALL_EXPR_ARG (callexp, 1));
+        if (!VECTOR_TYPE_P (TREE_TYPE (callexp))
+            || !VECTOR_TYPE_P (vec0)
+            || !VECTOR_TYPE_P (vec1)
+            || TYPE_MAIN_VARIANT (vec0) != TYPE_MAIN_VARIANT (vec1))
+          return warn_mismatched_return_type (callexp, "__vector(T)");
+
+        return false;
+      }
+    }
+
+  /* Generic mismatch warning if it hasn't already been handled.  */
+  warning_at (EXPR_LOCATION (callexp), OPT_Wbuiltin_declaration_mismatch,
+              "mismatch in call of intrinsic function %qD", function);
+  return true;
+}
+
 /* Construct a function call to the built-in function CODE, N is the number of
    arguments, and the `...' parameters are the argument expressions.
    The original call expression is held in CALLEXP.  */
@@ -750,6 +1073,231 @@ expand_volatile_store (tree callexp)
   return modify_expr (result, value);
 }
 
+/* Expand a front-end intrinsic call to a vector comparison intrinsic, which
+   is either a call to equalMask(), notEqualMask(), greaterMask(), or
+   greaterOrEqualMask().  These intrinsics take two arguments; the signature
+   is one of:
+
+        vector(T) equalMask(vector(T) vec0, vector(T) vec1);
+        vector(T) notEqualMask(vector(T) vec0, vector(T) vec1);
+        vector(T) greaterMask(vector(T) vec0, vector(T) vec1);
+        vector(T) greaterOrEqualMask(vector(T) vec0, vector(T) vec1);
+
+   This performs an element-wise comparison between two vectors VEC0 and VEC1,
+   returning a vector with signed integral elements.  */
+
+static tree
+expand_intrinsic_vec_cond (tree_code code, tree callexp)
+{
+  tree vec0 = CALL_EXPR_ARG (callexp, 0);
+  tree vec1 = CALL_EXPR_ARG (callexp, 1);
+  tree type = TREE_TYPE (callexp);
+
+  tree cmp = fold_build2_loc (EXPR_LOCATION (callexp), code,
+                              truth_type_for (type), vec0, vec1);
+  return fold_build3_loc (EXPR_LOCATION (callexp), VEC_COND_EXPR, type, cmp,
+                          build_minus_one_cst (type), build_zero_cst (type));
+}
+
+/* Expand a front-end intrinsic call to convertvector().  This takes one
+   argument, the signature to which is:
+
+        vector(T) convertvector (vector(F) vec);
+
+   This converts a vector VEC to TYPE by casting every element in VEC to the
+   element type of TYPE.  The original call expression is held in CALLEXP.  */
+
+static tree
+expand_intrinsic_vec_convert (tree callexp)
+{
+  tree vec = CALL_EXPR_ARG (callexp, 0);
+  tree type = TREE_TYPE (callexp);
+
+  /* Use VIEW_CONVERT for simple vector conversions.  */
+  if ((TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (vec)))
+       == TYPE_MAIN_VARIANT (TREE_TYPE (type)))
+      || (VECTOR_INTEGER_TYPE_P (TREE_TYPE (vec))
+          && VECTOR_INTEGER_TYPE_P (type)
+          && (TYPE_PRECISION (TREE_TYPE (TREE_TYPE (vec)))
+              == TYPE_PRECISION (TREE_TYPE (type)))))
+    return build1_loc (EXPR_LOCATION (callexp), VIEW_CONVERT_EXPR, type, vec);
+
+  return build_call_expr_internal_loc (EXPR_LOCATION (callexp), IFN_VEC_CONVERT,
+                                       type, 1, vec);
+}
+
+/* Expand a front-end intrinsic call to blendvector().  This expects to take
+   three arguments, the signature to which is:
+
+        vector(T) blendvector (vector(T) vec0, vector(U) vec1, vector(M) mask);
+
+   This builds a VEC_COND_EXPR if VEC0, VEC1, and MASK are vector types, VEC0
+   has the same type as VEC1, and VEC0, VEC1, and MASK all have the same number
+   of elements.  The original call expression is held in CALLEXP.  */
+
+static tree
+expand_intrinsic_vec_blend (tree callexp)
+{
+  tree vec0 = CALL_EXPR_ARG (callexp, 0);
+  tree vec1 = CALL_EXPR_ARG (callexp, 1);
+  tree mask = CALL_EXPR_ARG (callexp, 2);
+
+  tree cmp = fold_build2_loc (EXPR_LOCATION (callexp), NE_EXPR,
+                              truth_type_for (TREE_TYPE (mask)),
+                              mask, build_zero_cst (TREE_TYPE (mask)));
+
+  tree ret = fold_build3_loc (EXPR_LOCATION (callexp), VEC_COND_EXPR,
+                              TREE_TYPE (callexp), cmp, vec0, vec1);
+
+  if (!CONSTANT_CLASS_P (vec0) || !CONSTANT_CLASS_P (vec1))
+    ret = force_target_expr (ret);
+
+  return ret;
+}
+
+/* Expand a front-end intrinsic call to shuffle().  This expects to take three
+   arguments, the signature to which is:
+
+        vector(T) shuffle (vector(T) vec0, vector(T) vec1, vector(M) mask);
+
+   This builds a VEC_PERM_EXPR if VEC0, VEC1, and MASK are vector types, VEC0
+   has the same type as VEC1, and VEC0, VEC1, and MASK all have the same number
+   of elements.  The original call expression is held in CALLEXP.  */
+
+static tree
+expand_intrinsic_vec_shuffle (tree callexp)
+{
+  tree vec0 = CALL_EXPR_ARG (callexp, 0);
+  tree vec1 = CALL_EXPR_ARG (callexp, 1);
+  tree mask = CALL_EXPR_ARG (callexp, 2);
+
+  return build3_loc (EXPR_LOCATION (callexp), VEC_PERM_EXPR,
+                     TREE_TYPE (callexp), vec0, vec1, mask);
+}
+
+/* Expand a front-end intrinsic call to shufflevector().  This takes two
+   positional arguments and a variadic list, the signature to which is:
+
+        vector(TM) shufflevector (vector(T) vec0, vector(T) vec1, index...);
+
+   This builds a VEC_PERM_EXPR if VEC0 and VEC1 are vector types, VEC0 has the
+   same element type as VEC1, and the number of elements in INDEX is a valid
+   power of two.  The original call expression is held in CALLEXP.  */
+
+static tree
+expand_intrinsic_vec_shufflevector (tree callexp)
+{
+  tree vec0 = CALL_EXPR_ARG (callexp, 0);
+  tree vec1 = CALL_EXPR_ARG (callexp, 1);
+
+  unsigned HOST_WIDE_INT v0elems, v1elems;
+  TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec0)).is_constant (&v0elems);
+  TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec1)).is_constant (&v1elems);
+
+  unsigned HOST_WIDE_INT num_indices = call_expr_nargs (callexp) - 2;
+  unsigned HOST_WIDE_INT masklen = MAX (num_indices, MAX (v0elems, v1elems));
+  unsigned HOST_WIDE_INT pad_size = (v0elems < masklen ? masklen - v0elems : 0);
+  vec_perm_builder sel (masklen, masklen, 1);
+
+  unsigned n = 0;
+  for (; n < num_indices; ++n)
+    {
+      tree idx = CALL_EXPR_ARG (callexp, n + 2);
+      HOST_WIDE_INT iidx = tree_to_shwi (idx);
+      /* VEC_PERM_EXPR does not allow different sized inputs.  */
+      if ((unsigned HOST_WIDE_INT) iidx >= v0elems)
+        iidx += pad_size;
+
+      sel.quick_push (iidx);
+    }
+
+  /* VEC_PERM_EXPR does not support a result that is smaller than the inputs.  */
+  for (; n < masklen; ++n)
+    sel.quick_push (n);
+
+  vec_perm_indices indices (sel, 2, masklen);
+
+  /* Pad out arguments to the common vector size.  */
+  tree ret_type = build_vector_type (TREE_TYPE (TREE_TYPE (vec0)), masklen);
+  if (v0elems < masklen)
+    {
+      constructor_elt elt = { NULL_TREE, build_zero_cst (TREE_TYPE (vec0)) };
+      vec0 = build_constructor_single (ret_type, NULL_TREE, vec0);
+      for (unsigned i = 1; i < masklen / v0elems; ++i)
+        vec_safe_push (CONSTRUCTOR_ELTS (vec0), elt);
+    }
+
+  if (v1elems < masklen)
+    {
+      constructor_elt elt = { NULL_TREE, build_zero_cst (TREE_TYPE (vec1)) };
+      vec1 = build_constructor_single (ret_type, NULL_TREE, vec1);
+      for (unsigned i = 1; i < masklen / v1elems; ++i)
+        vec_safe_push (CONSTRUCTOR_ELTS (vec1), elt);
+    }
+
+  tree mask_type = build_vector_type (build_nonstandard_integer_type
+        (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (ret_type))), 1),
+        masklen);
+  tree ret = build3_loc (EXPR_LOCATION (callexp), VEC_PERM_EXPR, ret_type, vec0,
+                         vec1, vec_perm_indices_to_tree (mask_type, indices));
+
+  /* Get the low part we are interested in.  */
+  if (num_indices < masklen)
+    {
+      ret = build3_loc (EXPR_LOCATION (callexp), BIT_FIELD_REF,
+                        TREE_TYPE (callexp), ret,
+                        TYPE_SIZE (TREE_TYPE (callexp)), bitsize_zero_node);
+      /* Wrap the low part operation in a TARGET_EXPR so it gets a separate
+         temporary during gimplification.  */
+      ret = force_target_expr (ret);
+    }
+
+  return ret;
+}
+
+/* Expand a front-end intrinsic call to loadUnaligned().  This takes one
+   argument, the signature to which is:
+
+        vector(T) loadUnaligned (vector(T)* ptr)
+
+   This generates a load of a vector from an unaligned address PTR.
+   The original call expression is held in CALLEXP.  */
+
+static tree
+expand_intrinsic_vec_load_unaligned (tree callexp)
+{
+  tree ptr = CALL_EXPR_ARG (callexp, 0);
+
+  tree unaligned_type = build_variant_type_copy (TREE_TYPE (TREE_TYPE (ptr)));
+  SET_TYPE_ALIGN (unaligned_type, 1 * BITS_PER_UNIT);
+  TYPE_USER_ALIGN (unaligned_type) = 1;
+
+  tree load = indirect_ref (unaligned_type, ptr);
+  return convert (TREE_TYPE (callexp), load);
+}
+
+/* Expand a front-end intrinsic call to storeUnaligned().  This takes two
+   arguments, the signature to which is:
+
+        vector(T) storeUnaligned (vector(T)* ptr, vector(T) value)
+
+   This generates an assignment of a vector VALUE to an unaligned address PTR.
+   The original call expression is held in CALLEXP.  */
+
+static tree
+expand_intrinsic_vec_store_unaligned (tree callexp)
+{
+  tree ptr = CALL_EXPR_ARG (callexp, 0);
+  tree vec = CALL_EXPR_ARG (callexp, 1);
+
+  tree unaligned_type = build_variant_type_copy (TREE_TYPE (TREE_TYPE (ptr)));
+  SET_TYPE_ALIGN (unaligned_type, 1 * BITS_PER_UNIT);
+  TYPE_USER_ALIGN (unaligned_type) = 1;
+
+  tree load = indirect_ref (unaligned_type, ptr);
+  return build_assign (MODIFY_EXPR, load, vec);
+}
+
 /* If CALLEXP is for an intrinsic, expand and return inlined compiler
    generated instructions.  Most map directly to GCC builtins, others
    require a little extra work around them.  */
@@ -766,6 +1314,15 @@ maybe_expand_intrinsic (tree callexp)
   if (DECL_BUILT_IN_CTFE (callee) && !doing_semantic_analysis_p)
     return callexp;
 
+  /* Gate the expansion of the intrinsic with constraint checks; if any fail,
+     then bail out without any lowering.  */
+  if (maybe_warn_intrinsic_mismatch (callee, callexp))
+    {
+      /* Reset the built-in flag so that we don't trip fold_builtin.  */
+      set_decl_built_in_function (callee, NOT_BUILT_IN, 0);
+      return callexp;
+    }
+
   intrinsic_code intrinsic = DECL_INTRINSIC_CODE (callee);
   built_in_function code;
 
@@ -913,6 +1470,36 @@ maybe_expand_intrinsic (tree callexp)
     case INTRINSIC_VSTORE64:
       return expand_volatile_store (callexp);
 
+    case INTRINSIC_LOADUNALIGNED:
+      return expand_intrinsic_vec_load_unaligned (callexp);
+
+    case INTRINSIC_STOREUNALIGNED:
+      return expand_intrinsic_vec_store_unaligned (callexp);
+
+    case INTRINSIC_SHUFFLE:
+      return expand_intrinsic_vec_shuffle (callexp);
+
+    case INTRINSIC_SHUFFLEVECTOR:
+      return expand_intrinsic_vec_shufflevector (callexp);
+
+    case INTRINSIC_CONVERTVECTOR:
+      return expand_intrinsic_vec_convert (callexp);
+
+    case INTRINSIC_BLENDVECTOR:
+      return expand_intrinsic_vec_blend (callexp);
+
+    case INTRINSIC_EQUALMASK:
+      return expand_intrinsic_vec_cond (EQ_EXPR, callexp);
+
+    case INTRINSIC_NOTEQUALMASK:
+      return expand_intrinsic_vec_cond (NE_EXPR, callexp);
+
+    case INTRINSIC_GREATERMASK:
+      return expand_intrinsic_vec_cond (GT_EXPR, callexp);
+
+    case INTRINSIC_GREATEREQUALMASK:
+      return expand_intrinsic_vec_cond (GE_EXPR, callexp);
+
     default:
       gcc_unreachable ();
     }
diff --git a/gcc/d/intrinsics.def b/gcc/d/intrinsics.def
index 61c1737..b8d1ec5 100644
--- a/gcc/d/intrinsics.def
+++ b/gcc/d/intrinsics.def
@@ -252,5 +252,28 @@ DEF_D_BUILTIN (INTRINSIC_C_VA_ARG, BUILT_IN_NONE, "va_arg", "core.stdc.stdarg",
 DEF_D_BUILTIN (INTRINSIC_VASTART, BUILT_IN_NONE, "va_start", "core.stdc.stdarg",
                "FJ@7va_listK@1TZv")
 
+/* gcc.simd intrinsics.  */
+
+DEF_D_BUILTIN (INTRINSIC_LOADUNALIGNED, BUILT_IN_NONE, "loadUnaligned",
+               "gcc.simd", "FP@1VZ@1V")
+DEF_D_BUILTIN (INTRINSIC_STOREUNALIGNED, BUILT_IN_NONE, "storeUnaligned",
+               "gcc.simd", "FP@1V@1VZ@1V")
+DEF_D_BUILTIN (INTRINSIC_SHUFFLE, BUILT_IN_NONE, "shuffle", "gcc.simd",
+               "F@2V0@2V1@1MZ@2V0")
+DEF_D_BUILTIN (INTRINSIC_SHUFFLEVECTOR, BUILT_IN_NONE, "shufflevector",
+               "gcc.simd", "F@2V1@2V2@1MZNhH@1M@")
+DEF_D_BUILTIN (INTRINSIC_CONVERTVECTOR, BUILT_IN_NONE, "convertvector",
+               "gcc.simd", "F@1TZ@1V")
+DEF_D_BUILTIN (INTRINSIC_BLENDVECTOR, BUILT_IN_NONE, "blendvector", "gcc.simd",
+               "F@2V0@2V1@1MZ@2V0")
+DEF_D_BUILTIN (INTRINSIC_EQUALMASK, BUILT_IN_NONE, "equalMask", "gcc.simd",
+               "F@1V@1VZ@1V")
+DEF_D_BUILTIN (INTRINSIC_NOTEQUALMASK, BUILT_IN_NONE, "notEqualMask",
+               "gcc.simd", "F@1V@1VZ@1V")
+DEF_D_BUILTIN (INTRINSIC_GREATERMASK, BUILT_IN_NONE, "greaterMask", "gcc.simd",
+               "F@1V@1VZ@1V")
+DEF_D_BUILTIN (INTRINSIC_GREATEREQUALMASK, BUILT_IN_NONE,
+               "greaterOrEqualMask", "gcc.simd", "F@1V@1VZ@1V")
+
 #undef DEF_D_BUILTIN
 #undef DEF_CTFE_BUILTIN
diff --git a/gcc/testsuite/gdc.dg/Wbuiltin_declaration_mismatch.d b/gcc/testsuite/gdc.dg/Wbuiltin_declaration_mismatch1.d
index 5340647..5340647 100644
--- a/gcc/testsuite/gdc.dg/Wbuiltin_declaration_mismatch.d
+++ b/gcc/testsuite/gdc.dg/Wbuiltin_declaration_mismatch1.d
diff --git a/gcc/testsuite/gdc.dg/Wbuiltin_declaration_mismatch2.d b/gcc/testsuite/gdc.dg/Wbuiltin_declaration_mismatch2.d
new file mode 100644
index 0000000..9e90c15
--- /dev/null
+++ b/gcc/testsuite/gdc.dg/Wbuiltin_declaration_mismatch2.d
@@ -0,0 +1,250 @@
+// { dg-additional-options "-mavx" { target avx_runtime } }
+// { dg-do compile { target { avx_runtime || vect_sizes_16B_8B } } }
+module gcc.simd;
+
+alias int4 = __vector(int[4]);
+alias short8 = __vector(short[8]);
+alias float4 = __vector(float[4]);
+alias byte16 = __vector(byte[16]);
+struct fake4 { int[4] v; }
+enum f = fake4();
+
+void test_load_store()
+{
+    loadUnaligned!int(null); // { dg-warning "mismatch in return type" }
+    loadUnaligned!double(null); // { dg-warning "mismatch in return type" }
+    loadUnaligned!int4(null);
+    loadUnaligned!short8(null);
+    loadUnaligned!float4(null);
+    loadUnaligned!byte16(null);
+    loadUnaligned!fake4(null); // { dg-warning "mismatch in return type" }
+
+    storeUnaligned!int(null, 1); // { dg-warning "mismatch in return type" }
+    storeUnaligned!double(null, 1); // { dg-warning "mismatch in return type" }
+    storeUnaligned!int4(null, 1);
+    storeUnaligned!short8(null, 1);
+    storeUnaligned!float4(null, 1);
+    storeUnaligned!byte16(null, 1);
+    storeUnaligned!fake4(null, f); // { dg-warning "mismatch in return type" }
+}
+
+void test_shuffle()
+{
+    shuffle!(int, int, int)(0, 0, 0); // { dg-warning "mismatch in return type" }
+    shuffle!(double, int, int)(0, 0, 0); // { dg-warning "mismatch in return type" }
+    shuffle!(fake4, int, int)(f, 0, 0); // { dg-warning "mismatch in return type" }
+
+    shuffle!(int4, int, int)(0, 0, 0); // { dg-warning "mismatch in argument 2" }
+    shuffle!(int4, double, int)(0, 0, 0); // { dg-warning "mismatch in argument 2" }
+    shuffle!(int4, fake4, int)(0, f, 0); // { dg-warning "mismatch in argument 2" }
+
+    shuffle!(int4, int4, int)(0, 0, 0); // { dg-warning "mismatch in argument 3" }
+    shuffle!(int4, int4, double)(0, 0, 0); // { dg-warning "mismatch in argument 3" }
+    shuffle!(int4, int4, fake4)(0, 0, f); // { dg-warning "mismatch in argument 3" }
+
+    shuffle!(int4, int4, int4)(0, 0, 0);
+    shuffle!(int4, short8, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    shuffle!(int4, float4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    shuffle!(int4, byte16, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    shuffle!(int4, int4, short8)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    shuffle!(int4, int4, float4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    shuffle!(int4, int4, byte16)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+
+    shuffle!(float4, int4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    shuffle!(float4, short8, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    shuffle!(float4, float4, int4)(0, 0, 0);
+    shuffle!(float4, byte16, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    shuffle!(float4, float4, short8)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    shuffle!(float4, float4, float4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    shuffle!(float4, float4, byte16)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+
+    shuffle!(short8, int4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    shuffle!(short8, short8, int4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    shuffle!(short8, float4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    shuffle!(short8, byte16, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    shuffle!(short8, short8, short8)(0, 0, 0);
+    shuffle!(short8, short8, float4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    shuffle!(short8, short8, byte16)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+
+    shuffle!(byte16, int4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    shuffle!(byte16, short8, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    shuffle!(byte16, float4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    shuffle!(byte16, byte16, int4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    shuffle!(byte16, byte16, short8)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    shuffle!(byte16, byte16, float4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    shuffle!(byte16, byte16, byte16)(0, 0, 0);
+}
+
+void test_shufflevector()
+{
+    shufflevector!(int, int4, int)(0, 0, 0); // { dg-warning "mismatch in argument 1" }
+    shufflevector!(double, int4, int)(0, 0, 0); // { dg-warning "mismatch in argument 1" }
+    shufflevector!(fake4, int4, int)(f, 0, 0); // { dg-warning "mismatch in argument 1" }
+
+    shufflevector!(int4, int, int)(0, 0, 0); // { dg-warning "mismatch in argument 2" }
+    shufflevector!(int4, double, int)(0, 0, 0); // { dg-warning "mismatch in argument 2" }
+    shufflevector!(int4, int4, int)(0, 0, 0);
+    shufflevector!(int4, short8, int)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    shufflevector!(int4, float4, int)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    shufflevector!(int4, byte16, int)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    shufflevector!(int4, fake4, int)(0, f, 0); // { dg-warning "mismatch in argument 2" }
+
+    shufflevector!(int4, int4, double)(0, 0, 0); // { dg-warning "mismatch in argument 3" }
+    shufflevector!(int4, int4, int4)(0, 0, 0); // { dg-warning "mismatch in argument 3" }
+    shufflevector!(int4, int4, short8)(0, 0, 0); // { dg-warning "mismatch in argument 3" }
+    shufflevector!(int4, int4, float4)(0, 0, 0); // { dg-warning "mismatch in argument 3" }
+    shufflevector!(int4, int4, byte16)(0, 0, 0); // { dg-warning "mismatch in argument 3" }
+
+    shufflevector!(int4, int4, int, double)(0, 0, 0, 0); // { dg-warning "mismatch in argument 4" }
+    shufflevector!(int4, int4, int, int, double, int)(0, 0, 0, 0, 0, 0); // { dg-warning "mismatch in argument 5" }
+    shufflevector!(int4, int4, int, int, int, double)(0, 0, 0, 0, 0, 0); // { dg-warning "mismatch in argument 6" }
+
+    int i;
+    shufflevector!(int4, int4, int)(0, 0, i); // { dg-error "argument .i. cannot be read at compile time" }
+    shufflevector!(int4, int4, int)(0, 0, -1u); // { dg-error "element index .-1. is out of bounds" }
+    shufflevector!(int4, int4, int)(0, 0, 8); // { dg-error "element index .8. is out of bounds" }
+}
+
+void test_convertvector()
+{
+    convertvector!(int, int)(0); // { dg-warning "mismatch in return type" }
+    convertvector!(double, int)(0); // { dg-warning "mismatch in return type" }
+    convertvector!(fake4, int)(0); // { dg-warning "mismatch in return type" }
+
+    convertvector!(int4, int)(0); // { dg-warning "mismatch in argument 1" }
+    convertvector!(int4, double)(0); // { dg-warning "mismatch in argument 1" }
+    convertvector!(int4, int4)(0);
+    convertvector!(int4, short8)(0); // { dg-error "mismatch in argument 1" }
+    convertvector!(int4, float4)(0);
+    convertvector!(int4, byte16)(0); // { dg-error "mismatch in argument 1" }
+    convertvector!(int4, fake4)(f); // { dg-warning "mismatch in argument 1" }
+
+    convertvector!(short8, int)(0); // { dg-warning "mismatch in argument 1" }
+    convertvector!(short8, double)(0); // { dg-warning "mismatch in argument 1" }
+    convertvector!(short8, int4)(0); // { dg-error "mismatch in argument 1" }
+    convertvector!(short8, short8)(0);
+    convertvector!(short8, float4)(0); // { dg-error "mismatch in argument 1" }
+    convertvector!(short8, byte16)(0); // { dg-error "mismatch in argument 1" }
+    convertvector!(short8, fake4)(f); // { dg-warning "mismatch in argument 1" }
+
+    convertvector!(float4, int)(0); // { dg-warning "mismatch in argument 1" }
+    convertvector!(float4, double)(0); // { dg-warning "mismatch in argument 1" }
+    convertvector!(float4, int4)(0);
+    convertvector!(float4, short8)(0); // { dg-error "mismatch in argument 1" }
+    convertvector!(float4, float4)(0);
+    convertvector!(float4, byte16)(0); // { dg-error "mismatch in argument 1" }
+    convertvector!(float4, fake4)(f); // { dg-warning "mismatch in argument 1" }
+
+    convertvector!(byte16, int)(0); // { dg-warning "mismatch in argument 1" }
+    convertvector!(byte16, double)(0); // { dg-warning "mismatch in argument 1" }
+    convertvector!(byte16, int4)(0); // { dg-error "mismatch in argument 1" }
+    convertvector!(byte16, short8)(0); // { dg-error "mismatch in argument 1" }
+    convertvector!(byte16, float4)(0); // { dg-error "mismatch in argument 1" }
+    convertvector!(byte16, byte16)(0);
+    convertvector!(byte16, fake4)(f); // { dg-warning "mismatch in argument 1" }
+}
+
+void test_blendvector()
+{
+    blendvector!(int, int, int)(0, 0, 0); // { dg-warning "mismatch in return type" }
+    blendvector!(double, int, int)(0, 0, 0); // { dg-warning "mismatch in return type" }
+    blendvector!(fake4, int, int)(f, 0, 0); // { dg-warning "mismatch in return type" }
+
+    blendvector!(int4, int, int)(0, 0, 0); // { dg-warning "mismatch in argument 2" }
+    blendvector!(int4, double, int)(0, 0, 0); // { dg-warning "mismatch in argument 2" }
+    blendvector!(int4, fake4, int)(0, f, 0); // { dg-warning "mismatch in argument 2" }
+
+    blendvector!(int4, int4, int)(0, 0, 0); // { dg-warning "mismatch in argument 3" }
+    blendvector!(int4, int4, double)(0, 0, 0); // { dg-warning "mismatch in argument 3" }
+    blendvector!(int4, int4, fake4)(0, 0, f); // { dg-warning "mismatch in argument 3" }
+
+    blendvector!(int4, int4, int4)(0, 0, 0);
+    blendvector!(int4, short8, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    blendvector!(int4, float4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    blendvector!(int4, byte16, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    blendvector!(int4, int4, short8)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    blendvector!(int4, int4, float4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    blendvector!(int4, int4, byte16)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+
+    blendvector!(float4, int4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    blendvector!(float4, short8, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    blendvector!(float4, float4, int4)(0, 0, 0);
+    blendvector!(float4, byte16, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    blendvector!(float4, float4, short8)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    blendvector!(float4, float4, float4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    blendvector!(float4, float4, byte16)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+
+    blendvector!(short8, int4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    blendvector!(short8, short8, int4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    blendvector!(short8, float4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    blendvector!(short8, byte16, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    blendvector!(short8, short8, short8)(0, 0, 0);
+    blendvector!(short8, short8, float4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    blendvector!(short8, short8, byte16)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+
+    blendvector!(byte16, int4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    blendvector!(byte16, short8, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    blendvector!(byte16, float4, int4)(0, 0, 0); // { dg-error "mismatch in argument 2" }
+    blendvector!(byte16, byte16, int4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    blendvector!(byte16, byte16, short8)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    blendvector!(byte16, byte16, float4)(0, 0, 0); // { dg-error "mismatch in argument 3" }
+    blendvector!(byte16, byte16, byte16)(0, 0, 0);
+}
+
+void test_comparison()
+{
+    equalMask!int(0, 0); // { dg-warning "mismatch in return type" }
+    equalMask!double(0, 0); // { dg-warning "mismatch in return type" }
+    equalMask!int4(0, 0);
+    equalMask!short8(0, 0);
+    equalMask!float4(0, 0);
+    equalMask!byte16(0, 0);
+    equalMask!fake4(f, f); // { dg-warning "mismatch in return type" }
+
+    notEqualMask!int(0, 0); // { dg-warning "mismatch in return type" }
+    notEqualMask!double(0, 0); // { dg-warning "mismatch in return type" }
+    notEqualMask!int4(0, 0);
+    notEqualMask!short8(0, 0);
+    notEqualMask!float4(0, 0);
+    notEqualMask!byte16(0, 0);
+    notEqualMask!fake4(f, f); // { dg-warning "mismatch in return type" }
+
+    greaterMask!int(0, 0); // { dg-warning "mismatch in return type" }
+    greaterMask!double(0, 0); // { dg-warning "mismatch in return type" }
+    greaterMask!int4(0, 0);
+    greaterMask!short8(0, 0);
+    greaterMask!float4(0, 0);
+    greaterMask!byte16(0, 0);
+    greaterMask!fake4(f, f); // { dg-warning "mismatch in return type" }
+
+    greaterOrEqualMask!int(0, 0); // { dg-warning "mismatch in return type" }
+    greaterOrEqualMask!double(0, 0); // { dg-warning "mismatch in return type" }
+    greaterOrEqualMask!int4(0, 0);
+    greaterOrEqualMask!short8(0, 0);
+    greaterOrEqualMask!float4(0, 0);
+    greaterOrEqualMask!byte16(0, 0);
+    greaterOrEqualMask!fake4(f, f); // { dg-warning "mismatch in return type" }
+}
+
+// The following declarations of the simd intrinsics are without any guards
+// to verify `d/intrinsics.cc` is doing checks to prevent invalid lowerings.
+V loadUnaligned(V)(const V*);
+V storeUnaligned(V)(V*, V);
+
+V0 shuffle(V0, V1, M)(V0, V1, M);
+
+// Use overloads to test different argument positions.
+template E(V) { alias typeof(V.array[0]) E; } +enum isV(T) = is(T : __vector(V[N]), V, size_t N); + +__vector(E!V1[M.length]) shufflevector(V1, V2, M...)(V1, V2, M) if (isV!V1 && !isV!V2); +__vector(E!V2[M.length]) shufflevector(V1, V2, M...)(V1, V2, M) if (isV!V2 && !isV!V1); +__vector(E!V1[M.length]) shufflevector(V1, V2, M...)(V1, V2, M) if (isV!V1 && isV!V2); + +V convertvector(V, T)(T); +V0 blendvector(V0, V1, M)(V0, V1, M); + +V equalMask(V)(V, V); +V notEqualMask(V)(V, V); +V greaterMask(V)(V, V); +V greaterOrEqualMask(V)(V, V); diff --git a/gcc/testsuite/gdc.dg/torture/simd_blendvector.d b/gcc/testsuite/gdc.dg/torture/simd_blendvector.d new file mode 100644 index 0000000..42459bd --- /dev/null +++ b/gcc/testsuite/gdc.dg/torture/simd_blendvector.d @@ -0,0 +1,345 @@ +// { dg-additional-options "-mavx" { target avx_runtime } } +// { dg-skip-if "needs gcc/config.d" { ! d_runtime } } +import gcc.simd; + +void testblendvector(V, VI = V)() +{ + alias E = typeof(V.array[0]); + enum numElements = V.sizeof / E.sizeof; + + static if (numElements == 16) + { + // Test fragment for vectors with 16 elements + immutable V[5] in1 = + [[ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ], + [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ], + [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ], + [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ], + [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ]]; + + immutable V in2 = + [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45 ]; + + immutable VI[5] mask1 = + [[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], + [ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 ], + [ 7, 6, 5, 4, 16, 17, 18, 19, 31, 30, 29, 28, 3, 2, 1, 0 ], + [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], + [ 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 ]]; + + immutable V[5] out1 = + [[30, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], + [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], + [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 45], + [30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], + [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]]; + } + else static if (numElements == 8) + { + // Test fragment for vectors with 8 elements + static if (is(E == uint)) + { + enum E A1 = 0x11121314; + enum E B1 = 0x21222324; + enum E C1 = 0x31323334; + enum E D1 = 0x41424344; + enum E E1 = 0x51525354; + enum E F1 = 0x61626364; + enum E G1 = 0x71727374; + enum E H1 = 0x81828384; + + enum E A2 = 0x91929394; + enum E B2 = 0xa1a2a3a4; + enum E C2 = 0xb1b2b3b4; + enum E D2 = 0xc1c2c3c4; + enum E E2 = 0xd1d2d3d4; + enum E F2 = 0xe1e2e3e4; + enum E G2 = 0xf1f2f3f4; + enum E H2 = 0x01020304; + } + else static if (is(E == ushort)) + { + enum E A1 = 0x1112; + enum E B1 = 0x2122; + enum E C1 = 0x3132; + enum E D1 = 0x4142; + enum E E1 = 0x5152; + enum E F1 = 0x6162; + enum E G1 = 0x7172; + enum E H1 = 0x8182; + + enum E A2 = 0x9192; + enum E B2 = 0xa1a2; + enum E C2 = 0xb1b2; + enum E D2 = 0xc1c2; + enum E E2 = 0xd1d2; + enum E F2 = 0xe1e2; + enum E G2 = 0xf1f2; + enum E H2 = 0x0102; + } + else static if (is(E == ubyte)) + { + enum E A1 = 0x11; + enum E B1 = 0x12; + enum E C1 = 0x13; + enum E D1 = 0x14; + enum E E1 = 0x15; + enum E F1 = 0x16; + enum E G1 = 0x17; + enum E H1 = 0x18; + + enum E A2 = 0xf1; + enum E B2 = 0xf2; + enum E C2 = 0xf3; + enum E D2 = 0xf4; + enum E E2 = 0xf5; + enum E F2 
= 0xf6; + enum E G2 = 0xf7; + enum E H2 = 0xf8; + } + else + enum unsupported = true; + + static if (!__traits(compiles, unsupported)) + { + immutable V[6] in1 = + [[ A1, B1, C1, D1, E1, F1, G1, H1 ], + [ A1, B1, C1, D1, E1, F1, G1, H1 ], + [ A1, B1, C1, D1, E1, F1, G1, H1 ], + [ A1, B1, C1, D1, E1, F1, G1, H1 ], + [ A1, B1, C1, D1, E1, F1, G1, H1 ], + [ A1, B1, C1, D1, E1, F1, G1, H1 ]]; + + + immutable V in2 = + [ A2, B2, C2, D2, E2, F2, G2, H2 ]; + + immutable VI[6] mask1 = + [[ 0, 1, 2 , 3 , 4 , 5 , 6 , 0 ], + [ 8, 9, 0, 11, 12, 13, 0, 15 ], + [ 0, 8, 1, 0, 2, 0, 3, 11 ], + [ 0, 15, 4, 11, 0, 3, 7, 8 ], + [ 0, 0, 0, 0, 0, 0, 0, 0 ], + [ 0x1e, 0x2e, 0x3e, 0x4e, 0x5e, 0x6e, 0x7e, 0x8e ]]; + + immutable V[6] out1 = + [[ A2, B1, C1, D1, E1, F1, G1, H2 ], + [ A1, B1, C2, D1, E1, F1, G2, H1 ], + [ A2, B1, C1, D2, E1, F2, G1, H1 ], + [ A2, B1, C1, D1, E2, F1, G1, H1 ], + [ A2, B2, C2, D2, E2, F2, G2, H2 ], + [ A1, B1, C1, D1, E1, F1, G1, H1 ]]; + } + } + else static if (numElements == 4) + { + // Test fragment for vectors with 4 elements + static if (is(E == double)) + { + enum E A = 0.69314718055994530942; + enum E B = 2.7182818284590452354; + enum E C = 2.30258509299404568402; + enum E D = 1.4426950408889634074; + + enum E W = 0.31830988618379067154; + enum E X = 3.14159265358979323846; + enum E Y = 1.41421356237309504880; + enum E Z = 0.70710678118654752440; + } + else static if (is(E == float)) + { + enum E A = 0.69314718055994530942f; + enum E B = 2.7182818284590452354f; + enum E C = 2.30258509299404568402f; + enum E D = 1.4426950408889634074f; + + enum E W = 0.31830988618379067154f; + enum E X = 3.14159265358979323846f; + enum E Y = 1.41421356237309504880f; + enum E Z = 0.70710678118654752440f; + } + else static if (is(E == ulong)) + { + enum E A = 0x1112131415161718; + enum E B = 0x2122232425262728; + enum E C = 0x3132333435363738; + enum E D = 0x4142434445464748; + + enum E W = 0xc1c2c3c4c5c6c7c8; + enum E X = 0xd1d2d3d4d5d6d7d8; + enum E Y = 0xe1e2e3e4e5e6e7e8; + enum E Z = 0xf1f2f3f4f5f6f7f8; + } + else static if (is(E == uint)) + { + enum E A = 0x11121314; + enum E B = 0x21222324; + enum E C = 0x31323334; + enum E D = 0x41424344; + + enum E W = 0xc1c2c3c4; + enum E X = 0xd1d2d3d4; + enum E Y = 0xe1e2e3e4; + enum E Z = 0xf1f2f3f4; + } + else + enum unsupported = true; + + static if (!__traits(compiles, unsupported)) + { + immutable V[6] in1 = + [[ A, B, C, D ], + [ A, B, C, D ], + [ A, B, C, D ], + [ A, B, C, D ], + [ A, B, C, D ], + [ A, B, C, D ]]; + + immutable V in2 = [ W, X, Y, Z ]; + + immutable VI[6] mask1 = + [[ 0, 1, 2, 3 ], + [ 4, 0, 6, 7 ], + [ 0, 4, 0, 5 ], + [ 0, 7, 4, 0 ], + [ 0, 0, 0, 0 ], + [ 7, 7, 7, 7 ]]; + + immutable V[6] out1 = + [[ W, B, C, D ], + [ A, X, C, D ], + [ W, B, Y, D ], + [ W, B, C, Z ], + [ W, X, Y, Z ], + [ A, B, C, D ]]; + } + } + else static if (numElements == 2) + { + // Test fragment for vectors with 2 elements + static if (is(E == double)) + { + enum E A = 0.69314718055994530942; + enum E B = 2.7182818284590452354; + + enum E X = 3.14159265358979323846; + enum E Y = 1.41421356237309504880; + } + else static if (is(E == float)) + { + enum E A = 0.69314718055994530942f; + enum E B = 2.7182818284590452354f; + + enum E X = 3.14159265358979323846f; + enum E Y = 1.41421356237309504880f; + } + else static if (is(E == ulong)) + { + enum E A = 0x1112131415161718; + enum E B = 0x2122232425262728; + + enum E X = 0xc1c2c3c4c5c6c7c8; + enum E Y = 0xd1d2d3d4d5d6d7d8; + } + else static if (is(E == uint)) + { + enum E A = 0x11121314; + enum E B = 0x21222324; 
+ + enum E X = 0xd1d2d3d4; + enum E Y = 0xe1e2e3e4; + } + else + enum unsupported = true; + + static if (!__traits(compiles, unsupported)) + { + immutable V[7] in1 = + [[ A, B ], + [ A, B ], + [ A, B ], + [ A, B ], + [ A, B ], + [ A, B ], + [ A, B ]]; + + immutable V in2 = [ X, Y ]; + + immutable VI[7] mask1 = + [[ 0, 1 ], + [ 2, 3 ], + [ 0, 2 ], + [ 2, 1 ], + [ 3, 0 ], + [ 0, 0 ], + [ 3, 3 ]]; + + immutable V[7] out1 = + [[ X, B ], + [ A, B ], + [ X, B ], + [ A, B ], + [ A, Y ], + [ X, Y ], + [ A, B ]]; + } + } + else + enum unsupported = true; + + static if (!__traits(compiles, unsupported)) + { + static foreach (i; 0 .. in1.length) + assert(blendvector(in1[i], in2, mask1[i]).array == out1[i].array); + } +} + +void main() +{ + static if (__traits(compiles, __vector(ubyte[16]))) + testblendvector!(__vector(ubyte[16]))(); + + static if (__traits(compiles, __vector(ushort[16]))) + testblendvector!(__vector(ushort[16]))(); + + static if (__traits(compiles, __vector(ubyte[8]))) + testblendvector!(__vector(ubyte[8]))(); + + static if (__traits(compiles, __vector(ushort[8]))) + testblendvector!(__vector(ushort[8]))(); + + static if (__traits(compiles, __vector(uint[8]))) + testblendvector!(__vector(uint[8]))(); + + static if (__traits(compiles, __vector(ulong[4]))) + { + testblendvector!(__vector(ulong[4])); + + static if (__traits(compiles, __vector(double[4]))) + testblendvector!(__vector(double[4]), __vector(ulong[4])); + } + + static if (__traits(compiles, __vector(uint[4]))) + { + testblendvector!(__vector(uint[4])); + + static if (__traits(compiles, __vector(float[4]))) + testblendvector!(__vector(float[4]), __vector(uint[4])); + } + + static if (__traits(compiles, __vector(ulong[2]))) + { + testblendvector!(__vector(ulong[2])); + + static if (__traits(compiles, __vector(double[2]))) + testblendvector!(__vector(double[2]), __vector(ulong[2])); + } + + static if (__traits(compiles, __vector(uint[2]))) + { + testblendvector!(__vector(uint[2])); + + static if (__traits(compiles, __vector(float[2]))) + testblendvector!(__vector(float[2]), __vector(uint[2])); + } +} diff --git a/gcc/testsuite/gdc.dg/torture/simd_cond.d b/gcc/testsuite/gdc.dg/torture/simd_cond.d new file mode 100644 index 0000000..1548956 --- /dev/null +++ b/gcc/testsuite/gdc.dg/torture/simd_cond.d @@ -0,0 +1,17 @@ +// { dg-skip-if "needs gcc/config.d" { ! d_runtime } } + +import gcc.simd; + +void main() +{ + static if (__traits(compiles, __vector(int[4]))) + { + __gshared __vector(int[4]) a = [1,3,5,7]; + __gshared __vector(int[4]) b = [2,3,4,5]; + + assert(equalMask(a, b).array == [0,-1,0,0]); + assert(notEqualMask(a, b).array == [-1,0,-1,-1]); + assert(greaterMask(a, b).array == [0,0,-1,-1]); + assert(greaterOrEqualMask(a, b).array == [0,-1,-1,-1]); + } +} diff --git a/gcc/testsuite/gdc.dg/torture/simd_convertvector.d b/gcc/testsuite/gdc.dg/torture/simd_convertvector.d new file mode 100644 index 0000000..0d6b18e --- /dev/null +++ b/gcc/testsuite/gdc.dg/torture/simd_convertvector.d @@ -0,0 +1,122 @@ +// { dg-additional-options "-mavx" { target avx_runtime } } +// { dg-skip-if "needs gcc/config.d" { ! 
d_runtime } } +import gcc.simd; + +void main () +{ + static if (__traits(compiles, __vector(int[4]))) + alias int4 = __vector(int[4]); + static if (__traits(compiles, __vector(uint[4]))) + alias uint4 = __vector(uint[4]); + static if (__traits(compiles, __vector(float[4]))) + alias float4 = __vector(float[4]); + static if (__traits(compiles, __vector(double[4]))) + alias double4 = __vector(double[4]); + + static if (__traits(compiles, int4)) + { + union U1 { int4 v; int[4] a; } + U1 u1; + } + static if (__traits(compiles, uint4)) + { + union U2 { uint4 v; uint[4] a; } + U2 u2; + } + static if (__traits(compiles, float4)) + { + union U3 { float4 v; float[4] a; } + U3 u3; + } + static if (__traits(compiles, double4)) + { + union U4 { double4 v; double[4] a; } + U4 u4; + } + + static if (__traits(compiles, u1) && __traits(compiles, u2)) + { + static void f1(ref uint4 x, out int4 y) + { + y = convertvector!int4(x); + } + static foreach (i; 0 .. 4) + u2.a[i] = i * 2; + f1(u2.v, u1.v); + static foreach (i; 0 .. 4) + assert(u1.a[i] == i * 2); + } + + static if (__traits(compiles, u1) && __traits(compiles, u3)) + { + static void f2(ref float4 x, out int4 y) + { + y = convertvector!int4(x); + } + + static void f3(ref int4 x, out float4 y) + { + y = convertvector!float4(x); + } + + static foreach (i; 0 .. 4) + u3.a[i] = i - 2.25f; + f2(u3.v, u1.v); + static foreach (i; 0 .. 4) + assert(u1.a[i] == (i == 3 ? 0 : i - 2)); + + static foreach (i; 0 .. 4) + u3.a[i] = i + 0.75f; + f2(u3.v, u1.v); + static foreach (i; 0 .. 4) + assert(u1.a[i] == i); + + static foreach (i; 0 .. 4) + u1.a[i] = 7 * i - 5; + f3(u1.v, u3.v); + static foreach (i; 0 .. 4) + assert(u3.a[i] == 7 * i - 5); + } + static if (__traits(compiles, u1) && __traits(compiles, u4)) + { + static void f4(ref double4 x, out int4 y) + { + y = convertvector!int4(x); + } + + static void f5(ref int4 x, out double4 y) + { + y = convertvector!double4(x); + } + + static foreach (i; 0 .. 4) + u4.a[i] = i - 2.25; + f4(u4.v, u1.v); + static foreach (i; 0 .. 4) + assert(u1.a[i] == (i == 3 ? 0 : i - 2)); + + static foreach (i; 0 .. 4) + u4.a[i] = i + 0.75; + f4(u4.v, u1.v); + static foreach (i; 0 .. 4) + assert(u1.a[i] == i); + + static foreach (i; 0 .. 4) + u1.a[i] = 7 * i - 5; + f5(u1.v, u4.v); + static foreach (i; 0 .. 4) + assert(u4.a[i] == 7 * i - 5); + } + static if (__traits(compiles, u4)) + { + static void f6(out double4 x) + { + int4 a = [1, 2, -3, -4]; + x = convertvector!double4(a); + } + + f6(u4.v); + static foreach (i; 0 .. 4) + assert(u4.a[i] == (i >= 2 ? -1 - i : i + 1)); + } +} diff --git a/gcc/testsuite/gdc.dg/torture/simd_load.d b/gcc/testsuite/gdc.dg/torture/simd_load.d new file mode 100644 index 0000000..188ffda --- /dev/null +++ b/gcc/testsuite/gdc.dg/torture/simd_load.d @@ -0,0 +1,52 @@ +// { dg-additional-options "-mavx" { target avx_runtime } } +// { dg-skip-if "needs gcc/config.d" { ! 
d_runtime } } +import gcc.simd; + +void main() +{ + ubyte[32] data; + foreach (i; 0..data.length) + { + data[i] = cast(ubyte)i; + } + + // to test all alignments from 1 ~ 16 + foreach (i; 0..16) + { + ubyte* d = &data[i]; + + void test(T)() + { + // load the data + T v = loadUnaligned(cast(T*)d); + + // check that the data was loaded correctly + ubyte* ptrToV = cast(ubyte*)&v; + foreach (j; 0..T.sizeof) + assert(ptrToV[j] == d[j]); + } + + static if (__traits(compiles, __vector(void[16]))) + test!(__vector(void[16]))(); + static if (__traits(compiles, __vector(byte[16]))) + test!(__vector(byte[16]))(); + static if (__traits(compiles, __vector(ubyte[16]))) + test!(__vector(ubyte[16]))(); + static if (__traits(compiles, __vector(short[8]))) + test!(__vector(short[8]))(); + static if (__traits(compiles, __vector(ushort[8]))) + test!(__vector(ushort[8]))(); + static if (__traits(compiles, __vector(int[4]))) + test!(__vector(int[4]))(); + static if (__traits(compiles, __vector(uint[4]))) + test!(__vector(uint[4]))(); + static if (__traits(compiles, __vector(long[2]))) + test!(__vector(long[2]))(); + static if (__traits(compiles, __vector(ulong[2]))) + test!(__vector(ulong[2]))(); + static if (__traits(compiles, __vector(double[2]))) + test!(__vector(double[2]))(); + static if (__traits(compiles, __vector(float[4]))) + test!(__vector(float[4]))(); + } +} diff --git a/gcc/testsuite/gdc.dg/torture/simd_logical.d b/gcc/testsuite/gdc.dg/torture/simd_logical.d new file mode 100644 index 0000000..e9c23f5 --- /dev/null +++ b/gcc/testsuite/gdc.dg/torture/simd_logical.d @@ -0,0 +1,19 @@ +// { dg-skip-if "needs gcc/config.d" { ! d_runtime } } + +import gcc.simd; + +void main() +{ + static if (__traits(compiles, __vector(int[4]))) + { + __gshared __vector(int[4]) a = [1,0,-1,2]; + + assert(notMask(a).array == [0,-1,0,0]); + + assert(andAndMask(a, 1).array == [-1,0,-1,-1]); + assert(andAndMask(a, 0).array == [0,0,0,0]); + + assert(orOrMask(a, 1).array == [-1,-1,-1,-1]); + assert(orOrMask(a, 0).array == [-1,0,-1,-1]); + } +} diff --git a/gcc/testsuite/gdc.dg/torture/simd_shuffle.d b/gcc/testsuite/gdc.dg/torture/simd_shuffle.d new file mode 100644 index 0000000..3629cee --- /dev/null +++ b/gcc/testsuite/gdc.dg/torture/simd_shuffle.d @@ -0,0 +1,454 @@ +// { dg-additional-options "-mavx" { target avx_runtime } } +// { dg-skip-if "needs gcc/config.d" { ! 
d_runtime } } +import gcc.simd; + +void testshuffle(V, VI = V)() +{ + alias E = typeof(V.array[0]); + enum numElements = V.sizeof / E.sizeof; + + static if (numElements == 16) + { + // Test fragment for vectors with 16 elements + immutable V[5] in1 = + [[ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ], + [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ], + [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ], + [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ], + [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ]]; + + immutable VI[5] mask1 = + [[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ], + [ 0x10, 0x21, 0x32, 0x43, 0x54, 0x65, 0x76, 0x87, + 0x98, 0xa9, 0xba, 0xcb, 0xdc, 0xed, 0xfe, 0xff ] , + [ 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 ], + [ 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 ], + [ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 ]]; + + immutable V[5] out1 = + [[ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ], + [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ], + [ 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 ], + [ 1, 3, 5, 7, 9, 11, 13, 15, 2, 4, 6, 8, 10, 12, 14, 16 ], + [ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 ]]; + + immutable V[5] in2 = + [[ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ], + [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ], + [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ], + [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ], + [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ]]; + + immutable V in3 = + [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45 ]; + + immutable VI[5] mask2 = + [[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], + [ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 ], + [ 7, 6, 5, 4, 16, 17, 18, 19, 31, 30, 29, 28, 3, 2, 1, 0 ], + [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], + [ 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 ]]; + + immutable V[5] out2 = + [[ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ], + [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45 ], + [ 17, 16, 15, 14, 30, 31, 32, 33, 45, 44, 43, 42, 13, 12, 11, 10 ], + [ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10 ], + [ 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45 ]]; + } + else static if (numElements == 8) + { + // Test fragment for vectors with 8 elements + static if (is(E == uint)) + { + enum E A1 = 0x11121314; + enum E B1 = 0x21222324; + enum E C1 = 0x31323334; + enum E D1 = 0x41424344; + enum E E1 = 0x51525354; + enum E F1 = 0x61626364; + enum E G1 = 0x71727374; + enum E H1 = 0x81828384; + + enum E A2 = 0x91929394; + enum E B2 = 0xa1a2a3a4; + enum E C2 = 0xb1b2b3b4; + enum E D2 = 0xc1c2c3c4; + enum E E2 = 0xd1d2d3d4; + enum E F2 = 0xe1e2e3e4; + enum E G2 = 0xf1f2f3f4; + enum E H2 = 0x01020304; + } + else static if (is(E == ushort)) + { + enum E A1 = 0x1112; + enum E B1 = 0x2122; + enum E C1 = 0x3132; + enum E D1 = 0x4142; + enum E E1 = 0x5152; + enum E F1 = 0x6162; + enum E G1 = 0x7172; + enum E H1 = 0x8182; + + enum E A2 = 0x9192; + enum E B2 = 0xa1a2; + enum E C2 = 0xb1b2; + enum E D2 = 0xc1c2; + enum E E2 = 0xd1d2; + enum E F2 = 0xe1e2; + enum E G2 = 0xf1f2; + enum E H2 = 0x0102; + } + else static if (is(E == ubyte)) + { + enum E A1 = 0x11; + enum E B1 = 0x12; + enum E C1 = 0x13; + enum E D1 = 0x14; + enum E E1 = 0x15; + enum E F1 = 0x16; + enum 
+            enum E G1 = 0x17;
+            enum E H1 = 0x18;
+
+            enum E A2 = 0xf1;
+            enum E B2 = 0xf2;
+            enum E C2 = 0xf3;
+            enum E D2 = 0xf4;
+            enum E E2 = 0xf5;
+            enum E F2 = 0xf6;
+            enum E G2 = 0xf7;
+            enum E H2 = 0xf8;
+        }
+        else
+            enum unsupported = true;
+
+        static if (!__traits(compiles, unsupported))
+        {
+            immutable V[8] in1 =
+                [[ A1, B1, C1, D1, E1, F1, G1, H1 ],
+                 [ A1, B1, C1, D1, E1, F1, G1, H1 ],
+                 [ A1, B1, C1, D1, E1, F1, G1, H1 ],
+                 [ A1, B1, C1, D1, E1, F1, G1, H1 ],
+                 [ A1, B1, C1, D1, E1, F1, G1, H1 ],
+                 [ A2, B2, C2, D2, E2, F2, G2, H2 ],
+                 [ A2, B2, C2, D2, E2, F2, G2, H2 ],
+                 [ A2, B2, C2, D2, E2, F2, G2, H2 ]];
+
+            immutable VI[8] mask1 =
+                [[ 0, 1, 2, 3, 4, 5, 6, 7 ],
+                 [ 0x10, 0x21, 0x32, 0x43, 0x54, 0x65, 0x76, 0x87 ],
+                 [ 7, 6, 5, 4, 3, 2, 1, 0 ],
+                 [ 7, 0, 5, 3, 2, 4, 1, 6 ],
+                 [ 0, 2, 1, 3, 4, 6, 5, 7 ],
+                 [ 3, 1, 2, 0, 7, 5, 6, 4 ],
+                 [ 0, 0, 0, 0, 0, 0, 0, 0 ],
+                 [ 1, 6, 1, 6, 1, 6, 1, 6 ]];
+
+            immutable V[8] out1 =
+                [[ A1, B1, C1, D1, E1, F1, G1, H1 ],
+                 [ A1, B1, C1, D1, E1, F1, G1, H1 ],
+                 [ H1, G1, F1, E1, D1, C1, B1, A1 ],
+                 [ H1, A1, F1, D1, C1, E1, B1, G1 ],
+                 [ A1, C1, B1, D1, E1, G1, F1, H1 ],
+                 [ D2, B2, C2, A2, H2, F2, G2, E2 ],
+                 [ A2, A2, A2, A2, A2, A2, A2, A2 ],
+                 [ B2, G2, B2, G2, B2, G2, B2, G2 ]];
+
+            immutable V[6] in2 =
+                [[ A1, B1, C1, D1, E1, F1, G1, H1 ],
+                 [ A1, B1, C1, D1, E1, F1, G1, H1 ],
+                 [ A1, B1, C1, D1, E1, F1, G1, H1 ],
+                 [ A1, B1, C1, D1, E1, F1, G1, H1 ],
+                 [ A1, B1, C1, D1, E1, F1, G1, H1 ],
+                 [ A1, B1, C1, D1, E1, F1, G1, H1 ]];
+
+            immutable V in3 =
+                [ A2, B2, C2, D2, E2, F2, G2, H2 ];
+
+            immutable VI[6] mask2 =
+                [[ 0, 1, 2, 3, 4, 5, 6, 7 ],
+                 [ 8, 9, 10, 11, 12, 13, 14, 15 ],
+                 [ 0, 8, 1, 9, 2, 10, 3, 11 ],
+                 [ 0, 15, 4, 11, 12, 3, 7, 8 ],
+                 [ 0, 0, 0, 0, 0, 0, 0, 0 ],
+                 [ 0x1e, 0x2e, 0x3e, 0x4e, 0x5e, 0x6e, 0x7e, 0x8e ]];
+
+            immutable V[6] out2 =
+                [[ A1, B1, C1, D1, E1, F1, G1, H1 ],
+                 [ A2, B2, C2, D2, E2, F2, G2, H2 ],
+                 [ A1, A2, B1, B2, C1, C2, D1, D2 ],
+                 [ A1, H2, E1, D2, E2, D1, H1, A2 ],
+                 [ A1, A1, A1, A1, A1, A1, A1, A1 ],
+                 [ G2, G2, G2, G2, G2, G2, G2, G2 ]];
+        }
+    }
+    else static if (numElements == 4)
+    {
+        // Test fragment for vectors with 4 elements
+        static if (is(E == double))
+        {
+            enum E A = 0.69314718055994530942;
+            enum E B = 2.7182818284590452354;
+            enum E C = 2.30258509299404568402;
+            enum E D = 1.4426950408889634074;
+
+            enum E W = 0.31830988618379067154;
+            enum E X = 3.14159265358979323846;
+            enum E Y = 1.41421356237309504880;
+            enum E Z = 0.70710678118654752440;
+        }
+        else static if (is(E == float))
+        {
+            enum E A = 0.69314718055994530942f;
+            enum E B = 2.7182818284590452354f;
+            enum E C = 2.30258509299404568402f;
+            enum E D = 1.4426950408889634074f;
+
+            enum E W = 0.31830988618379067154f;
+            enum E X = 3.14159265358979323846f;
+            enum E Y = 1.41421356237309504880f;
+            enum E Z = 0.70710678118654752440f;
+        }
+        else static if (is(E == ulong))
+        {
+            enum E A = 0x1112131415161718;
+            enum E B = 0x2122232425262728;
+            enum E C = 0x3132333435363738;
+            enum E D = 0x4142434445464748;
+
+            enum E W = 0xc1c2c3c4c5c6c7c8;
+            enum E X = 0xd1d2d3d4d5d6d7d8;
+            enum E Y = 0xe1e2e3e4e5e6e7e8;
+            enum E Z = 0xf1f2f3f4f5f6f7f8;
+        }
+        else static if (is(E == uint))
+        {
+            enum E A = 0x11121314;
+            enum E B = 0x21222324;
+            enum E C = 0x31323334;
+            enum E D = 0x41424344;
+
+            enum E W = 0xc1c2c3c4;
+            enum E X = 0xd1d2d3d4;
+            enum E Y = 0xe1e2e3e4;
+            enum E Z = 0xf1f2f3f4;
+        }
+        else
+            enum unsupported = true;
+
+        static if (!__traits(compiles, unsupported))
+        {
+            immutable V[8] in1 =
+                [[ A, B, C, D ],
+                 [ A, B, C, D ],
+                 [ A, B, C, D ],
+                 [ A, B, C, D ],
+                 [ A, B, C, D ],
+                 [ W, X, Y, Z ],
+                 [ W, X, Y, Z ],
+                 [ W, X, Y, Z ]];
+
+            immutable VI[8] mask1 =
+                [[ 0, 1, 2, 3 ],
+                 [ 0+1*4, 1+2*4, 2+3*4, 3+4*4 ],
+                 [ 3, 2, 1, 0 ],
+                 [ 0, 3, 2, 1 ],
+                 [ 0, 2, 1, 3 ],
+                 [ 3, 1, 2, 0 ],
+                 [ 0, 0, 0, 0 ],
+                 [ 1, 2, 1, 2 ]];
+
+            immutable V[8] out1 =
+                [[ A, B, C, D ],
+                 [ A, B, C, D ],
+                 [ D, C, B, A ],
+                 [ A, D, C, B ],
+                 [ A, C, B, D ],
+                 [ Z, X, Y, W ],
+                 [ W, W, W, W ],
+                 [ X, Y, X, Y ]];
+
+            immutable V[6] in2 =
+                [[ A, B, C, D ],
+                 [ A, B, C, D ],
+                 [ A, B, C, D ],
+                 [ A, B, C, D ],
+                 [ A, B, C, D ],
+                 [ A, B, C, D ]];
+
+            immutable V in3 = [ W, X, Y, Z ];
+
+            immutable VI[6] mask2 =
+                [[ 0, 1, 2, 3 ],
+                 [ 4, 5, 6, 7 ],
+                 [ 0, 4, 1, 5 ],
+                 [ 0, 7, 4, 3 ],
+                 [ 0, 0, 0, 0 ],
+                 [ 7, 7, 7, 7 ]];
+
+            immutable V[6] out2 =
+                [[ A, B, C, D ],
+                 [ W, X, Y, Z ],
+                 [ A, W, B, X ],
+                 [ A, Z, W, D ],
+                 [ A, A, A, A ],
+                 [ Z, Z, Z, Z ]];
+        }
+    }
+    else static if (numElements == 2)
+    {
+        // Test fragment for vectors with 2 elements
+        static if (is(E == double))
+        {
+            enum E A = 0.69314718055994530942;
+            enum E B = 2.7182818284590452354;
+
+            enum E X = 3.14159265358979323846;
+            enum E Y = 1.41421356237309504880;
+        }
+        else static if (is(E == float))
+        {
+            enum E A = 0.69314718055994530942f;
+            enum E B = 2.7182818284590452354f;
+
+            enum E X = 3.14159265358979323846f;
+            enum E Y = 1.41421356237309504880f;
+        }
+        else static if (is(E == ulong))
+        {
+            enum E A = 0x1112131415161718;
+            enum E B = 0x2122232425262728;
+
+            enum E X = 0xc1c2c3c4c5c6c7c8;
+            enum E Y = 0xd1d2d3d4d5d6d7d8;
+        }
+        else static if (is(E == uint))
+        {
+            enum E A = 0x11121314;
+            enum E B = 0x21222324;
+
+            enum E X = 0xd1d2d3d4;
+            enum E Y = 0xe1e2e3e4;
+        }
+        else
+            enum unsupported = true;
+
+        static if (!__traits(compiles, unsupported))
+        {
+            immutable V[6] in1 =
+                [[ A, B ],
+                 [ A, B ],
+                 [ A, B ],
+                 [ A, B ],
+                 [ X, Y ],
+                 [ X, Y ]];
+
+            immutable VI[6] mask1 =
+                [[ 0, 1 ],
+                 [ -16, 1 ],
+                 [ 1, 0 ],
+                 [ 0, 0 ],
+                 [ 1, 1 ],
+                 [ 1, 0 ]];
+
+            immutable V[6] out1 =
+                [[ A, B ],
+                 [ A, B ],
+                 [ B, A ],
+                 [ A, A ],
+                 [ Y, Y ],
+                 [ Y, X ]];
+
+            immutable V[7] in2 =
+                [[ A, B ],
+                 [ A, B ],
+                 [ A, B ],
+                 [ A, B ],
+                 [ A, B ],
+                 [ A, B ],
+                 [ A, B ]];
+
+            immutable V in3 = [ X, Y ];
+
+            immutable VI[7] mask2 =
+                [[ 0, 1 ],
+                 [ 2, 3 ],
+                 [ 0, 2 ],
+                 [ 2, 1 ],
+                 [ 3, 0 ],
+                 [ 0, 0 ],
+                 [ 3, 3 ]];
+
+            immutable V[7] out2 =
+                [[ A, B ],
+                 [ X, Y ],
+                 [ A, X ],
+                 [ X, B ],
+                 [ Y, A ],
+                 [ A, A ],
+                 [ Y, Y ]];
+        }
+    }
+    else
+        enum unsupported = true;
+
+    static if (!__traits(compiles, unsupported))
+    {
+        static foreach (i; 0 .. in1.length)
+            assert(shuffle(in1[i], mask1[i]).array == out1[i].array);
+        static foreach (i; 0 .. in2.length)
+            assert(shuffle(in2[i], in3, mask2[i]).array == out2[i].array);
+    }
+}
+
+void main()
+{
+    static if (__traits(compiles, __vector(ubyte[16])))
+        testshuffle!(__vector(ubyte[16]))();
+
+    static if (__traits(compiles, __vector(ushort[16])))
+        testshuffle!(__vector(ushort[16]))();
+
+    static if (__traits(compiles, __vector(ubyte[8])))
+        testshuffle!(__vector(ubyte[8]))();
+
+    static if (__traits(compiles, __vector(ushort[8])))
+        testshuffle!(__vector(ushort[8]))();
+
+    static if (__traits(compiles, __vector(uint[8])))
+        testshuffle!(__vector(uint[8]))();
+
+    static if (__traits(compiles, __vector(ulong[4])))
+    {
+        testshuffle!(__vector(ulong[4]))();
+
+        static if (__traits(compiles, __vector(double[4])))
+            testshuffle!(__vector(double[4]), __vector(ulong[4]))();
+    }
+
+    static if (__traits(compiles, __vector(uint[4])))
+    {
+        testshuffle!(__vector(uint[4]))();
+
+        static if (__traits(compiles, __vector(float[4])))
+            testshuffle!(__vector(float[4]), __vector(uint[4]))();
+    }
+
+    static if (__traits(compiles, __vector(ulong[2])))
+    {
+        testshuffle!(__vector(ulong[2]))();
+
+        static if (__traits(compiles, __vector(double[2])))
+            testshuffle!(__vector(double[2]), __vector(ulong[2]))();
+    }
+
+    static if (__traits(compiles, __vector(uint[2])))
+    {
+        testshuffle!(__vector(uint[2]))();
+
+        static if (__traits(compiles, __vector(float[2])))
+            testshuffle!(__vector(float[2]), __vector(uint[2]))();
+    }
+}
diff --git a/gcc/testsuite/gdc.dg/torture/simd_shufflevector.d b/gcc/testsuite/gdc.dg/torture/simd_shufflevector.d
new file mode 100644
index 0000000..cc55999
--- /dev/null
+++ b/gcc/testsuite/gdc.dg/torture/simd_shufflevector.d
@@ -0,0 +1,55 @@
+// { dg-additional-options "-mavx" { target avx_runtime } }
+// { dg-skip-if "needs gcc/config.d" { ! d_runtime } }
+import gcc.simd;
+import gcc.attributes;
+
+void main()
+{
+    static if (__traits(compiles, __vector(int[4])))
+        alias int4 = __vector(int[4]);
+    static if (__traits(compiles, __vector(int[8])))
+        alias int8 = __vector(int[8]);
+
+    static if (__traits(compiles, int4) && __traits(compiles, int8))
+    {
+        __gshared int4[5] res;
+        __gshared int4 a;
+        __gshared int4 b;
+        __gshared int8[3] res8;
+        __gshared int8 a8;
+        __gshared int8 b8;
+
+        @noipa static void foo()
+        {
+            res[0] = shufflevector(a, b, 0, 1, 4, 5);
+            res[1] = shufflevector(a, b, 0, 1, 2, 5);
+            res8[0] = shufflevector(a, b, 0, 1, 2, 2 + 1, 4, 5, 6, 7);
+            res[2] = shufflevector(a8, b8, 0, 8, 1, 9);
+            res[3] = shufflevector(a8, b, 0, 8, 1, 9);
+            res[4] = shufflevector(a, b8, 0, 4, 1, 5);
+            res8[1] = shufflevector(a8, b, 0, 8, 1, 9, 10, 11, 2, 3);
+            res8[2] = shufflevector(a, b8, 0, 4, 1, 5, 4, 5, 6, 7);
+        }
+
+        a = [0, 1, 2, 3];
+        b = [4, 5, 6, 7];
+        a8 = [0, 1, 2, 3, 4, 5, 6, 7];
+        b8 = [8, 9, 10, 11, 12, 13, 14, 15];
+        foo();
+        assert(res[0].array == [0, 1, 4, 5]);
+
+        res[1][2] = 9;
+        assert(res[1].array == [0, 1, 9, 5]);
+        assert(res8[0].array == [0, 1, 2, 3, 4, 5, 6, 7]);
+        assert(res[2].array == [0, 8, 1, 9]);
+        assert(res[3].array == [0, 4, 1, 5]);
+        assert(res[4].array == [0, 8, 1, 9]);
+        assert(res8[1].array == [0, 4, 1, 5, 6, 7, 2, 3]);
+
+        res8[2][4] = 42;
+        res8[2][5] = 42;
+        res8[2][6] = 42;
+        res8[2][7] = 42;
+        assert(res8[2].array == [0, 8, 1, 9, 42, 42, 42, 42]);
+    }
+}
diff --git a/gcc/testsuite/gdc.dg/torture/simd_store.d b/gcc/testsuite/gdc.dg/torture/simd_store.d
new file mode 100644
index 0000000..b96ed42
--- /dev/null
+++ b/gcc/testsuite/gdc.dg/torture/simd_store.d
@@ -0,0 +1,54 @@
+// { dg-additional-options "-mavx" { target avx_runtime } }
+// { dg-skip-if "needs gcc/config.d" { ! d_runtime } }
gcc/config.d" { ! d_runtime } } +import gcc.simd; + +void main() +{ + ubyte[32] data; + + // to test all alignments from 1 ~ 16 + foreach (i; 0..16) + { + ubyte* d = &data[i]; + + void test(T)() + { + T v; + + // populate v` with data + ubyte* ptrToV = cast(ubyte*)&v; + foreach (j; 0..T.sizeof) + ptrToV[j] = cast(ubyte)j; + + // store `v` to location pointed to by `d` + storeUnaligned(cast(T*)d, v); + + // check that the the data was stored correctly + foreach (j; 0..T.sizeof) + assert(ptrToV[j] == d[j]); + } + + static if (__traits(compiles, __vector(void[16]))) + test!(__vector(void[16]))(); + static if (__traits(compiles, __vector(byte[16]))) + test!(__vector(byte[16]))(); + static if (__traits(compiles, __vector(ubyte[16]))) + test!(__vector(ubyte[16]))(); + static if (__traits(compiles, __vector(short[8]))) + test!(__vector(short[8]))(); + static if (__traits(compiles, __vector(ushort[8]))) + test!(__vector(ushort[8]))(); + static if (__traits(compiles, __vector(int[4]))) + test!(__vector(int[4]))(); + static if (__traits(compiles, __vector(uint[4]))) + test!(__vector(uint[4]))(); + static if (__traits(compiles, __vector(long[2]))) + test!(__vector(long[2]))(); + static if (__traits(compiles, __vector(ulong[2]))) + test!(__vector(ulong[2]))(); + static if (__traits(compiles, __vector(double[2]))) + test!(__vector(double[2]))(); + static if (__traits(compiles, __vector(float[4]))) + test!(__vector(float[4]))(); + } +} diff --git a/libphobos/libdruntime/Makefile.am b/libphobos/libdruntime/Makefile.am index d963aa9..56b332d 100644 --- a/libphobos/libdruntime/Makefile.am +++ b/libphobos/libdruntime/Makefile.am @@ -207,14 +207,14 @@ DRUNTIME_DSOURCES = core/atomic.d core/attribute.d core/bitop.d \ core/vararg.d core/volatile.d gcc/attribute.d gcc/attributes.d \ gcc/backtrace.d gcc/builtins.d gcc/deh.d gcc/emutls.d gcc/gthread.d \ gcc/sections/common.d gcc/sections/elf.d gcc/sections/macho.d \ - gcc/sections/package.d gcc/sections/pecoff.d gcc/unwind/arm.d \ - gcc/unwind/arm_common.d gcc/unwind/c6x.d gcc/unwind/generic.d \ - gcc/unwind/package.d gcc/unwind/pe.d object.d rt/aApply.d rt/aApplyR.d \ - rt/aaA.d rt/adi.d rt/arrayassign.d rt/arraycat.d rt/cast_.d \ - rt/config.d rt/critical_.d rt/deh.d rt/dmain2.d rt/ehalloc.d \ - rt/invariant.d rt/lifetime.d rt/memory.d rt/minfo.d rt/monitor_.d \ - rt/profilegc.d rt/sections.d rt/tlsgc.d rt/util/typeinfo.d \ - rt/util/utility.d + gcc/sections/package.d gcc/sections/pecoff.d gcc/simd.d \ + gcc/unwind/arm.d gcc/unwind/arm_common.d gcc/unwind/c6x.d \ + gcc/unwind/generic.d gcc/unwind/package.d gcc/unwind/pe.d object.d \ + rt/aApply.d rt/aApplyR.d rt/aaA.d rt/adi.d rt/arrayassign.d \ + rt/arraycat.d rt/cast_.d rt/config.d rt/critical_.d rt/deh.d \ + rt/dmain2.d rt/ehalloc.d rt/invariant.d rt/lifetime.d rt/memory.d \ + rt/minfo.d rt/monitor_.d rt/profilegc.d rt/sections.d rt/tlsgc.d \ + rt/util/typeinfo.d rt/util/utility.d DRUNTIME_DSOURCES_STDCXX = core/stdcpp/allocator.d core/stdcpp/array.d \ core/stdcpp/exception.d core/stdcpp/memory.d core/stdcpp/new_.d \ diff --git a/libphobos/libdruntime/Makefile.in b/libphobos/libdruntime/Makefile.in index 45e086a..24865fb 100644 --- a/libphobos/libdruntime/Makefile.in +++ b/libphobos/libdruntime/Makefile.in @@ -236,7 +236,7 @@ am__objects_1 = core/atomic.lo core/attribute.lo core/bitop.lo \ gcc/backtrace.lo gcc/builtins.lo gcc/deh.lo gcc/emutls.lo \ gcc/gthread.lo gcc/sections/common.lo gcc/sections/elf.lo \ gcc/sections/macho.lo gcc/sections/package.lo \ - gcc/sections/pecoff.lo gcc/unwind/arm.lo \ + 
+	gcc/sections/pecoff.lo gcc/simd.lo gcc/unwind/arm.lo \
 	gcc/unwind/arm_common.lo gcc/unwind/c6x.lo \
 	gcc/unwind/generic.lo gcc/unwind/package.lo gcc/unwind/pe.lo \
 	object.lo rt/aApply.lo rt/aApplyR.lo rt/aaA.lo rt/adi.lo \
@@ -874,14 +874,14 @@ DRUNTIME_DSOURCES = core/atomic.d core/attribute.d core/bitop.d \
 	core/vararg.d core/volatile.d gcc/attribute.d gcc/attributes.d \
 	gcc/backtrace.d gcc/builtins.d gcc/deh.d gcc/emutls.d gcc/gthread.d \
 	gcc/sections/common.d gcc/sections/elf.d gcc/sections/macho.d \
-	gcc/sections/package.d gcc/sections/pecoff.d gcc/unwind/arm.d \
-	gcc/unwind/arm_common.d gcc/unwind/c6x.d gcc/unwind/generic.d \
-	gcc/unwind/package.d gcc/unwind/pe.d object.d rt/aApply.d rt/aApplyR.d \
-	rt/aaA.d rt/adi.d rt/arrayassign.d rt/arraycat.d rt/cast_.d \
-	rt/config.d rt/critical_.d rt/deh.d rt/dmain2.d rt/ehalloc.d \
-	rt/invariant.d rt/lifetime.d rt/memory.d rt/minfo.d rt/monitor_.d \
-	rt/profilegc.d rt/sections.d rt/tlsgc.d rt/util/typeinfo.d \
-	rt/util/utility.d
+	gcc/sections/package.d gcc/sections/pecoff.d gcc/simd.d \
+	gcc/unwind/arm.d gcc/unwind/arm_common.d gcc/unwind/c6x.d \
+	gcc/unwind/generic.d gcc/unwind/package.d gcc/unwind/pe.d object.d \
+	rt/aApply.d rt/aApplyR.d rt/aaA.d rt/adi.d rt/arrayassign.d \
+	rt/arraycat.d rt/cast_.d rt/config.d rt/critical_.d rt/deh.d \
+	rt/dmain2.d rt/ehalloc.d rt/invariant.d rt/lifetime.d rt/memory.d \
+	rt/minfo.d rt/monitor_.d rt/profilegc.d rt/sections.d rt/tlsgc.d \
+	rt/util/typeinfo.d rt/util/utility.d
 
 DRUNTIME_DSOURCES_STDCXX = core/stdcpp/allocator.d core/stdcpp/array.d \
 	core/stdcpp/exception.d core/stdcpp/memory.d core/stdcpp/new_.d \
@@ -1340,6 +1340,7 @@ gcc/sections/elf.lo: gcc/sections/$(am__dirstamp)
 gcc/sections/macho.lo: gcc/sections/$(am__dirstamp)
 gcc/sections/package.lo: gcc/sections/$(am__dirstamp)
 gcc/sections/pecoff.lo: gcc/sections/$(am__dirstamp)
+gcc/simd.lo: gcc/$(am__dirstamp)
 gcc/unwind/$(am__dirstamp):
 	@$(MKDIR_P) gcc/unwind
 	@: > gcc/unwind/$(am__dirstamp)
diff --git a/libphobos/libdruntime/gcc/simd.d b/libphobos/libdruntime/gcc/simd.d
new file mode 100644
index 0000000..ffca50f
--- /dev/null
+++ b/libphobos/libdruntime/gcc/simd.d
@@ -0,0 +1,359 @@
+// GNU D Compiler SIMD support functions and intrinsics.
+// Copyright (C) 2022 Free Software Foundation, Inc.
+
+// GCC is free software; you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3, or (at your option) any later
+// version.
+
+// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+// for more details.
+
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+// <http://www.gnu.org/licenses/>.
+
+module gcc.simd;
+
+pure:
+nothrow:
+@safe:
+@nogc:
+pragma(inline, true):
+
+/**
+ * Emit prefetch instruction.
+ * Params:
+ *   address = address to be prefetched
+ *   writeFetch = true for write fetch, false for read fetch
+ *   locality = 0..3 (0 meaning least local, 3 meaning most local)
+ */
+void prefetch(bool writeFetch, ubyte locality)(const(void)* address)
+{
+    static assert(locality < 4, "0..3 expected for locality");
+    import gcc.builtins : __builtin_prefetch;
+    __builtin_prefetch(address, writeFetch, locality);
+}
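+
+// For instance, a read fetch with moderate temporal locality might be
+// issued ahead of a loop; this is only a sketch, and the 64-byte prefetch
+// distance and the `process` helper are illustrative, not part of the API:
+//
+//     void process(const(ubyte)* p)
+//     {
+//         prefetch!(false, 2)(p + 64);
+//         // ... work on the bytes at p ...
+//     }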
+
+/**
+ * Load unaligned vector from address.
+ * This is a compiler intrinsic.
+ * Params:
+ *   p = pointer to vector
+ * Returns:
+ *   the loaded vector
+ */
+V loadUnaligned(V)(const V* p) if (isVectorType!V);
+
+/**
+ * Store vector to unaligned address.
+ * This is a compiler intrinsic.
+ * Params:
+ *   p = pointer to vector
+ *   value = value to store
+ * Returns:
+ *   the stored value
+ */
+V storeUnaligned(V)(V* p, V value) if (isVectorType!V);
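+
+// A minimal usage sketch for the two intrinsics above; it assumes the
+// target supports 16-byte vectors, and `buf` is a hypothetical buffer
+// whose alignment is unknown:
+//
+//     alias int4 = __vector(int[4]);
+//     ubyte[int4.sizeof + 1] buf;
+//     int4 v = loadUnaligned(cast(const(int4)*) &buf[1]);
+//     v = v + 1;
+//     storeUnaligned(cast(int4*) &buf[1], v);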
+
+/**
+ * Construct a permutation of elements from one or two vectors, returning a
+ * vector of the same type as the input vector(s). The `mask` is an integral
+ * vector with the same width and element count as the output vector.
+ * Params:
+ *   op1 = input vector
+ *   op2 = input vector
+ *   mask = integer vector mask
+ * Returns:
+ *   vector with the same type as `op1` and `op2`
+ * Example:
+ * ---
+ * int4 a = [1, 2, 3, 4];
+ * int4 b = [5, 6, 7, 8];
+ * int4 mask1 = [0, 1, 1, 3];
+ * int4 mask2 = [0, 4, 2, 5];
+ * assert(shuffle(a, mask1).array == [1, 2, 2, 4]);
+ * assert(shuffle(a, b, mask2).array == [1, 5, 3, 6]);
+ * ---
+ */
+template shuffle(V0, V1, M)
+{
+    static assert(isVectorType!V0, "first argument must be a vector");
+    static assert(isVectorType!V1, "second argument must be a vector");
+    static assert(is(BaseType!V0 == BaseType!V1),
+        "first and second argument vectors must have the same element type");
+    static assert(isVectorType!M && is(BaseType!M : long),
+        "last argument must be an integer vector");
+    static assert(numElements!V0 == numElements!M && numElements!V1 == numElements!M,
+        "argument vectors and mask vector should have the same number of elements");
+    static assert(BaseType!V0.sizeof == BaseType!M.sizeof,
+        "argument vectors and mask vector should have the same element type size");
+
+    V0 shuffle(V0 op1, V1 op2, M mask);
+}
+
+/// Ditto
+template shuffle(V, M)
+{
+    static assert(isVectorType!V, "first argument must be a vector");
+    static assert(isVectorType!M && is(BaseType!M : long),
+        "last argument must be an integer vector");
+    static assert(numElements!V == numElements!M,
+        "argument vector and mask vector should have the same number of elements");
+    static assert(BaseType!V.sizeof == BaseType!M.sizeof,
+        "argument vector and mask vector should have the same element type size");
+
+    V shuffle(V op1, M mask)
+    {
+        return shuffle(op1, op1, mask);
+    }
+}
+
+/**
+ * Construct a permutation of elements from two vectors, returning a vector
+ * with the same element type as the input vector(s), and with an element
+ * count equal to the number of `index` arguments given.
+ * Params:
+ *   op1 = input vector
+ *   op2 = input vector
+ *   index = indices of the elements that should be extracted and returned
+ * Returns:
+ *   vector with the same element type as `op1` and `op2`, but with an
+ *   element count equal to the number of indices in `index`
+ * Example:
+ * ---
+ * int8 a = [1, -2, 3, -4, 5, -6, 7, -8];
+ * int4 b = shufflevector(a, a, 0, 2, 4, 6);
+ * assert(b.array == [1, 3, 5, 7]);
+ * int4 c = [-2, -4, -6, -8];
+ * int8 d = shufflevector(c, b, 4, 0, 5, 1, 6, 2, 7, 3);
+ * assert(d.array == a.array);
+ * ---
+ */
+template shufflevector(V1, V2, M...)
+{
+    static assert(isVectorType!V1, "first argument must be a vector");
+    static assert(isVectorType!V2, "second argument must be a vector");
+    static assert(is(BaseType!V1 == BaseType!V2),
+        "first and second argument vectors must have the same element type");
+    static assert(isPowerOf2!(M.length),
+        "number of index arguments must be a power of 2");
+
+    __vector(BaseType!V1[M.length]) shufflevector(V1 op1, V2 op2, M index);
+}
+
+/// Ditto
+template shufflevector(V, index...)
+{
+    // Defined for compatibility with LDC.
+    static assert(isVectorType!V, "first argument must be a vector type");
+    static assert(numElements!V == index.length,
+        "number of index arguments must equal the number of vector elements");
+
+    private template ctfeConstants(m...)
+    {
+        static if (m.length == 0) enum ctfeConstants = 1;
+        else enum ctfeConstants = m[0] | ctfeConstants!(m[1 .. $]);
+    }
+    static assert(__traits(compiles, ctfeConstants!index),
+        "all index arguments must be compile time constants");
+
+    private template validIndexes(m...)
+    {
+        static if (m.length == 0) enum validIndexes = true;
+        else enum validIndexes = (cast(long)m[0] > -1) && validIndexes!(m[1 .. $]);
+    }
+    static assert(validIndexes!index,
+        "all index arguments must be greater than or equal to 0");
+
+    V shufflevector(V op1, V op2)
+    {
+        return shufflevector(op1, op2, index);
+    }
+}
+
+/**
+ * Extracts a single scalar element from a vector at a specified index.
+ * Defined for compatibility with LDC.
+ * Params:
+ *   val = vector to extract element from
+ *   idx = index indicating the position from which to extract the element
+ * Returns:
+ *   scalar of the same type as the element type of val
+ * Example:
+ * ---
+ * int4 a = [0, 10, 20, 30];
+ * int k = extractelement!(int4, 2)(a);
+ * assert(k == 20);
+ * ---
+ */
+BaseType!V extractelement(V, int idx)(V val)
+    if (isVectorType!V && idx < numElements!V)
+{
+    return val[idx];
+}
+
+/**
+ * Inserts a scalar element into a vector at a specified index.
+ * Defined for compatibility with LDC.
+ * Params:
+ *   val = vector to assign element to
+ *   elt = scalar whose type is the element type of val
+ *   idx = index indicating the position at which to insert the element
+ * Returns:
+ *   vector of the same type as val
+ * Example:
+ * ---
+ * int4 a = [0, 10, 20, 30];
+ * int4 b = insertelement!(int4, 2)(a, 50);
+ * assert(b.array == [0, 10, 50, 30]);
+ * ---
+ */
+V insertelement(V, int idx)(V val, BaseType!V elt)
+    if (isVectorType!V && idx < numElements!V)
+{
+    val[idx] = elt;
+    return val;
+}
+
+/**
+ * Convert a vector from one integral or floating vector type to another.
+ * The result is an integral or floating vector that has had every element
+ * cast to the element type of the return type.
+ * Params:
+ *   from = input vector
+ * Returns:
+ *   converted vector
+ * Example:
+ * ---
+ * int4 a = [1, -2, 3, -4];
+ * float4 b = [1.5, -2.5, 3, 7];
+ * assert(convertvector!float4(a).array == [1, -2, 3, -4]);
+ * assert(convertvector!double4(a).array == [1, -2, 3, -4]);
+ * assert(convertvector!double4(b).array == [1.5, -2.5, 3, 7]);
+ * assert(convertvector!int4(b).array == [1, -2, 3, 7]);
+ * ---
+ */
+template convertvector(V, T)
+{
+    static assert(isVectorType!V && (is(BaseType!V : long) || is(BaseType!V : real)),
+        "first argument must be an integer or floating vector type");
+    static assert(isVectorType!T && (is(BaseType!T : long) || is(BaseType!T : real)),
+        "second argument must be an integer or floating vector");
+    static assert(numElements!V == numElements!T,
+        "first and second argument vectors should have the same number of elements");
+
+    V convertvector(T);
+}
+
+/**
+ * Construct a conditional merge of elements from two vectors, returning a
+ * vector of the same type as the input vector(s). The `mask` is an integral
+ * vector with the same width and element count as the output vector.
+ * Params:
+ *   op1 = input vector
+ *   op2 = input vector
+ *   mask = integer vector mask
+ * Returns:
+ *   vector with the same type as `op1` and `op2`
+ * Example:
+ * ---
+ * int4 a = [1, 2, 3, 4];
+ * int4 b = [5, 6, 7, 8];
+ * int4 mask = [0, -1, 0, -1];
+ * int4 c = blendvector(a, b, mask);
+ * // Each element of `c` is taken from `a` or `b`, selected element-wise
+ * // by whether the corresponding mask element is zero or non-zero.
+ * ---
+ */
+template blendvector(V0, V1, M)
+{
+    static assert(isVectorType!V0, "first argument must be a vector");
+    static assert(isVectorType!V1, "second argument must be a vector");
+    static assert(is(BaseType!V0 == BaseType!V1),
+        "first and second argument vectors must have the same element type");
+    static assert(isVectorType!M && is(BaseType!M : long),
+        "last argument must be an integer vector");
+    static assert(numElements!V0 == numElements!M && numElements!V1 == numElements!M,
+        "argument vectors and mask vector should have the same number of elements");
+    static assert(BaseType!V0.sizeof == BaseType!M.sizeof,
+        "argument vectors and mask vector should have the same element type size");
+
+    V0 blendvector(V0 op1, V1 op2, M mask);
+}
+
+/**
+ * Perform an element-wise comparison between two vectors, producing `0` when
+ * the comparison is false and `-1` (all bits are set to 1) otherwise.
+ * Params:
+ *   op1 = input vector
+ *   op2 = input vector
+ * Returns:
+ *   vector of the same width and number of elements as the comparison
+ *   operands with a signed integral element type
+ * Example:
+ * ---
+ * float4 a = [1, 3, 5, 7];
+ * float4 b = [2, 3, 4, 5];
+ * int4 c = greaterMask!float4(a, b);
+ * assert(c.array == [0, 0, -1, -1]);
+ * ---
+ */
+V equalMask(V)(V op1, V op2) if (isVectorType!V);
+/// Ditto
+V notEqualMask(V)(V op1, V op2) if (isVectorType!V);
+/// Ditto
+V greaterMask(V)(V op1, V op2) if (isVectorType!V);
+/// Ditto
+V greaterOrEqualMask(V)(V op1, V op2) if (isVectorType!V);
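+
+// A small sketch of how the comparison masks compose; it assumes the
+// target supports 16-byte vectors:
+//
+//     alias int4 = __vector(int[4]);
+//     int4 a = [1, 3, 5, 7];
+//     int4 b = [2, 3, 4, 5];
+//     // -1 where a[i] > b[i] or a[i] == b[i], 0 elsewhere
+//     int4 ge = greaterMask(a, b) | equalMask(a, b);
+//     assert(ge.array == greaterOrEqualMask(a, b).array);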
+
+/**
+ * Perform an element-wise logical comparison of the vector operands, treating
+ * each element as a boolean that is false when `0`, and producing `0` when
+ * the comparison is false and `-1` (all bits are set to 1) otherwise.
+ * Params:
+ *   op1 = input vector
+ *   op2 = input vector (`notMask` takes a single operand)
+ * Returns:
+ *   vector of the same width and number of elements as the comparison
+ *   operands with a signed integral element type
+ */
+V notMask(V)(V op1) if (isVectorType!V)
+{
+    return equalMask(op1, 0);
+}
+
+/// Ditto
+V andAndMask(V)(V op1, V op2) if (isVectorType!V)
+{
+    return notEqualMask(op1, 0) & notEqualMask(op2, 0);
+}
+
+/// Ditto
+V orOrMask(V)(V op1, V op2) if (isVectorType!V)
+{
+    return notEqualMask(op1, 0) | notEqualMask(op2, 0);
+}
+
+// Private helper templates.
+private:
+
+enum bool isVectorType(T) = is(T : __vector(V[N]), V, size_t N);
+
+template BaseType(V)
+{
+    alias typeof(V.array[0]) BaseType;
+}
+
+template numElements(V)
+{
+    enum numElements = V.sizeof / BaseType!(V).sizeof;
+}
+
+enum bool isPowerOf2(int Y) = Y && (Y & -Y) == Y;
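+
+// A short sketch of the logical helpers defined above (assumes the target
+// supports 16-byte vectors); each element is treated as a boolean that is
+// false when zero:
+//
+//     alias int4 = __vector(int[4]);
+//     int4 a = [0, 1, 2, 0];
+//     int4 b = [4, 0, 6, 0];
+//     assert(andAndMask(a, b).array == [0, 0, -1, 0]);
+//     assert(orOrMask(a, b).array == [-1, -1, -1, 0]);
+//     assert(notMask(a).array == [-1, 0, 0, -1]);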