aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCarl Love <cel@us.ibm.com>2020-06-15 17:44:19 -0500
committerCarl Love <carll@us.ibm.com>2020-08-04 23:12:04 -0500
commit3f029aea51a9b48b03a0671e445339a5ab1607eb (patch)
tree77731cea7b4b2fe3d7d3debf82edd79315831262
parent530e90952574febf1fa51639b08bd6b3f188b0b0 (diff)
downloadgcc-3f029aea51a9b48b03a0671e445339a5ab1607eb.zip
gcc-3f029aea51a9b48b03a0671e445339a5ab1607eb.tar.gz
gcc-3f029aea51a9b48b03a0671e445339a5ab1607eb.tar.bz2
rs6000, Add vector replace builtin support GCC maintainers:
The following patch adds support for builtins vec_replace_elt and vec_replace_unaligned. The patch has been compiled and tested on powerpc64le-unknown-linux-gnu (Power 8 LE) powerpc64le-unknown-linux-gnu (Power 9 LE) and mambo with no regression errors. Please let me know if this patch is acceptable for the mainline branch. Thanks. Carl Love ------------------------------------------------------- gcc/ChangeLog 2020-08-04 Carl Love <cel@us.ibm.com> * config/rs6000/altivec.h: Add define for vec_replace_elt and vec_replace_unaligned. * config/rs6000/vsx.md (UNSPEC_REPLACE_ELT, UNSPEC_REPLACE_UN): New unspecs. (REPLACE_ELT): New mode iterator. (REPLACE_ELT_char, REPLACE_ELT_sh, REPLACE_ELT_max): New mode attributes. (vreplace_un_<mode>, vreplace_elt_<mode>_inst): New. * config/rs6000/rs6000-builtin.def (VREPLACE_ELT_V4SI, VREPLACE_ELT_UV4SI, VREPLACE_ELT_V4SF, VREPLACE_ELT_UV2DI, VREPLACE_ELT_V2DF, VREPLACE_UN_V4SI, VREPLACE_UN_UV4SI, VREPLACE_UN_V4SF, VREPLACE_UN_V2DI, VREPLACE_UN_UV2DI, VREPLACE_UN_V2DF, (REPLACE_ELT, REPLACE_UN, VREPLACE_ELT_V2DI): New builtin entries. * config/rs6000/rs6000-call.c (P10_BUILTIN_VEC_REPLACE_ELT, P10_BUILTIN_VEC_REPLACE_UN): New builtin argument definitions. (rs6000_expand_quaternop_builtin): Add 3rd argument checks for CODE_FOR_vreplace_elt_v4si, CODE_FOR_vreplace_elt_v4sf, CODE_FOR_vreplace_un_v4si, CODE_FOR_vreplace_un_v4sf. (builtin_function_type) [P10_BUILTIN_VREPLACE_ELT_UV4SI, P10_BUILTIN_VREPLACE_ELT_UV2DI, P10_BUILTIN_VREPLACE_UN_UV4SI, P10_BUILTIN_VREPLACE_UN_UV2DI]: New cases. * doc/extend.texi: Add description for vec_replace_elt and vec_replace_unaligned builtins. gcc/testsuite/ChangeLog 2020-08-04 Carl Love <cel@us.ibm.com> * gcc.target/powerpc/vec-replace-word-runnable.c: New test.
-rw-r--r--gcc/config/rs6000/altivec.h2
-rw-r--r--gcc/config/rs6000/rs6000-builtin.def16
-rw-r--r--gcc/config/rs6000/rs6000-call.c61
-rw-r--r--gcc/config/rs6000/vsx.md60
-rw-r--r--gcc/doc/extend.texi50
-rw-r--r--gcc/testsuite/gcc.target/powerpc/vec-replace-word-runnable.c289
6 files changed, 478 insertions, 0 deletions
diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 7ba2878..62fe0bf 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -705,6 +705,8 @@ __altivec_scalar_pred(vec_any_nle,
#define vec_extracth(a, b, c) __builtin_vec_extracth (a, b, c)
#define vec_insertl(a, b, c) __builtin_vec_insertl (a, b, c)
#define vec_inserth(a, b, c) __builtin_vec_inserth (a, b, c)
+#define vec_replace_elt(a, b, c) __builtin_vec_replace_elt (a, b, c)
+#define vec_replace_unaligned(a, b, c) __builtin_vec_replace_un (a, b, c)
#define vec_gnb(a, b) __builtin_vec_gnb (a, b)
#define vec_clrl(a, b) __builtin_vec_clrl (a, b)
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index 50a885c..6e11d38 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -2750,6 +2750,20 @@ BU_P10V_3 (VINSERTVPRBR, "vinsvubvrx", CONST, vinsertvr_v16qi)
BU_P10V_3 (VINSERTVPRHR, "vinsvuhvrx", CONST, vinsertvr_v8hi)
BU_P10V_3 (VINSERTVPRWR, "vinsvuwvrx", CONST, vinsertvr_v4si)
+BU_P10V_3 (VREPLACE_ELT_V4SI, "vreplace_v4si", CONST, vreplace_elt_v4si)
+BU_P10V_3 (VREPLACE_ELT_UV4SI, "vreplace_uv4si", CONST, vreplace_elt_v4si)
+BU_P10V_3 (VREPLACE_ELT_V4SF, "vreplace_v4sf", CONST, vreplace_elt_v4sf)
+BU_P10V_3 (VREPLACE_ELT_V2DI, "vreplace_v2di", CONST, vreplace_elt_v2di)
+BU_P10V_3 (VREPLACE_ELT_UV2DI, "vreplace_uv2di", CONST, vreplace_elt_v2di)
+BU_P10V_3 (VREPLACE_ELT_V2DF, "vreplace_v2df", CONST, vreplace_elt_v2df)
+
+BU_P10V_3 (VREPLACE_UN_V4SI, "vreplace_un_v4si", CONST, vreplace_un_v4si)
+BU_P10V_3 (VREPLACE_UN_UV4SI, "vreplace_un_uv4si", CONST, vreplace_un_v4si)
+BU_P10V_3 (VREPLACE_UN_V4SF, "vreplace_un_v4sf", CONST, vreplace_un_v4sf)
+BU_P10V_3 (VREPLACE_UN_V2DI, "vreplace_un_v2di", CONST, vreplace_un_v2di)
+BU_P10V_3 (VREPLACE_UN_UV2DI, "vreplace_un_uv2di", CONST, vreplace_un_v2di)
+BU_P10V_3 (VREPLACE_UN_V2DF, "vreplace_un_v2df", CONST, vreplace_un_v2df)
+
BU_P10V_1 (VSTRIBR, "vstribr", CONST, vstrir_v16qi)
BU_P10V_1 (VSTRIHR, "vstrihr", CONST, vstrir_v8hi)
BU_P10V_1 (VSTRIBL, "vstribl", CONST, vstril_v16qi)
@@ -2794,6 +2808,8 @@ BU_P10_OVERLOAD_3 (EXTRACTL, "extractl")
BU_P10_OVERLOAD_3 (EXTRACTH, "extracth")
BU_P10_OVERLOAD_3 (INSERTL, "insertl")
BU_P10_OVERLOAD_3 (INSERTH, "inserth")
+BU_P10_OVERLOAD_3 (REPLACE_ELT, "replace_elt")
+BU_P10_OVERLOAD_3 (REPLACE_UN, "replace_un")
BU_P10_OVERLOAD_1 (VSTRIR, "strir")
BU_P10_OVERLOAD_1 (VSTRIL, "stril")
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 574f718..5dd1f66 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -5639,6 +5639,36 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_UINTQI },
+ { P10_BUILTIN_VEC_REPLACE_ELT, P10_BUILTIN_VREPLACE_ELT_UV4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_UINTSI, RS6000_BTI_UINTQI },
+ { P10_BUILTIN_VEC_REPLACE_ELT, P10_BUILTIN_VREPLACE_ELT_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTQI },
+ { P10_BUILTIN_VEC_REPLACE_ELT, P10_BUILTIN_VREPLACE_ELT_V4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_float, RS6000_BTI_INTQI },
+ { P10_BUILTIN_VEC_REPLACE_ELT, P10_BUILTIN_VREPLACE_ELT_UV2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_UINTDI, RS6000_BTI_UINTQI },
+ { P10_BUILTIN_VEC_REPLACE_ELT, P10_BUILTIN_VREPLACE_ELT_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_INTDI, RS6000_BTI_INTQI },
+ { P10_BUILTIN_VEC_REPLACE_ELT, P10_BUILTIN_VREPLACE_ELT_V2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_double, RS6000_BTI_INTQI },
+
+ { P10_BUILTIN_VEC_REPLACE_UN, P10_BUILTIN_VREPLACE_UN_UV4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_UINTSI, RS6000_BTI_UINTQI },
+ { P10_BUILTIN_VEC_REPLACE_UN, P10_BUILTIN_VREPLACE_UN_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTQI },
+ { P10_BUILTIN_VEC_REPLACE_UN, P10_BUILTIN_VREPLACE_UN_V4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_float, RS6000_BTI_INTQI },
+ { P10_BUILTIN_VEC_REPLACE_UN, P10_BUILTIN_VREPLACE_UN_UV2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_UINTDI, RS6000_BTI_UINTQI },
+ { P10_BUILTIN_VEC_REPLACE_UN, P10_BUILTIN_VREPLACE_UN_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_INTDI, RS6000_BTI_INTQI },
+ { P10_BUILTIN_VEC_REPLACE_UN, P10_BUILTIN_VREPLACE_UN_V2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_double, RS6000_BTI_INTQI },
+
{ P10_BUILTIN_VEC_VSTRIL, P10_BUILTIN_VSTRIBL,
RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
{ P10_BUILTIN_VEC_VSTRIL, P10_BUILTIN_VSTRIBL,
@@ -10066,6 +10096,33 @@ rs6000_expand_quaternop_builtin (enum insn_code icode, tree exp, rtx target)
return CONST0_RTX (tmode);
}
}
+ else if (icode == CODE_FOR_vreplace_elt_v4si
+ || icode == CODE_FOR_vreplace_elt_v4sf)
+ {
+ /* Check whether the 3rd argument is an integer constant in the range
+ 0 to 3 inclusive. */
+ STRIP_NOPS (arg2);
+ if (TREE_CODE (arg2) != INTEGER_CST
+ || !IN_RANGE (TREE_INT_CST_LOW (arg2), 0, 3))
+ {
+ error ("argument 3 must be in the range 0 to 3");
+ return CONST0_RTX (tmode);
+ }
+ }
+
+ else if (icode == CODE_FOR_vreplace_un_v4si
+ || icode == CODE_FOR_vreplace_un_v4sf)
+ {
+ /* Check whether the 3rd argument is an integer constant in the range
+ 0 to 12 inclusive. */
+ STRIP_NOPS (arg2);
+ if (TREE_CODE (arg2) != INTEGER_CST
+ || !IN_RANGE(TREE_INT_CST_LOW (arg2), 0, 12))
+ {
+ error ("argument 3 must be in the range 0 to 12");
+ return CONST0_RTX (tmode);
+ }
+ }
if (target == 0
|| GET_MODE (target) != tmode
@@ -13912,6 +13969,10 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
case P10_BUILTIN_VINSERTVPRBL:
case P10_BUILTIN_VINSERTVPRHL:
case P10_BUILTIN_VINSERTVPRWL:
+ case P10_BUILTIN_VREPLACE_ELT_UV4SI:
+ case P10_BUILTIN_VREPLACE_ELT_UV2DI:
+ case P10_BUILTIN_VREPLACE_UN_UV4SI:
+ case P10_BUILTIN_VREPLACE_UN_UV2DI:
h.uns_p[0] = 1;
h.uns_p[1] = 1;
h.uns_p[2] = 1;
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 626ba78..dd75021 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -359,6 +359,8 @@
UNSPEC_EXTRACTR
UNSPEC_INSERTL
UNSPEC_INSERTR
+ UNSPEC_REPLACE_ELT
+ UNSPEC_REPLACE_UN
])
(define_int_iterator XVCVBF16 [UNSPEC_VSX_XVCVSPBF16
@@ -370,6 +372,15 @@
;; Like VI, defined in vector.md, but add ISA 2.07 integer vector ops
(define_mode_iterator VI2 [V4SI V8HI V16QI V2DI])
+;; Vector replace_elt iterator/attr for 32-bit and 64-bit elements
+(define_mode_iterator REPLACE_ELT [V4SI V4SF V2DI V2DF])
+(define_mode_attr REPLACE_ELT_char [(V4SI "w") (V4SF "w")
+ (V2DI "d") (V2DF "d")])
+(define_mode_attr REPLACE_ELT_sh [(V4SI "2") (V4SF "2")
+ (V2DI "3") (V2DF "3")])
+(define_mode_attr REPLACE_ELT_max [(V4SI "12") (V4SF "12")
+ (V2DI "8") (V2DF "8")])
+
;; VSX moves
;; The patterns for LE permuted loads and stores come before the general
@@ -4022,6 +4033,55 @@
"vins<wd>rx %0,%1,%2"
[(set_attr "type" "vecsimple")])
+(define_expand "vreplace_elt_<mode>"
+ [(set (match_operand:REPLACE_ELT 0 "register_operand")
+ (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand")
+ (match_operand:<VS_scalar> 2 "register_operand")
+ (match_operand:QI 3 "const_0_to_3_operand")]
+ UNSPEC_REPLACE_ELT))]
+ "TARGET_POWER10"
+{
+ int index;
+ /* Immediate value is the element index; convert to byte index and adjust
+ for endianness if needed. */
+ if (BYTES_BIG_ENDIAN)
+ index = INTVAL (operands[3]) << <REPLACE_ELT_sh>;
+
+ else
+ index = <REPLACE_ELT_max> - (INTVAL (operands[3]) << <REPLACE_ELT_sh>);
+
+ emit_insn (gen_vreplace_elt_<mode>_inst (operands[0], operands[1],
+ operands[2],
+ GEN_INT (index)));
+ DONE;
+ }
+[(set_attr "type" "vecsimple")])
+
+(define_expand "vreplace_un_<mode>"
+ [(set (match_operand:REPLACE_ELT 0 "register_operand")
+ (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand")
+ (match_operand:<VS_scalar> 2 "register_operand")
+ (match_operand:QI 3 "const_0_to_12_operand")]
+ UNSPEC_REPLACE_UN))]
+ "TARGET_POWER10"
+{
+ /* Immediate value is the byte index in big-endian numbering. */
+ emit_insn (gen_vreplace_elt_<mode>_inst (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+[(set_attr "type" "vecsimple")])
+
+(define_insn "vreplace_elt_<mode>_inst"
+ [(set (match_operand:REPLACE_ELT 0 "register_operand" "=v")
+ (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand" "0")
+ (match_operand:<VS_scalar> 2 "register_operand" "r")
+ (match_operand:QI 3 "const_0_to_12_operand" "n")]
+ UNSPEC_REPLACE_ELT))]
+ "TARGET_POWER10"
+ "vins<REPLACE_ELT_char> %0,%2,%3"
+ [(set_attr "type" "vecsimple")])
+
;; VSX_EXTRACT optimizations
;; Optimize double d = (double) vec_extract (vi, <n>)
;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 035c38c..f9b57e4 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -21090,6 +21090,56 @@ Note that some fairly anomalous results can be generated if the byte index is
not aligned on an element boundary for the sort of element being inserted.
@findex vec_inserth
+Vector Replace Element
+@smallexample
+@exdent vector signed int vec_replace_elt (vector signed int, signed int,
+const int);
+@exdent vector unsigned int vec_replace_elt (vector unsigned int,
+unsigned int, const int);
+@exdent vector float vec_replace_elt (vector float, float, const int);
+@exdent vector signed long long vec_replace_elt (vector signed long long,
+signed long long, const int);
+@exdent vector unsigned long long vec_replace_elt (vector unsigned long long,
+unsigned long long, const int);
+@exdent vector double vec_replace_elt (vector double, double, const int);
+@end smallexample
+The third argument (an element number in [0,3] for 32-bit elements, or [0,1]
+for 64-bit elements) identifies the natural-endian element of the first
+argument that will be replaced by the second argument to produce the result.
+The other elements of the first argument will remain unchanged in the result.
+
+If it's desirable to insert a word at an unaligned position, use
+vec_replace_unaligned instead.
+
+@findex vec_replace_elt
+
+Vector Replace Unaligned
+@smallexample
+@exdent vector unsigned char vec_replace_unaligned (vector unsigned char,
+signed int, const int);
+@exdent vector unsigned char vec_replace_unaligned (vector unsigned char,
+unsigned int, const int);
+@exdent vector unsigned char vec_replace_unaligned (vector unsigned char,
+float, const int);
+@exdent vector unsigned char vec_replace_unaligned (vector unsigned char,
+signed long long, const int);
+@exdent vector unsigned char vec_replace_unaligned (vector unsigned char,
+unsigned long long, const int);
+@exdent vector unsigned char vec_replace_unaligned (vector unsigned char,
+double, const int);
+@end smallexample
+
+The second argument replaces a portion of the first argument to produce the
+result, with the rest of the first argument unchanged in the result. The
+third argument identifies the byte index (using left-to-right, or big-endian
+order) where the high-order byte of the second argument will be placed, with
+the remaining bytes of the second argument placed naturally "to the right"
+of the high-order byte.
+
+The programmer is responsible for understanding the endianness issues involved
+with the first argument and the result.
+@findex vec_replace_unaligned
+
@smallexample
@exdent vector unsigned long long int
@exdent vec_pext (vector unsigned long long int, vector unsigned long long int)
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-replace-word-runnable.c b/gcc/testsuite/gcc.target/powerpc/vec-replace-word-runnable.c
new file mode 100644
index 0000000..94af210
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-replace-word-runnable.c
@@ -0,0 +1,289 @@
+/* { dg-do run } */
+/* { dg-require-effective-target power10_hw } */
+/* { dg-options "-mdejagnu-cpu=power10" } */
+
+#include <altivec.h>
+
+#define DEBUG 0
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+extern void abort (void);
+
+int
+main (int argc, char *argv [])
+{
+ int i;
+ unsigned char ch;
+ unsigned int index;
+
+ vector unsigned int vresult_uint;
+ vector unsigned int expected_vresult_uint;
+ vector unsigned int src_va_uint;
+ vector unsigned int src_vb_uint;
+ unsigned int src_a_uint;
+
+ vector int vresult_int;
+ vector int expected_vresult_int;
+ vector int src_va_int;
+ vector int src_vb_int;
+ int src_a_int;
+
+ vector unsigned long long int vresult_ullint;
+ vector unsigned long long int expected_vresult_ullint;
+ vector unsigned long long int src_va_ullint;
+ vector unsigned long long int src_vb_ullint;
+ unsigned int long long src_a_ullint;
+
+ vector long long int vresult_llint;
+ vector long long int expected_vresult_llint;
+ vector long long int src_va_llint;
+ vector long long int src_vb_llint;
+ long long int src_a_llint;
+
+ vector float vresult_float;
+ vector float expected_vresult_float;
+ vector float src_va_float;
+ float src_a_float;
+
+ vector double vresult_double;
+ vector double expected_vresult_double;
+ vector double src_va_double;
+ double src_a_double;
+
+ /* Vector replace 32-bit element */
+ src_a_uint = 345;
+ src_va_uint = (vector unsigned int) { 0, 1, 2, 3 };
+ vresult_uint = (vector unsigned int) { 0, 0, 0, 0 };
+ expected_vresult_uint = (vector unsigned int) { 0, 1, 345, 3 };
+
+ vresult_uint = vec_replace_elt (src_va_uint, src_a_uint, 2);
+
+ if (!vec_all_eq (vresult_uint, expected_vresult_uint)) {
+#if DEBUG
+ printf("ERROR, vec_replace_elt (src_vb_uint, src_va_uint, index)\n");
+ for(i = 0; i < 4; i++)
+ printf(" vresult_uint[%d] = %d, expected_vresult_uint[%d] = %d\n",
+ i, vresult_uint[i], i, expected_vresult_uint[i]);
+#else
+ abort();
+#endif
+ }
+
+ src_a_int = 234;
+ src_va_int = (vector int) { 0, 1, 2, 3 };
+ vresult_int = (vector int) { 0, 0, 0, 0 };
+ expected_vresult_int = (vector int) { 0, 234, 2, 3 };
+
+ vresult_int = vec_replace_elt (src_va_int, src_a_int, 1);
+
+ if (!vec_all_eq (vresult_int, expected_vresult_int)) {
+#if DEBUG
+ printf("ERROR, vec_replace_elt (src_vb_int, src_va_int, index)\n");
+ for(i = 0; i < 4; i++)
+ printf(" vresult_int[%d] = %d, expected_vresult_int[%d] = %d\n",
+ i, vresult_int[i], i, expected_vresult_int[i]);
+#else
+ abort();
+#endif
+ }
+
+ src_a_float = 34.0;
+ src_va_float = (vector float) { 0.0, 10.0, 20.0, 30.0 };
+ vresult_float = (vector float) { 0.0, 0.0, 0.0, 0.0 };
+ expected_vresult_float = (vector float) { 0.0, 34.0, 20.0, 30.0 };
+
+ vresult_float = vec_replace_elt (src_va_float, src_a_float, 1);
+
+ if (!vec_all_eq (vresult_float, expected_vresult_float)) {
+#if DEBUG
+ printf("ERROR, vec_replace_elt (src_vb_float, src_va_float, index)\n");
+ for(i = 0; i < 4; i++)
+ printf(" vresult_float[%d] = %f, expected_vresult_float[%d] = %f\n",
+ i, vresult_float[i], i, expected_vresult_float[i]);
+#else
+ abort();
+#endif
+ }
+
+ /* Vector replace 64-bit element */
+ src_a_ullint = 456;
+ src_va_ullint = (vector unsigned long long int) { 0, 1 };
+ vresult_ullint = (vector unsigned long long int) { 0, 0 };
+ expected_vresult_ullint = (vector unsigned long long int) { 0, 456 };
+
+ vresult_ullint = vec_replace_elt (src_va_ullint, src_a_ullint, 1);
+
+ if (!vec_all_eq (vresult_ullint, expected_vresult_ullint)) {
+#if DEBUG
+ printf("ERROR, vec_replace_elt (src_vb_ullint, src_va_ullint, index)\n");
+ for(i = 0; i < 2; i++)
+ printf(" vresult_ullint[%d] = %d, expected_vresult_ullint[%d] = %d\n",
+ i, vresult_ullint[i], i, expected_vresult_ullint[i]);
+#else
+ abort();
+#endif
+ }
+
+ src_a_llint = 678;
+ src_va_llint = (vector long long int) { 0, 1 };
+ vresult_llint = (vector long long int) { 0, 0 };
+ expected_vresult_llint = (vector long long int) { 0, 678 };
+
+ vresult_llint = vec_replace_elt (src_va_llint, src_a_llint, 1);
+
+ if (!vec_all_eq (vresult_llint, expected_vresult_llint)) {
+#if DEBUG
+ printf("ERROR, vec_replace_elt (src_vb_llint, src_va_llint, index)\n");
+ for(i = 0; i < 2; i++)
+ printf(" vresult_llint[%d] = %d, expected_vresult_llint[%d] = %d\n",
+ i, vresult_llint[i], i, expected_vresult_llint[i]);
+#else
+ abort();
+#endif
+ }
+
+ src_a_double = 678.0;
+ src_va_double = (vector double) { 0.0, 50.0 };
+ vresult_double = (vector double) { 0.0, 0.0 };
+ expected_vresult_double = (vector double) { 0.0, 678.0 };
+
+ vresult_double = vec_replace_elt (src_va_double, src_a_double, 1);
+
+ if (!vec_all_eq (vresult_double, expected_vresult_double)) {
+#if DEBUG
+ printf("ERROR, vec_replace_elt (src_vb_double, src_va_double, index)\n");
+ for(i = 0; i < 2; i++)
+ printf(" vresult_double[%d] = %f, expected_vresult_double[%d] = %f\n",
+ i, vresult_double[i], i, expected_vresult_double[i]);
+#else
+ abort();
+#endif
+ }
+
+
+ /* Vector replace 32-bit element, unaligned */
+ src_a_uint = 345;
+ src_va_uint = (vector unsigned int) { 1, 2, 0, 0 };
+ vresult_uint = (vector unsigned int) { 0, 0, 0, 0 };
+ /* Byte index 3 will overwrite part of elements 2 and 3 */
+ expected_vresult_uint = (vector unsigned int) { 1, 2, 345*256, 0 };
+
+ vresult_uint = vec_replace_unaligned (src_va_uint, src_a_uint, 3);
+
+ if (!vec_all_eq (vresult_uint, expected_vresult_uint)) {
+#if DEBUG
+ printf("ERROR, vec_replace_unaligned (src_vb_uint, src_va_uint, index)\n");
+ for(i = 0; i < 4; i++)
+ printf(" vresult_uint[%d] = %d, expected_vresult_uint[%d] = %d\n",
+ i, vresult_uint[i], i, expected_vresult_uint[i]);
+#else
+ abort();
+#endif
+ }
+
+ src_a_int = 234;
+ src_va_int = (vector int) { 1, 0, 3, 4 };
+ vresult_int = (vector int) { 0, 0, 0, 0 };
+ /* Byte index 7 will overwrite part of elements 1 and 2 */
+ expected_vresult_int = (vector int) { 1, 234*256, 0, 4 };
+
+ vresult_int = vec_replace_unaligned (src_va_int, src_a_int, 7);
+
+ if (!vec_all_eq (vresult_int, expected_vresult_int)) {
+#if DEBUG
+ printf("ERROR, vec_replace_unaligned (src_vb_int, src_va_int, index)\n");
+ for(i = 0; i < 4; i++)
+ printf(" vresult_int[%d] = %d, expected_vresult_int[%d] = %d\n",
+ i, vresult_int[i], i, expected_vresult_int[i]);
+#else
+ abort();
+#endif
+ }
+
+ src_a_float = 34.0;
+ src_va_float = (vector float) { 0.0, 10.0, 20.0, 30.0 };
+ vresult_float = (vector float) { 0.0, 0.0, 0.0, 0.0 };
+ expected_vresult_float = (vector float) { 0.0, 34.0, 20.0, 30.0 };
+
+ vresult_float = vec_replace_unaligned (src_va_float, src_a_float, 8);
+
+ if (!vec_all_eq (vresult_float, expected_vresult_float)) {
+#if DEBUG
+ printf("ERROR, vec_replace_unaligned (src_vb_float, src_va_float, index)\n");
+ for(i = 0; i < 4; i++)
+ printf(" vresult_float[%d] = %f, expected_vresult_float[%d] = %f\n",
+ i, vresult_float[i], i, expected_vresult_float[i]);
+#else
+ abort();
+#endif
+ }
+
+ /* Vector replace 64-bit element, unaligned */
+ src_a_ullint = 456;
+ src_va_ullint = (vector unsigned long long int) { 0, 0x222 };
+ vresult_ullint = (vector unsigned long long int) { 0, 0 };
+ expected_vresult_ullint = (vector unsigned long long int) { 456*256,
+ 0x200 };
+
+ /* Byte index 7 overwrites the low byte of element 1 and the upper 7 bytes of element 0 */
+ vresult_ullint = vec_replace_unaligned (src_va_ullint, src_a_ullint, 7);
+
+ if (!vec_all_eq (vresult_ullint, expected_vresult_ullint)) {
+#if DEBUG
+ printf("ERROR, vec_replace_unaligned (src_vb_ullint, src_va_ullint, index)\n");
+ for(i = 0; i < 2; i++)
+ printf(" vresult_ullint[%d] = %d, expected_vresult_ullint[%d] = %d\n",
+ i, vresult_ullint[i], i, expected_vresult_ullint[i]);
+#else
+ abort();
+#endif
+ }
+
+ src_a_llint = 678;
+ src_va_llint = (vector long long int) { 0, 0x101 };
+ vresult_llint = (vector long long int) { 0, 0 };
+ /* Byte index 7 overwrites the low byte of element 1 and the upper 7 bytes of element 0 */
+ expected_vresult_llint = (vector long long int) { 678*256, 0x100 };
+
+ vresult_llint = vec_replace_unaligned (src_va_llint, src_a_llint, 7);
+
+ if (!vec_all_eq (vresult_llint, expected_vresult_llint)) {
+#if DEBUG
+ printf("ERROR, vec_replace_unaligned (src_vb_llint, src_va_llint, index)\n");
+ for(i = 0; i < 2; i++)
+ printf(" vresult_llint[%d] = %d, expected_vresult_llint[%d] = %d\n",
+ i, vresult_llint[i], i, expected_vresult_llint[i]);
+#else
+ abort();
+#endif
+ }
+
+ src_a_double = 678.0;
+ src_va_double = (vector double) { 0.0, 50.0 };
+ vresult_double = (vector double) { 0.0, 0.0 };
+ expected_vresult_double = (vector double) { 0.0, 678.0 };
+
+ vresult_double = vec_replace_unaligned (src_va_double, src_a_double, 0);
+
+ if (!vec_all_eq (vresult_double, expected_vresult_double)) {
+#if DEBUG
+ printf("ERROR, vec_replace_unaligned (src_vb_double, src_va_double, index)\
+n");
+ for(i = 0; i < 2; i++)
+ printf(" vresult_double[%d] = %f, expected_vresult_double[%d] = %f\n",
+ i, vresult_double[i], i, expected_vresult_double[i]);
+#else
+ abort();
+#endif
+ }
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times {\mvinsw\M} 6 } } */
+/* { dg-final { scan-assembler-times {\mvinsd\M} 6 } } */
+
+