aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/config/rs6000/altivec.md19
-rw-r--r--gcc/config/rs6000/predicates.md17
-rw-r--r--gcc/config/rs6000/rs6000-protos.h2
-rw-r--r--gcc/config/rs6000/rs6000.c59
-rw-r--r--gcc/testsuite/gcc.dg/pr101384.c39
-rw-r--r--gcc/testsuite/gcc.target/powerpc/pr101384-1.c79
-rw-r--r--gcc/testsuite/gcc.target/powerpc/pr101384-2.c79
7 files changed, 268 insertions, 26 deletions
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index a20d6ac..d70c17e 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -317,22 +317,33 @@
[(const_int 0)]
{
rtx dest = operands[0];
- machine_mode mode = GET_MODE (operands[0]);
+ machine_mode mode;
rtvec v;
int i, num_elements;
- if (mode == V4SFmode)
+ switch (easy_altivec_constant (operands[1], <MODE>mode))
{
+ case 1:
+ mode = V16QImode;
+ break;
+ case 2:
+ mode = V8HImode;
+ break;
+ case 4:
mode = V4SImode;
- dest = gen_lowpart (V4SImode, dest);
+ break;
+ default:
+ gcc_unreachable ();
}
+ if (mode != <MODE>mode)
+ dest = gen_lowpart (mode, dest);
num_elements = GET_MODE_NUNITS (mode);
v = rtvec_alloc (num_elements);
for (i = 0; i < num_elements; i++)
RTVEC_ELT (v, i) = constm1_rtx;
- emit_insn (gen_vec_initv4sisi (dest, gen_rtx_PARALLEL (mode, v)));
+ rs6000_expand_vector_init (dest, gen_rtx_PARALLEL (mode, v));
emit_insn (gen_rtx_SET (dest, gen_rtx_ASHIFT (mode, dest, dest)));
DONE;
})
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 121cbf1..956e42b 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -683,15 +683,26 @@
(define_predicate "easy_vector_constant_msb"
(and (match_code "const_vector")
(and (match_test "TARGET_ALTIVEC")
- (match_test "easy_altivec_constant (op, mode)")))
+ (match_test "easy_altivec_constant (op, mode)")
+ (match_test "vspltis_shifted (op) == 0")))
{
HOST_WIDE_INT val;
- int elt;
+ int elt, sz = easy_altivec_constant (op, mode);
+ machine_mode inner = GET_MODE_INNER (mode);
+ int isz = GET_MODE_SIZE (inner);
if (mode == V2DImode || mode == V2DFmode)
return 0;
elt = BYTES_BIG_ENDIAN ? GET_MODE_NUNITS (mode) - 1 : 0;
+ if (isz < sz)
+ {
+ if (const_vector_elt_as_int (op, elt) != 0)
+ return 0;
+ elt += (BYTES_BIG_ENDIAN ? -1 : 1) * (sz - isz) / isz;
+ }
+ else if (isz > sz)
+ inner = smallest_int_mode_for_size (sz * BITS_PER_UNIT);
val = const_vector_elt_as_int (op, elt);
- return EASY_VECTOR_MSB (val, GET_MODE_INNER (mode));
+ return EASY_VECTOR_MSB (val, inner);
})
;; Return true if this is an easy altivec constant that we form
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 94bf961..14f6b31 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -30,7 +30,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int,
tree, machine_mode);
#endif /* TREE_CODE */
-extern bool easy_altivec_constant (rtx, machine_mode);
+extern int easy_altivec_constant (rtx, machine_mode);
extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
extern int vspltis_shifted (rtx);
extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 779de95..279f00c 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6134,6 +6134,27 @@ vspltis_constant (rtx op, unsigned step, unsigned copies)
splat_val = val;
msb_val = val >= 0 ? 0 : -1;
+ if (val == 0 && step > 1)
+ {
+ /* Special case for loading most significant bit with step > 1.
+ In that case, match 0s in all but step-1s elements, where match
+ EASY_VECTOR_MSB. */
+ for (i = 1; i < nunits; ++i)
+ {
+ unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
+ HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
+ if ((i & (step - 1)) == step - 1)
+ {
+ if (!EASY_VECTOR_MSB (elt_val, inner))
+ break;
+ }
+ else if (elt_val)
+ break;
+ }
+ if (i == nunits)
+ return true;
+ }
+
/* Construct the value to be splatted, if possible. If not, return 0. */
for (i = 2; i <= copies; i *= 2)
{
@@ -6146,6 +6167,7 @@ vspltis_constant (rtx op, unsigned step, unsigned copies)
| (small_val & mask)))
return false;
splat_val = small_val;
+ inner = smallest_int_mode_for_size (bitsize);
}
/* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
@@ -6160,8 +6182,9 @@ vspltis_constant (rtx op, unsigned step, unsigned copies)
;
/* Also check if are loading up the most significant bit which can be done by
- loading up -1 and shifting the value left by -1. */
- else if (EASY_VECTOR_MSB (splat_val, inner))
+ loading up -1 and shifting the value left by -1. Only do this for
+ step 1 here, for larger steps it is done earlier. */
+ else if (EASY_VECTOR_MSB (splat_val, inner) && step == 1)
;
else
@@ -6271,15 +6294,15 @@ vspltis_shifted (rtx op)
}
}
- /* If all elements are equal, we don't need to do VLSDOI. */
+ /* If all elements are equal, we don't need to do VSLDOI. */
return 0;
}
-/* Return true if OP is of the given MODE and can be synthesized
- with a vspltisb, vspltish or vspltisw. */
+/* Return non-zero (element mode byte size) if OP is of the given MODE
+ and can be synthesized with a vspltisb, vspltish or vspltisw. */
-bool
+int
easy_altivec_constant (rtx op, machine_mode mode)
{
unsigned step, copies;
@@ -6287,39 +6310,39 @@ easy_altivec_constant (rtx op, machine_mode mode)
if (mode == VOIDmode)
mode = GET_MODE (op);
else if (mode != GET_MODE (op))
- return false;
+ return 0;
/* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
constants. */
if (mode == V2DFmode)
- return zero_constant (op, mode);
+ return zero_constant (op, mode) ? 8 : 0;
else if (mode == V2DImode)
{
if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
|| !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
- return false;
+ return 0;
if (zero_constant (op, mode))
- return true;
+ return 8;
if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
&& INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
- return true;
+ return 8;
- return false;
+ return 0;
}
/* V1TImode is a special container for TImode. Ignore for now. */
else if (mode == V1TImode)
- return false;
+ return 0;
/* Start with a vspltisw. */
step = GET_MODE_NUNITS (mode) / 4;
copies = 1;
if (vspltis_constant (op, step, copies))
- return true;
+ return 4;
/* Then try with a vspltish. */
if (step == 1)
@@ -6328,7 +6351,7 @@ easy_altivec_constant (rtx op, machine_mode mode)
step >>= 1;
if (vspltis_constant (op, step, copies))
- return true;
+ return 2;
/* And finally a vspltisb. */
if (step == 1)
@@ -6337,12 +6360,12 @@ easy_altivec_constant (rtx op, machine_mode mode)
step >>= 1;
if (vspltis_constant (op, step, copies))
- return true;
+ return 1;
if (vspltis_shifted (op) != 0)
- return true;
+ return GET_MODE_SIZE (GET_MODE_INNER (mode));
- return false;
+ return 0;
}
/* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
diff --git a/gcc/testsuite/gcc.dg/pr101384.c b/gcc/testsuite/gcc.dg/pr101384.c
new file mode 100644
index 0000000..7030c0a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr101384.c
@@ -0,0 +1,39 @@
+/* PR target/101384 */
+/* { dg-do run } */
+/* { dg-options "-O2 -Wno-psabi -w" } */
+
+typedef unsigned char __attribute__((__vector_size__ (16))) U;
+typedef unsigned short __attribute__((__vector_size__ (8 * sizeof (short)))) V;
+
+U u;
+V v;
+
+__attribute__((noipa)) U
+foo (void)
+{
+ U y = (U) { 0x80, 0xff, 0xff, 0xff, 0x80, 0xff, 0xff, 0xff,
+ 0x80, 0xff, 0xff, 0xff, 0x80, 0xff, 0xff, 0xff } + u;
+ return y;
+}
+
+__attribute__((noipa)) V
+bar (void)
+{
+ V y = (V) { 0x8000, 0xffff, 0x8000, 0xffff,
+ 0x8000, 0xffff, 0x8000, 0xffff } + v;
+ return y;
+}
+
+int
+main ()
+{
+ U x = foo ();
+ for (unsigned i = 0; i < 16; i++)
+ if (x[i] != ((i & 3) ? 0xff : 0x80))
+ __builtin_abort ();
+ V y = bar ();
+ for (unsigned i = 0; i < 8; i++)
+ if (y[i] != ((i & 1) ? 0xffff : 0x8000))
+ __builtin_abort ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/pr101384-1.c b/gcc/testsuite/gcc.target/powerpc/pr101384-1.c
new file mode 100644
index 0000000..627d7d7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr101384-1.c
@@ -0,0 +1,79 @@
+/* PR target/101384 */
+/* { dg-do compile { target le } } */
+/* { dg-options "-O2 -maltivec" } */
+/* { dg-require-effective-target powerpc_altivec_ok } */
+/* { dg-final { scan-assembler-times {\mvspltis[whb] [^\n\r]*,-1\M} 9 } } */
+/* { dg-final { scan-assembler-times {\mvslw\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mvslh\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mvslb\M} 3 } } */
+
+typedef unsigned char __attribute__((__vector_size__ (16))) U;
+typedef unsigned short __attribute__((__vector_size__ (16))) V;
+typedef unsigned int __attribute__((__vector_size__ (16))) W;
+
+U u;
+V v;
+W w;
+
+U
+f1 (void)
+{
+ U y = (U) { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 } + u;
+ return y;
+}
+
+U
+f2 (void)
+{
+ U y = (U) { 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80 } + u;
+ return y;
+}
+
+U
+f3 (void)
+{
+ U y = (U) { 0, 0, 0, 0x80, 0, 0, 0, 0x80, 0, 0, 0, 0x80, 0, 0, 0, 0x80 } + u;
+ return y;
+}
+
+V
+f4 (void)
+{
+ V y = (V) { 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080 } + v;
+ return y;
+}
+
+V
+f5 (void)
+{
+ V y = (V) { 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000 } + v;
+ return y;
+}
+
+V
+f6 (void)
+{
+ V y = (V) { 0, 0x8000, 0, 0x8000, 0, 0x8000, 0, 0x8000 } + v;
+ return y;
+}
+
+W
+f7 (void)
+{
+ W y = (W) { 0x80808080, 0x80808080, 0x80808080, 0x80808080 } + w;
+ return y;
+}
+
+W
+f8 (void)
+{
+ W y = (W) { 0x80008000, 0x80008000, 0x80008000, 0x80008000 } + w;
+ return y;
+}
+
+W
+f9 (void)
+{
+ W y = (W) { 0x80000000, 0x80000000, 0x80000000, 0x80000000 } + w;
+ return y;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/pr101384-2.c b/gcc/testsuite/gcc.target/powerpc/pr101384-2.c
new file mode 100644
index 0000000..f395708
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr101384-2.c
@@ -0,0 +1,79 @@
+/* PR target/101384 */
+/* { dg-do compile { target be } } */
+/* { dg-options "-O2 -maltivec" } */
+/* { dg-require-effective-target powerpc_altivec_ok } */
+/* { dg-final { scan-assembler-times {\mvspltis[whb] [^\n\r]*,-1\M} 9 } } */
+/* { dg-final { scan-assembler-times {\mvslw\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mvslh\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mvslb\M} 3 } } */
+
+typedef unsigned char __attribute__((__vector_size__ (16))) U;
+typedef unsigned short __attribute__((__vector_size__ (16))) V;
+typedef unsigned int __attribute__((__vector_size__ (16))) W;
+
+U u;
+V v;
+W w;
+
+U
+f1 (void)
+{
+ U y = (U) { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 } + u;
+ return y;
+}
+
+U
+f2 (void)
+{
+ U y = (U) { 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0 } + u;
+ return y;
+}
+
+U
+f3 (void)
+{
+ U y = (U) { 0x80, 0, 0, 0, 0x80, 0, 0, 0, 0x80, 0, 0, 0, 0x80, 0, 0, 0 } + u;
+ return y;
+}
+
+V
+f4 (void)
+{
+ V y = (V) { 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080 } + v;
+ return y;
+}
+
+V
+f5 (void)
+{
+ V y = (V) { 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000 } + v;
+ return y;
+}
+
+V
+f6 (void)
+{
+ V y = (V) { 0x8000, 0, 0x8000, 0, 0x8000, 0, 0x8000, 0 } + v;
+ return y;
+}
+
+W
+f7 (void)
+{
+ W y = (W) { 0x80808080, 0x80808080, 0x80808080, 0x80808080 } + w;
+ return y;
+}
+
+W
+f8 (void)
+{
+ W y = (W) { 0x80008000, 0x80008000, 0x80008000, 0x80008000 } + w;
+ return y;
+}
+
+W
+f9 (void)
+{
+ W y = (W) { 0x80000000, 0x80000000, 0x80000000, 0x80000000 } + w;
+ return y;
+}