about summary refs log tree commit diff
path: root/gcc
diff options
context:
space:
mode:
author Roger Sayle <roger@eyesopen.com> 2006-04-10 21:01:19 +0000
committer Roger Sayle <sayle@gcc.gnu.org> 2006-04-10 21:01:19 +0000
commit acef130fabdabd9c37b27e7ae1ad6c943a34a405 (patch)
tree 1c365032f326e9e550ddea7a0e0045d195fac037 /gcc
parent e3df376d22bdd92e4cc62e2b290f76d39ddebcca (diff)
download gcc-acef130fabdabd9c37b27e7ae1ad6c943a34a405.zip
gcc-acef130fabdabd9c37b27e7ae1ad6c943a34a405.tar.gz
gcc-acef130fabdabd9c37b27e7ae1ad6c943a34a405.tar.bz2
i386.c (ix86_expand_vector_init_one_nonzero): Renamed from ix86_expand_vector_init_low_nonzero.
* config/i386/i386.c (ix86_expand_vector_init_one_nonzero): Renamed from ix86_expand_vector_init_low_nonzero. Take an additional one_var argument indicating which element is non-zero. Support one_var != 0 for V4SFmode and V4SImode by permuting the result.
(ix86_expand_vector_init): Call ix86_expand_vector_init_one_nonzero with one_var instead of ix86_expand_vector_init_low_nonzero.

* gcc.target/i386/vecinit-1.c: New test case.
* gcc.target/i386/vecinit-2.c: Likewise.

From-SVN: r112832
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 9
-rw-r--r-- gcc/config/i386/i386.c 71
-rw-r--r-- gcc/testsuite/ChangeLog 5
-rw-r--r-- gcc/testsuite/gcc.target/i386/vecinit-1.c 11
-rw-r--r-- gcc/testsuite/gcc.target/i386/vecinit-2.c 11
5 files changed, 98 insertions, 9 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 923be6c..a50a06e 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2006-04-10 Roger Sayle <roger@eyesopen.com>
+
+ * config/i386/i386.c (ix86_expand_vector_init_one_nonzero): Renamed
+ from ix86_expand_vector_init_low_nonzero. Take an additional
+ one_var argument indicating which element is non-zero. Support
+ one_var != 0 for V4SFmode and V4SImode by permuting the result.
+ (ix86_expand_vector_init): Call ix86_expand_vector_init_one_nonzero
+ with one_var instead of ix86_expand_vector_init_low_nonzero.
+
2006-04-10 Kazu Hirata <kazu@codesourcery.com>
* Makefile.in (tree-into-ssa.o, tree-outof-ssa.o,
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 9076825..6562685 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -17880,15 +17880,16 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
}
/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
- whose low element is VAR, and other elements are zero. Return true
+ whose ONE_VAR element is VAR, and other elements are zero. Return true
if successful. */
static bool
-ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
- rtx target, rtx var)
+ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
+ rtx target, rtx var, int one_var)
{
enum machine_mode vsimode;
- rtx x;
+ rtx new_target;
+ rtx x, tmp;
switch (mode)
{
@@ -17900,6 +17901,8 @@ ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
case V2DFmode:
case V2DImode:
+ if (one_var != 0)
+ return false;
var = force_reg (GET_MODE_INNER (mode), var);
x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
emit_insn (gen_rtx_SET (VOIDmode, target, x));
@@ -17907,10 +17910,55 @@ ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
case V4SFmode:
case V4SImode:
+ if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
+ new_target = gen_reg_rtx (mode);
+ else
+ new_target = target;
var = force_reg (GET_MODE_INNER (mode), var);
x = gen_rtx_VEC_DUPLICATE (mode, var);
x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
- emit_insn (gen_rtx_SET (VOIDmode, target, x));
+ emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
+ if (one_var != 0)
+ {
+ /* We need to shuffle the value to the correct position, so
+ create a new pseudo to store the intermediate result. */
+
+ /* With SSE2, we can use the integer shuffle insns. */
+ if (mode != V4SFmode && TARGET_SSE2)
+ {
+ emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
+ GEN_INT (1),
+ GEN_INT (one_var == 1 ? 0 : 1),
+ GEN_INT (one_var == 2 ? 0 : 1),
+ GEN_INT (one_var == 3 ? 0 : 1)));
+ if (target != new_target)
+ emit_move_insn (target, new_target);
+ return true;
+ }
+
+ /* Otherwise convert the intermediate result to V4SFmode and
+ use the SSE1 shuffle instructions. */
+ if (mode != V4SFmode)
+ {
+ tmp = gen_reg_rtx (V4SFmode);
+ emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
+ }
+ else
+ tmp = new_target;
+
+ emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
+ GEN_INT (1),
+ GEN_INT (one_var == 1 ? 0 : 1),
+ GEN_INT (one_var == 2 ? 0+4 : 1+4),
+ GEN_INT (one_var == 3 ? 0+4 : 1+4)));
+
+ if (mode != V4SFmode)
+ emit_move_insn (target, gen_lowpart (V4SImode, tmp));
+ else if (tmp != target)
+ emit_move_insn (target, tmp);
+ }
+ else if (target != new_target)
+ emit_move_insn (target, new_target);
return true;
case V8HImode:
@@ -17924,11 +17972,15 @@ ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
vsimode = V2SImode;
goto widen;
widen:
+ if (one_var != 0)
+ return false;
+
/* Zero extend the variable element to SImode and recurse. */
var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
x = gen_reg_rtx (vsimode);
- if (!ix86_expand_vector_init_low_nonzero (mmx_ok, vsimode, x, var))
+ if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
+ var, one_var))
gcc_unreachable ();
emit_move_insn (target, gen_lowpart (mode, x));
@@ -18185,9 +18237,10 @@ ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
the pool and overwritten via move later. */
if (n_var == 1)
{
- if (all_const_zero && one_var == 0
- && ix86_expand_vector_init_low_nonzero (mmx_ok, mode, target,
- XVECEXP (vals, 0, 0)))
+ if (all_const_zero
+ && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
+ XVECEXP (vals, 0, one_var),
+ one_var))
return;
if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index becca44..7cf2a77 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2006-04-10 Roger Sayle <roger@eyesopen.com>
+
+ * gcc.target/i386/vecinit-1.c: New test case.
+ * gcc.target/i386/vecinit-2.c: Likewise.
+
2006-04-10 Jakub Jelinek <jakub@redhat.com>
PR debug/27057
diff --git a/gcc/testsuite/gcc.target/i386/vecinit-1.c b/gcc/testsuite/gcc.target/i386/vecinit-1.c
new file mode 100644
index 0000000..86cad89
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vecinit-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+#define vector __attribute__((vector_size(16)))
+
+float a;
+vector float f1(void) { return (vector float){ a, 0.0, 0.0, 0.0}; }
+vector float f2(void) { return (vector float){ 0.0, a, 0.0, 0.0}; }
+vector float f3(void) { return (vector float){ 0.0, 0.0, a, 0.0}; }
+vector float f4(void) { return (vector float){ 0.0, 0.0, 0.0, a}; }
+/* { dg-final { scan-assembler-not "movaps" } } */
+/* { dg-final { scan-assembler-not "xor" } } */
diff --git a/gcc/testsuite/gcc.target/i386/vecinit-2.c b/gcc/testsuite/gcc.target/i386/vecinit-2.c
new file mode 100644
index 0000000..41e5027
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vecinit-2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+#define vector __attribute__((vector_size(16)))
+
+int a;
+vector int f1(void) { return (vector int){ a, 0, 0, 0}; }
+vector int f2(void) { return (vector int){ 0, a, 0, 0}; }
+vector int f3(void) { return (vector int){ 0, 0, a, 0}; }
+vector int f4(void) { return (vector int){ 0, 0, 0, a}; }
+/* { dg-final { scan-assembler-not "movaps" } } */
+/* { dg-final { scan-assembler-not "xor" } } */