-rw-r--r--  gcc/ChangeLog             7
-rw-r--r--  gcc/config/i386/i386.c  412
2 files changed, 246 insertions, 173 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 9641511..a0b2cbe 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2008-05-19 H.J. Lu <hongjiu.lu@intel.com>
+
+ * config/i386/i386.c (ix86_expand_vector_init_concat): New.
+ (ix86_expand_vector_init_interleave): Likewise.
+ (ix86_expand_vector_init_general): Use them. Assert word_mode
+ == SImode when n_words == 4.
+
2008-05-19 Uros Bizjak <ubizjak@gmail.com>
* config/i386/i386.c (ix86_secondary_reload): New static function.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index fa3df97..a116bc6 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -23869,194 +23869,270 @@ ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
return true;
}
-/* A subroutine of ix86_expand_vector_init. Handle the most general case:
- all values variable, and none identical. */
+/* A subroutine of ix86_expand_vector_init_general. Use vector
+ concatenation to handle the most general case: all values variable,
+ and none identical. */
static void
-ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
- rtx target, rtx vals)
+ix86_expand_vector_init_concat (enum machine_mode mode,
+ rtx target, rtx *ops, int n)
{
- enum machine_mode half_mode = GET_MODE_INNER (mode);
- rtx op0 = NULL, op1 = NULL;
- bool use_vec_concat = false;
+ enum machine_mode cmode, hmode = VOIDmode;
+ rtx first[8], second[4];
+ rtvec v;
+ int i, j;
- switch (mode)
+ switch (n)
{
- case V2SFmode:
- case V2SImode:
- if (!mmx_ok && !TARGET_SSE)
- break;
- /* FALLTHRU */
+ case 2:
+ switch (mode)
+ {
+ case V4SImode:
+ cmode = V2SImode;
+ break;
+ case V4SFmode:
+ cmode = V2SFmode;
+ break;
+ case V2DImode:
+ cmode = DImode;
+ break;
+ case V2SImode:
+ cmode = SImode;
+ break;
+ case V2DFmode:
+ cmode = DFmode;
+ break;
+ case V2SFmode:
+ cmode = SFmode;
+ break;
+ default:
+ gcc_unreachable ();
+ }
- case V2DFmode:
- case V2DImode:
- /* For the two element vectors, we always implement VEC_CONCAT. */
- op0 = XVECEXP (vals, 0, 0);
- op1 = XVECEXP (vals, 0, 1);
- use_vec_concat = true;
+ if (!register_operand (ops[1], cmode))
+ ops[1] = force_reg (cmode, ops[1]);
+ if (!register_operand (ops[0], cmode))
+ ops[0] = force_reg (cmode, ops[0]);
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_VEC_CONCAT (mode, ops[0],
+ ops[1])));
break;
- case V4SFmode:
- half_mode = V2SFmode;
- goto half;
- case V4SImode:
- half_mode = V2SImode;
+ case 4:
+ switch (mode)
+ {
+ case V4SImode:
+ cmode = V2SImode;
+ break;
+ case V4SFmode:
+ cmode = V2SFmode;
+ break;
+ default:
+ gcc_unreachable ();
+ }
goto half;
- half:
- {
- rtvec v;
- /* For V4SF and V4SI, we implement a concat of two V2 vectors.
- Recurse to load the two halves. */
+half:
+ /* FIXME: We process inputs backward to help RA. PR 36222. */
+ i = n - 1;
+ j = (n >> 1) - 1;
+ for (; i > 0; i -= 2, j--)
+ {
+ first[j] = gen_reg_rtx (cmode);
+ v = gen_rtvec (2, ops[i - 1], ops[i]);
+ ix86_expand_vector_init (false, first[j],
+ gen_rtx_PARALLEL (cmode, v));
+ }
- op1 = gen_reg_rtx (half_mode);
- v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
- ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
+ n >>= 1;
+ if (n > 2)
+ {
+ gcc_assert (hmode != VOIDmode);
+ for (i = j = 0; i < n; i += 2, j++)
+ {
+ second[j] = gen_reg_rtx (hmode);
+ ix86_expand_vector_init_concat (hmode, second [j],
+ &first [i], 2);
+ }
+ n >>= 1;
+ ix86_expand_vector_init_concat (mode, target, second, n);
+ }
+ else
+ ix86_expand_vector_init_concat (mode, target, first, n);
+ break;
- op0 = gen_reg_rtx (half_mode);
- v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
- ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
+ default:
+ gcc_unreachable ();
+ }
+}
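
To picture the strategy above outside of GCC's RTL machinery, here is a minimal, self-contained sketch in plain C with SSE intrinsics. It is an analogy, not the exact instruction sequence the expander emits, and the helper name concat_init_v4sf is made up for illustration; it mirrors the V4SFmode case of ix86_expand_vector_init_concat, where two 2-element halves are built first and then concatenated into the full vector.

/* Sketch only: the intrinsic names are standard SSE1, the helper is
   hypothetical, and this approximates (not reproduces) what the
   V4SFmode path of ix86_expand_vector_init_concat generates.  */
#include <stdio.h>
#include <xmmintrin.h>

static __m128
concat_init_v4sf (float a, float b, float c, float d)
{
  /* Pair two scalars in the low half of a vector, mirroring the
     V2SF "halves" the expander builds recursively.  */
  __m128 lo = _mm_unpacklo_ps (_mm_set_ss (a), _mm_set_ss (b)); /* {a, b, x, x} */
  __m128 hi = _mm_unpacklo_ps (_mm_set_ss (c), _mm_set_ss (d)); /* {c, d, x, x} */
  /* movlhps concatenates the two low halves, like a VEC_CONCAT of
     two V2SF vectors into a V4SF vector.  */
  return _mm_movelh_ps (lo, hi);                                /* {a, b, c, d} */
}

int
main (void)
{
  float out[4];
  _mm_storeu_ps (out, concat_init_v4sf (1.0f, 2.0f, 3.0f, 4.0f));
  printf ("%g %g %g %g\n", out[0], out[1], out[2], out[3]);
  return 0;
}

Built with -msse (implicit on x86-64), this prints 1 2 3 4.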
- use_vec_concat = true;
- }
- break;
+/* A subroutine of ix86_expand_vector_init_general. Use vector
+ interleaving to handle the most general case: all values variable,
+ and none identical. */
+static void
+ix86_expand_vector_init_interleave (enum machine_mode mode,
+ rtx target, rtx *ops, int n)
+{
+ enum machine_mode first_imode, second_imode, third_imode;
+ int i, j;
+ rtx op0, op1;
+ rtx (*gen_load_even) (rtx, rtx, rtx);
+ rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
+ rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
+
+ switch (mode)
+ {
case V8HImode:
- if (TARGET_SSE2)
- {
- rtx ops[4];
- unsigned int i, j;
+ gen_load_even = gen_vec_setv8hi;
+ gen_interleave_first_low = gen_vec_interleave_lowv4si;
+ gen_interleave_second_low = gen_vec_interleave_lowv2di;
+ first_imode = V4SImode;
+ second_imode = V2DImode;
+ third_imode = VOIDmode;
+ break;
+ case V16QImode:
+ gen_load_even = gen_vec_setv16qi;
+ gen_interleave_first_low = gen_vec_interleave_lowv8hi;
+ gen_interleave_second_low = gen_vec_interleave_lowv4si;
+ first_imode = V8HImode;
+ second_imode = V4SImode;
+ third_imode = V2DImode;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ for (i = 0; i < n; i++)
+ {
+ /* Extend the odd element to SImode using a paradoxical SUBREG. */
+ op0 = gen_reg_rtx (SImode);
+ emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
- for (i = 0; i < ARRAY_SIZE (ops); i++)
- {
- /* Extend the odd elment from HImode to SImode using
- a paradoxical SUBREG. */
- op0 = gen_reg_rtx (SImode);
- emit_move_insn (op0, gen_lowpart (SImode,
- XVECEXP (vals, 0,
- i + i)));
-
- /* Insert the SImode value as low element of V4SImode
- vector. */
- op1 = gen_reg_rtx (V4SImode);
- op0 = gen_rtx_VEC_MERGE (V4SImode,
- gen_rtx_VEC_DUPLICATE (V4SImode,
- op0),
- CONST0_RTX (V4SImode),
- const1_rtx);
- emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
-
- /* Cast the V4SImode vector back to a V8HImode vector. */
- op0 = gen_reg_rtx (mode);
- emit_move_insn (op0, gen_lowpart (mode, op1));
-
- /* Load even HI elements into the second positon. */
- emit_insn (gen_vec_setv8hi (op0, XVECEXP (vals, 0,
- i + i + 1),
- const1_rtx));
-
- /* Cast V8HImode vector to V4SImode vector. */
- ops[i] = gen_reg_rtx (V4SImode);
- emit_move_insn (ops[i], gen_lowpart (V4SImode, op0));
- }
+ /* Insert the SImode value as low element of V4SImode vector. */
+ op1 = gen_reg_rtx (V4SImode);
+ op0 = gen_rtx_VEC_MERGE (V4SImode,
+ gen_rtx_VEC_DUPLICATE (V4SImode,
+ op0),
+ CONST0_RTX (V4SImode),
+ const1_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
- /* Interleave low V4SIs. */
- for (i = j = 0; i < ARRAY_SIZE (ops); i += 2, j++)
- {
- op0 = gen_reg_rtx (V4SImode);
- emit_insn (gen_vec_interleave_lowv4si (op0, ops[i],
- ops[i + 1]));
-
- /* Cast V4SImode vectors to V2DImode vectors. */
- op1 = gen_reg_rtx (V2DImode);
- emit_move_insn (op1, gen_lowpart (V2DImode, op0));
- ops[j] = op1;
- }
+ /* Cast the V4SImode vector back to a vector in the original mode. */
+ op0 = gen_reg_rtx (mode);
+ emit_move_insn (op0, gen_lowpart (mode, op1));
+
+ /* Load even elements into the second position. */
+ emit_insn ((*gen_load_even) (op0, ops [i + i + 1],
+ const1_rtx));
- /* Interleave low V2DIs. */
- op0 = gen_reg_rtx (V2DImode);
- emit_insn (gen_vec_interleave_lowv2di (op0, ops[0], ops[1]));
+ /* Cast vector to FIRST_IMODE vector. */
+ ops[i] = gen_reg_rtx (first_imode);
+ emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
+ }
- /* Cast the V2DImode vector back to a V8HImode vector. */
- emit_insn (gen_rtx_SET (VOIDmode, target,
- gen_lowpart (mode, op0)));
- return;
- }
+ /* Interleave low FIRST_IMODE vectors. */
+ for (i = j = 0; i < n; i += 2, j++)
+ {
+ op0 = gen_reg_rtx (first_imode);
+ emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
- case V16QImode:
- if (TARGET_SSE4_1)
+ /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
+ ops[j] = gen_reg_rtx (second_imode);
+ emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
+ }
+
+ /* Interleave low SECOND_IMODE vectors. */
+ switch (second_imode)
+ {
+ case V4SImode:
+ for (i = j = 0; i < n / 2; i += 2, j++)
{
- rtx ops[8];
- unsigned int i, j;
+ op0 = gen_reg_rtx (second_imode);
+ emit_insn ((*gen_interleave_second_low) (op0, ops[i],
+ ops[i + 1]));
- for (i = 0; i < ARRAY_SIZE (ops); i++)
- {
- /* Extend the odd elment from QImode to SImode using
- a paradoxical SUBREG. */
- op0 = gen_reg_rtx (SImode);
- emit_move_insn (op0, gen_lowpart (SImode,
- XVECEXP (vals, 0,
- i + i)));
-
- /* Insert the SImode value as low element of V4SImode
- vector. */
- op1 = gen_reg_rtx (V4SImode);
- op0 = gen_rtx_VEC_MERGE (V4SImode,
- gen_rtx_VEC_DUPLICATE (V4SImode,
- op0),
- CONST0_RTX (V4SImode),
- const1_rtx);
- emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
-
- /* Cast the V4SImode vector back to a V16QImode vector. */
- op0 = gen_reg_rtx (mode);
- emit_move_insn (op0, gen_lowpart (mode, op1));
-
- /* Load even QI elements into the second positon. */
- emit_insn (gen_vec_setv16qi (op0, XVECEXP (vals, 0,
- i + i + 1),
- const1_rtx));
-
- /* Cast V16QImode vector to V8HImode vector. */
- ops[i] = gen_reg_rtx (V8HImode);
- emit_move_insn (ops[i], gen_lowpart (V8HImode, op0));
- }
+ /* Cast the SECOND_IMODE vector to the THIRD_IMODE
+ vector. */
+ ops[j] = gen_reg_rtx (third_imode);
+ emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
+ }
+ second_imode = V2DImode;
+ gen_interleave_second_low = gen_vec_interleave_lowv2di;
+ /* FALLTHRU */
- /* Interleave low V8HIs. */
- for (i = j = 0; i < ARRAY_SIZE (ops); i += 2, j++)
- {
- op0 = gen_reg_rtx (V8HImode);
- emit_insn (gen_vec_interleave_lowv8hi (op0, ops[i],
- ops[i + 1]));
-
- /* Cast V8HImode vector to V4SImode vector. */
- op1 = gen_reg_rtx (V4SImode);
- emit_move_insn (op1, gen_lowpart (V4SImode, op0));
- ops[j] = op1;
- }
+ case V2DImode:
+ op0 = gen_reg_rtx (second_imode);
+ emit_insn ((*gen_interleave_second_low) (op0, ops[0],
+ ops[1]));
- /* Interleave low V4SIs. */
- for (i = j = 0; i < ARRAY_SIZE (ops) / 2; i += 2, j++)
- {
- op0 = gen_reg_rtx (V4SImode);
- emit_insn (gen_vec_interleave_lowv4si (op0, ops[i],
- ops[i + 1]));
-
- /* Cast V4SImode vectors to V2DImode vectors. */
- op1 = gen_reg_rtx (V2DImode);
- emit_move_insn (op1, gen_lowpart (V2DImode, op0));
- ops[j] = op1;
- }
+ /* Cast the SECOND_IMODE vector back to a vector in the original
+ mode. */
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_lowpart (mode, op0)));
+ break;
- /* Interleave low V2DIs. */
- op0 = gen_reg_rtx (V2DImode);
- emit_insn (gen_vec_interleave_lowv2di (op0, ops[0], ops[1]));
+ default:
+ gcc_unreachable ();
+ }
+}
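
The interleave path can likewise be pictured with plain SSE2 intrinsics. The sketch below uses a hypothetical helper, interleave_init_v8hi, and follows the same cascade as the V8HImode case: each even/odd pair of 16-bit elements is placed in the low 32-bit lane of its own vector (packed here with a shift, rather than the paradoxical-SUBREG plus vec_set sequence the expander emits), then the lanes are combined with punpckldq (_mm_unpacklo_epi32) and finally punpcklqdq (_mm_unpacklo_epi64). Assumptions: SSE2, little-endian lane layout, and intrinsic-level code rather than GCC internals.

#include <stdio.h>
#include <stdint.h>
#include <emmintrin.h>

static __m128i
interleave_init_v8hi (const uint16_t e[8])
{
  /* Step 1: place each pair (e[2k], e[2k+1]) in the low 32 bits of
     its own vector, analogous to the gen_load_even step.  */
  __m128i v0 = _mm_cvtsi32_si128 ((int) (e[0] | ((uint32_t) e[1] << 16)));
  __m128i v1 = _mm_cvtsi32_si128 ((int) (e[2] | ((uint32_t) e[3] << 16)));
  __m128i v2 = _mm_cvtsi32_si128 ((int) (e[4] | ((uint32_t) e[5] << 16)));
  __m128i v3 = _mm_cvtsi32_si128 ((int) (e[6] | ((uint32_t) e[7] << 16)));

  /* Step 2: interleave low 32-bit lanes (punpckldq), i.e. the
     gen_interleave_first_low step for first_imode == V4SImode.  */
  __m128i lo = _mm_unpacklo_epi32 (v0, v1);   /* e0..e3 in the low 64 bits */
  __m128i hi = _mm_unpacklo_epi32 (v2, v3);   /* e4..e7 in the low 64 bits */

  /* Step 3: interleave low 64-bit lanes (punpcklqdq), i.e. the
     gen_interleave_second_low step for second_imode == V2DImode.  */
  return _mm_unpacklo_epi64 (lo, hi);
}

int
main (void)
{
  uint16_t in[8] = { 1, 2, 3, 4, 5, 6, 7, 8 }, out[8];
  _mm_storeu_si128 ((__m128i *) out, interleave_init_v8hi (in));
  for (int i = 0; i < 8; i++)
    printf ("%u ", out[i]);
  printf ("\n");
  return 0;
}

For V16QImode the same cascade simply gains one more level (V8HI, then V4SI, then V2DI), which is why the new helper carries a third_imode.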
- /* Cast the V2DImode vector back to a V8HImode vector. */
- emit_insn (gen_rtx_SET (VOIDmode, target,
- gen_lowpart (mode, op0)));
- return;
- }
+/* A subroutine of ix86_expand_vector_init. Handle the most general case:
+ all values variable, and none identical. */
+
+static void
+ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
+ rtx target, rtx vals)
+{
+ rtx ops[32], op0, op1;
+ enum machine_mode half_mode = VOIDmode;
+ int n, i;
+
+ switch (mode)
+ {
+ case V2SFmode:
+ case V2SImode:
+ if (!mmx_ok && !TARGET_SSE)
+ break;
+
+ n = 2;
+ goto vec_concat;
+
+ case V4SFmode:
+ case V4SImode:
+ n = 4;
+ goto vec_concat;
+
+ case V2DFmode:
+ case V2DImode:
+ n = 2;
+ goto vec_concat;
+
+vec_concat:
+ for (i = 0; i < n; i++)
+ ops[i] = XVECEXP (vals, 0, i);
+ ix86_expand_vector_init_concat (mode, target, ops, n);
+ return;
+
+ case V16QImode:
+ if (!TARGET_SSE4_1)
+ break;
+
+ n = 16;
+ goto vec_interleave;
+
+ case V8HImode:
+ if (!TARGET_SSE2)
+ break;
+
+ n = 8;
+ goto vec_interleave;
+
+vec_interleave:
+ for (i = 0; i < n; i++)
+ ops[i] = XVECEXP (vals, 0, i);
+ ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
+ return;
case V4HImode:
case V8QImode:
@@ -24066,17 +24142,6 @@ ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
gcc_unreachable ();
}
- if (use_vec_concat)
- {
- if (!register_operand (op1, half_mode))
- op1 = force_reg (half_mode, op1);
- if (!register_operand (op0, half_mode))
- op0 = force_reg (half_mode, op0);
-
- emit_insn (gen_rtx_SET (VOIDmode, target,
- gen_rtx_VEC_CONCAT (mode, op0, op1)));
- }
- else
{
int i, j, n_elts, n_words, n_elt_per_word;
enum machine_mode inner_mode;
@@ -24124,6 +24189,7 @@ ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
else if (n_words == 4)
{
rtx tmp = gen_reg_rtx (V4SImode);
+ gcc_assert (word_mode == SImode);
vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
emit_move_insn (target, gen_lowpart (mode, tmp));
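
The hunk above adds gcc_assert (word_mode == SImode) on the n_words == 4 path, where the vector is first assembled as word-sized pieces and the V4SImode result is then viewed in the target mode through gen_lowpart. A rough user-level analogue, again in plain C with SSE2 intrinsics and hypothetical names rather than GCC internals, packs groups of narrow elements into 32-bit words, builds a 4 x 32-bit vector from the words, and reinterprets it as 16 x 8-bit.

/* Sketch of the word-building fallback, assuming SSE2 and a
   little-endian 32-bit word_mode (SImode), as the new assert states.  */
#include <stdio.h>
#include <stdint.h>
#include <emmintrin.h>

int
main (void)
{
  uint8_t elts[16];
  for (int i = 0; i < 16; i++)
    elts[i] = (uint8_t) (i + 1);

  /* Pack each group of four QImode elements into one SImode word.  */
  uint32_t words[4];
  for (int w = 0; w < 4; w++)
    words[w] = (uint32_t) elts[4 * w]
               | ((uint32_t) elts[4 * w + 1] << 8)
               | ((uint32_t) elts[4 * w + 2] << 16)
               | ((uint32_t) elts[4 * w + 3] << 24);

  /* Initialize a 4 x 32-bit vector from the words, then view it as
     16 x 8-bit -- the gen_lowpart step in the expander.  */
  __m128i v = _mm_set_epi32 ((int) words[3], (int) words[2],
                             (int) words[1], (int) words[0]);
  uint8_t out[16];
  _mm_storeu_si128 ((__m128i *) out, v);
  for (int i = 0; i < 16; i++)
    printf ("%u ", out[i]);
  printf ("\n");
  return 0;
}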