aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author: Richard Henderson <rth@redhat.com> 2005-01-05 22:18:15 -0800
committer: Richard Henderson <rth@gcc.gnu.org> 2005-01-05 22:18:15 -0800
commit: 051d8245886f5f133fad42d38b9d4ce12afd1394 (patch)
tree: 8d103dbf6344ce53612014171f97098266de9edd /gcc
parent: 8f98556f0151b9a45553a49d876b55f76017d0d9 (diff)
downloadgcc-051d8245886f5f133fad42d38b9d4ce12afd1394.zip
gcc-051d8245886f5f133fad42d38b9d4ce12afd1394.tar.gz
gcc-051d8245886f5f133fad42d38b9d4ce12afd1394.tar.bz2
ia64.c (rtx_needs_barrier): Handle CONST_VECTOR and VEC_SELECT.
* config/ia64/ia64.c (rtx_needs_barrier): Handle CONST_VECTOR and VEC_SELECT.
* config/ia64/vect.md (mulv8qi3): Re-implement with mix_[rl].
(mulv4hi3): Set itanium_class mmmul.
(fpack_sfxf, fpack_xfsf, fpack_xfxf): Remove.

From-SVN: r92987
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 9
-rw-r--r-- gcc/config/ia64/ia64.c 36
-rw-r--r-- gcc/config/ia64/vect.md 90
-rw-r--r-- gcc/testsuite/gcc.c-torture/execute/simd-6.c 22
4 files changed, 83 insertions, 74 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 078a19e..48586a2 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,14 @@
2005-01-05 Richard Henderson <rth@redhat.com>
+ * config/ia64/ia64.c (rtx_needs_barrier): Handle CONST_VECTOR
+ and VEC_SELECT.
+ * config/ia64/vect.md (mulv8qi3): Re-implement with mix_[rl].
+ (mulv4hi3): Set itanium_class mmmul.
+ (fpack_sfxf, fpack_xfsf, fpack_xfxf): Remove.
+ (fpack): Rename from fpack_sfsf.
+
+2005-01-05 Richard Henderson <rth@redhat.com>
+
PR rtl-opt/10692
* reload1.c (do_input_reload): Restrict the optimization deleting
a previous output reload to RELOAD_FOR_INPUT.
diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c
index a6197c0..ce5d8aa 100644
--- a/gcc/config/ia64/ia64.c
+++ b/gcc/config/ia64/ia64.c
@@ -5198,17 +5198,27 @@ rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
{
rtx pat = XVECEXP (x, 0, i);
- if (GET_CODE (pat) == SET)
+ switch (GET_CODE (pat))
{
+ case SET:
update_set_flags (pat, &new_flags, &pred, &cond);
- need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
+ need_barrier |= set_src_needs_barrier (pat, new_flags,
+ pred, cond);
+ break;
+
+ case USE:
+ case CALL:
+ case ASM_OPERANDS:
+ need_barrier |= rtx_needs_barrier (pat, flags, pred);
+ break;
+
+ case CLOBBER:
+ case RETURN:
+ break;
+
+ default:
+ gcc_unreachable ();
}
- else if (GET_CODE (pat) == USE
- || GET_CODE (pat) == CALL
- || GET_CODE (pat) == ASM_OPERANDS)
- need_barrier |= rtx_needs_barrier (pat, flags, pred);
- else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
- abort ();
}
for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
{
@@ -5246,7 +5256,7 @@ rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
break;
- case CONST_INT: case CONST_DOUBLE:
+ case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
case SYMBOL_REF: case LABEL_REF: case CONST:
break;
@@ -5290,6 +5300,14 @@ rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
break;
+ case VEC_SELECT:
+ /* VEC_SELECT's second argument is a PARALLEL with integers that
+ describe the elements selected. On ia64, those integers are
+ always constants. Avoid walking the PARALLEL so that we don't
+ get confused with "normal" parallels and abort. */
+ need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
+ break;
+
case UNSPEC:
switch (XINT (x, 1))
{
diff --git a/gcc/config/ia64/vect.md b/gcc/config/ia64/vect.md
index a42ad2a..12729a9 100644
--- a/gcc/config/ia64/vect.md
+++ b/gcc/config/ia64/vect.md
@@ -172,43 +172,35 @@
(match_operand:V8QI 2 "gr_register_operand" "r")))]
""
{
- rtx l1, h1, l2, h2, lm, hm, lz, hz;
+ rtx r1, l1, r2, l2, rm, lm;
+ r1 = gen_reg_rtx (V4HImode);
l1 = gen_reg_rtx (V4HImode);
- h1 = gen_reg_rtx (V4HImode);
+ r2 = gen_reg_rtx (V4HImode);
l2 = gen_reg_rtx (V4HImode);
- h2 = gen_reg_rtx (V4HImode);
-
- /* Zero-extend the QImode elements into two words of HImode elements. */
- emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1),
- operands[1], CONST0_RTX (V8QImode)));
- emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2),
- operands[2], CONST0_RTX (V8QImode)));
- emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1),
- operands[1], CONST0_RTX (V8QImode)));
- emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2),
- operands[2], CONST0_RTX (V8QImode)));
+
+ /* Zero-extend the QImode elements into two words of HImode elements
+ by interleaving them with zero bytes. */
+ emit_insn (gen_mix1_r (gen_lowpart (V8QImode, r1),
+ operands[1], CONST0_RTX (V8QImode)));
+ emit_insn (gen_mix1_r (gen_lowpart (V8QImode, r2),
+ operands[2], CONST0_RTX (V8QImode)));
+ emit_insn (gen_mix1_l (gen_lowpart (V8QImode, l1),
+ operands[1], CONST0_RTX (V8QImode)));
+ emit_insn (gen_mix1_l (gen_lowpart (V8QImode, l2),
+ operands[2], CONST0_RTX (V8QImode)));
/* Multiply. */
+ rm = gen_reg_rtx (V4HImode);
lm = gen_reg_rtx (V4HImode);
- hm = gen_reg_rtx (V4HImode);
+ emit_insn (gen_mulv4hi3 (rm, r1, r2));
emit_insn (gen_mulv4hi3 (lm, l1, l2));
- emit_insn (gen_mulv4hi3 (hm, h1, h2));
-
- /* Zap the high order bytes of the HImode elements. There are several
- ways that this could be done. On Itanium2, there's 1 cycle latency
- moving between the ALU units and the PALU units, so using AND would
- be 3 cycles latency into the eventual pack insn, whereas using MIX
- is only 2 cycles. */
- lz = gen_reg_rtx (V4HImode);
- hz = gen_reg_rtx (V4HImode);
- emit_insn (gen_mix1_r (gen_lowpart (V8QImode, lz),
- gen_lowpart (V8QImode, lm), CONST0_RTX (V8QImode)));
- emit_insn (gen_mix1_r (gen_lowpart (V8QImode, lz),
- gen_lowpart (V8QImode, lm), CONST0_RTX (V8QImode)));
-
- /* Repack the HImode elements as QImode elements. */
- emit_insn (gen_pack2_sss (operands[0], lz, hz));
+
+ /* Zap the high order bytes of the HImode elements by overwriting those
+ in one part with the low order bytes of the other. */
+ emit_insn (gen_mix1_r (operands[0],
+ gen_lowpart (V8QImode, rm),
+ gen_lowpart (V8QImode, lm)));
DONE;
})
@@ -218,7 +210,7 @@
(match_operand:V4HI 2 "gr_register_operand" "r")))]
""
"pmpyshr2 %0 = %1, %2, 0"
- [(set_attr "itanium_class" "mmalua")])
+ [(set_attr "itanium_class" "mmmul")])
(define_expand "umax<mode>3"
[(set (match_operand:VECINT 0 "gr_register_operand" "")
@@ -450,7 +442,7 @@
"mix1.r %0 = %r2, %r1"
[(set_attr "itanium_class" "mmshf")])
-(define_insn "*mix1_l"
+(define_insn "mix1_l"
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
(vec_select:V8QI
(vec_concat:V16QI
@@ -948,7 +940,7 @@
DONE;
})
-(define_insn "*fpack_sfsf"
+(define_insn "*fpack"
[(set (match_operand:V2SF 0 "fr_register_operand" "=f")
(vec_concat:V2SF
(match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")
@@ -957,38 +949,6 @@
"fpack %0 = %F2, %F1"
[(set_attr "itanium_class" "fmisc")])
-(define_insn "*fpack_sfxf"
- [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
- (vec_concat:V2SF
- (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")
- (float_truncate:SF
- (match_operand 2 "fr_register_operand" "f"))))]
- "GET_MODE (operands[2]) == DFmode || GET_MODE (operands[2]) == XFmode"
- "fpack %0 = %2, %F1"
- [(set_attr "itanium_class" "fmisc")])
-
-(define_insn "*fpack_xfsf"
- [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
- (vec_concat:V2SF
- (float_truncate:SF
- (match_operand 1 "fr_register_operand" "f"))
- (match_operand:SF 2 "fr_reg_or_fp01_operand" "fG")))]
- "GET_MODE (operands[1]) == DFmode || GET_MODE (operands[1]) == XFmode"
- "fpack %0 = %F2, %1"
- [(set_attr "itanium_class" "fmisc")])
-
-(define_insn "*fpack_xfxf"
- [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
- (vec_concat:V2SF
- (float_truncate:SF
- (match_operand 1 "fr_register_operand" "f"))
- (float_truncate:SF
- (match_operand 2 "fr_register_operand" "f"))))]
- "(GET_MODE (operands[1]) == DFmode || GET_MODE (operands[1]) == XFmode)
- && (GET_MODE (operands[2]) == DFmode || GET_MODE (operands[2]) == XFmode)"
- "fpack %0 = %2, %1"
- [(set_attr "itanium_class" "fmisc")])
-
;; Missing operations
;; fprcpa
;; fpsqrta
diff --git a/gcc/testsuite/gcc.c-torture/execute/simd-6.c b/gcc/testsuite/gcc.c-torture/execute/simd-6.c
new file mode 100644
index 0000000..ca4aabc
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/simd-6.c
@@ -0,0 +1,22 @@
+extern void abort (void);
+extern int memcmp (const void *, const void *, __SIZE_TYPE__);
+
+typedef unsigned char v8qi __attribute__((vector_size(8)));
+
+v8qi foo(v8qi x, v8qi y)
+{
+ return x * y;
+}
+
+int main()
+{
+ v8qi a = { 1, 2, 3, 4, 5, 6, 7, 8 };
+ v8qi b = { 3, 3, 3, 3, 3, 3, 3, 3 };
+ v8qi c = { 3, 6, 9, 12, 15, 18, 21, 24 };
+ v8qi r;
+
+ r = foo (a, b);
+ if (memcmp (&r, &c, 8) != 0)
+ abort ();
+ return 0;
+}