diff options
author | Richard Henderson <rth@redhat.com> | 2005-01-05 22:18:15 -0800 |
---|---|---|
committer | Richard Henderson <rth@gcc.gnu.org> | 2005-01-05 22:18:15 -0800 |
commit | 051d8245886f5f133fad42d38b9d4ce12afd1394 (patch) | |
tree | 8d103dbf6344ce53612014171f97098266de9edd /gcc | |
parent | 8f98556f0151b9a45553a49d876b55f76017d0d9 (diff) | |
download | gcc-051d8245886f5f133fad42d38b9d4ce12afd1394.zip gcc-051d8245886f5f133fad42d38b9d4ce12afd1394.tar.gz gcc-051d8245886f5f133fad42d38b9d4ce12afd1394.tar.bz2 |
ia64.c (rtx_needs_barrier): Handle CONST_VECTOR and VEC_SELECT.
* config/ia64/ia64.c (rtx_needs_barrier): Handle CONST_VECTOR
and VEC_SELECT.
* config/ia64/vect.md (mulv8qi3): Re-implement with mix_[rl].
(mulv4hi3): Set itanium_class mmmul.
(fpack_sfxf, fpack_xfsf, fpack_xfxf): Remove.
(fpack): Rename from fpack_sfsf.
From-SVN: r92987
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 9 | ||||
-rw-r--r-- | gcc/config/ia64/ia64.c | 36 | ||||
-rw-r--r-- | gcc/config/ia64/vect.md | 90 | ||||
-rw-r--r-- | gcc/testsuite/gcc.c-torture/execute/simd-6.c | 22 |
4 files changed, 83 insertions, 74 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 078a19e..48586a2 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,14 @@ 2005-01-05 Richard Henderson <rth@redhat.com> + * config/ia64/ia64.c (rtx_needs_barrier): Handle CONST_VECTOR + and VEC_SELECT. + * config/ia64/vect.md (mulv8qi3): Re-implement with mix_[rl]. + (mulv4hi3): Set itanium_class mmmul. + (fpack_sfxf, fpack_xfsf, fpack_xfxf): Remove. + (fpack): Rename from fpack_sfsf. + +2005-01-05 Richard Henderson <rth@redhat.com> + PR rtl-opt/10692 * reload1.c (do_input_reload): Restrict the optimization deleteing a previous output reload to RELOAD_FOR_INPUT. diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c index a6197c0..ce5d8aa 100644 --- a/gcc/config/ia64/ia64.c +++ b/gcc/config/ia64/ia64.c @@ -5198,17 +5198,27 @@ rtx_needs_barrier (rtx x, struct reg_flags flags, int pred) for (i = XVECLEN (x, 0) - 1; i >= 0; --i) { rtx pat = XVECEXP (x, 0, i); - if (GET_CODE (pat) == SET) + switch (GET_CODE (pat)) { + case SET: update_set_flags (pat, &new_flags, &pred, &cond); - need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond); + need_barrier |= set_src_needs_barrier (pat, new_flags, + pred, cond); + break; + + case USE: + case CALL: + case ASM_OPERANDS: + need_barrier |= rtx_needs_barrier (pat, flags, pred); + break; + + case CLOBBER: + case RETURN: + break; + + default: + gcc_unreachable (); } - else if (GET_CODE (pat) == USE - || GET_CODE (pat) == CALL - || GET_CODE (pat) == ASM_OPERANDS) - need_barrier |= rtx_needs_barrier (pat, flags, pred); - else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN) - abort (); } for (i = XVECLEN (x, 0) - 1; i >= 0; --i) { @@ -5246,7 +5256,7 @@ rtx_needs_barrier (rtx x, struct reg_flags flags, int pred) need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred); break; - case CONST_INT: case CONST_DOUBLE: + case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR: case SYMBOL_REF: case LABEL_REF: case CONST: break; @@ -5290,6 +5300,14 @@ 
rtx_needs_barrier (rtx x, struct reg_flags flags, int pred) need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred); break; + case VEC_SELECT: + /* VEC_SELECT's second argument is a PARALLEL with integers that + describe the elements selected. On ia64, those integers are + always constants. Avoid walking the PARALLEL so that we don't + get confused with "normal" parallels and abort. */ + need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred); + break; + case UNSPEC: switch (XINT (x, 1)) { diff --git a/gcc/config/ia64/vect.md b/gcc/config/ia64/vect.md index a42ad2a..12729a9 100644 --- a/gcc/config/ia64/vect.md +++ b/gcc/config/ia64/vect.md @@ -172,43 +172,35 @@ (match_operand:V8QI 2 "gr_register_operand" "r")))] "" { - rtx l1, h1, l2, h2, lm, hm, lz, hz; + rtx r1, l1, r2, l2, rm, lm; + r1 = gen_reg_rtx (V4HImode); l1 = gen_reg_rtx (V4HImode); - h1 = gen_reg_rtx (V4HImode); + r2 = gen_reg_rtx (V4HImode); l2 = gen_reg_rtx (V4HImode); - h2 = gen_reg_rtx (V4HImode); - - /* Zero-extend the QImode elements into two words of HImode elements. */ - emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), - operands[1], CONST0_RTX (V8QImode))); - emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), - operands[2], CONST0_RTX (V8QImode))); - emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), - operands[1], CONST0_RTX (V8QImode))); - emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), - operands[2], CONST0_RTX (V8QImode))); + + /* Zero-extend the QImode elements into two words of HImode elements + by interleaving them with zero bytes. */ + emit_insn (gen_mix1_r (gen_lowpart (V8QImode, r1), + operands[1], CONST0_RTX (V8QImode))); + emit_insn (gen_mix1_r (gen_lowpart (V8QImode, r2), + operands[2], CONST0_RTX (V8QImode))); + emit_insn (gen_mix1_l (gen_lowpart (V8QImode, l1), + operands[1], CONST0_RTX (V8QImode))); + emit_insn (gen_mix1_l (gen_lowpart (V8QImode, l2), + operands[2], CONST0_RTX (V8QImode))); /* Multiply. 
*/ + rm = gen_reg_rtx (V4HImode); lm = gen_reg_rtx (V4HImode); - hm = gen_reg_rtx (V4HImode); + emit_insn (gen_mulv4hi3 (rm, r1, r2)); emit_insn (gen_mulv4hi3 (lm, l1, l2)); - emit_insn (gen_mulv4hi3 (hm, h1, h2)); - - /* Zap the high order bytes of the HImode elements. There are several - ways that this could be done. On Itanium2, there's 1 cycle latency - moving between the ALU units and the PALU units, so using AND would - be 3 cycles latency into the eventual pack insn, whereas using MIX - is only 2 cycles. */ - lz = gen_reg_rtx (V4HImode); - hz = gen_reg_rtx (V4HImode); - emit_insn (gen_mix1_r (gen_lowpart (V8QImode, lz), - gen_lowpart (V8QImode, lm), CONST0_RTX (V8QImode))); - emit_insn (gen_mix1_r (gen_lowpart (V8QImode, lz), - gen_lowpart (V8QImode, lm), CONST0_RTX (V8QImode))); - - /* Repack the HImode elements as QImode elements. */ - emit_insn (gen_pack2_sss (operands[0], lz, hz)); + + /* Zap the high order bytes of the HImode elements by overwriting those + in one part with the low order bytes of the other. 
*/ + emit_insn (gen_mix1_r (operands[0], + gen_lowpart (V8QImode, rm), + gen_lowpart (V8QImode, lm))); DONE; }) @@ -218,7 +210,7 @@ (match_operand:V4HI 2 "gr_register_operand" "r")))] "" "pmpyshr2 %0 = %1, %2, 0" - [(set_attr "itanium_class" "mmalua")]) + [(set_attr "itanium_class" "mmmul")]) (define_expand "umax<mode>3" [(set (match_operand:VECINT 0 "gr_register_operand" "") @@ -450,7 +442,7 @@ "mix1.r %0 = %r2, %r1" [(set_attr "itanium_class" "mmshf")]) -(define_insn "*mix1_l" +(define_insn "mix1_l" [(set (match_operand:V8QI 0 "gr_register_operand" "=r") (vec_select:V8QI (vec_concat:V16QI @@ -948,7 +940,7 @@ DONE; }) -(define_insn "*fpack_sfsf" +(define_insn "*fpack" [(set (match_operand:V2SF 0 "fr_register_operand" "=f") (vec_concat:V2SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG") @@ -957,38 +949,6 @@ "fpack %0 = %F2, %F1" [(set_attr "itanium_class" "fmisc")]) -(define_insn "*fpack_sfxf" - [(set (match_operand:V2SF 0 "fr_register_operand" "=f") - (vec_concat:V2SF - (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG") - (float_truncate:SF - (match_operand 2 "fr_register_operand" "f"))))] - "GET_MODE (operands[2]) == DFmode || GET_MODE (operands[2]) == XFmode" - "fpack %0 = %2, %F1" - [(set_attr "itanium_class" "fmisc")]) - -(define_insn "*fpack_xfsf" - [(set (match_operand:V2SF 0 "fr_register_operand" "=f") - (vec_concat:V2SF - (float_truncate:SF - (match_operand 1 "fr_register_operand" "f")) - (match_operand:SF 2 "fr_reg_or_fp01_operand" "fG")))] - "GET_MODE (operands[1]) == DFmode || GET_MODE (operands[1]) == XFmode" - "fpack %0 = %F2, %1" - [(set_attr "itanium_class" "fmisc")]) - -(define_insn "*fpack_xfxf" - [(set (match_operand:V2SF 0 "fr_register_operand" "=f") - (vec_concat:V2SF - (float_truncate:SF - (match_operand 1 "fr_register_operand" "f")) - (float_truncate:SF - (match_operand 2 "fr_register_operand" "f"))))] - "(GET_MODE (operands[1]) == DFmode || GET_MODE (operands[1]) == XFmode) - && (GET_MODE (operands[2]) == DFmode || GET_MODE (operands[2]) 
== XFmode)" - "fpack %0 = %2, %1" - [(set_attr "itanium_class" "fmisc")]) - ;; Missing operations ;; fprcpa ;; fpsqrta diff --git a/gcc/testsuite/gcc.c-torture/execute/simd-6.c b/gcc/testsuite/gcc.c-torture/execute/simd-6.c new file mode 100644 index 0000000..ca4aabc --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/simd-6.c @@ -0,0 +1,22 @@ +extern void abort (void); +extern int memcmp (const void *, const void *, __SIZE_TYPE__); + +typedef unsigned char v8qi __attribute__((vector_size(8))); + +v8qi foo(v8qi x, v8qi y) +{ + return x * y; +} + +int main() +{ + v8qi a = { 1, 2, 3, 4, 5, 6, 7, 8 }; + v8qi b = { 3, 3, 3, 3, 3, 3, 3, 3 }; + v8qi c = { 3, 6, 9, 12, 15, 18, 21, 24 }; + v8qi r; + + r = foo (a, b); + if (memcmp (&r, &c, 8) != 0) + abort (); + return 0; +} |