diff options
author | Dorit Nuzman <dorit@il.ibm.com> | 2005-06-21 09:02:00 +0000 |
---|---|---|
committer | Dorit Nuzman <dorit@gcc.gnu.org> | 2005-06-21 09:02:00 +0000 |
commit | a6b46ba2c84f81e70811e13581c99350cdc76400 (patch) | |
tree | 9b2edf4d87ca9a2741f8f8a846bf5e277d74ab28 /gcc/config | |
parent | a3a2067ac5b2a5ce0b8439d42167df5694d2bb5b (diff) | |
download | gcc-a6b46ba2c84f81e70811e13581c99350cdc76400.zip gcc-a6b46ba2c84f81e70811e13581c99350cdc76400.tar.gz gcc-a6b46ba2c84f81e70811e13581c99350cdc76400.tar.bz2 |
genopinit.c (vec_shl_optab, [...]): Initialize new optabs.
* genopinit.c (vec_shl_optab, vec_shr_optab): Initialize new optabs.
(reduc_plus_optab): Removed. Replaced with...
(reduc_splus_optab, reduc_uplus_optab): Initialize new optabs.
* optabs.c (optab_for_tree_code): Return reduc_splus_optab or
reduc_uplus_optab instead of reduc_plus_optab.
(expand_vec_shift_expr): New function.
(init_optabs): Initialize new optabs. Remove initialization of
reduc_plus_optab.
(optab_for_tree_code): Return vec_shl_optab/vec_shr_optab
for VEC_LSHIFT_EXPR/VEC_RSHIFT_EXPR.
* optabs.h (OTI_reduc_plus): Removed. Replaced with...
(OTI_reduc_splus, OTI_reduc_uplus): New.
(reduc_plus_optab): Removed. Replaced with...
(reduc_splus_optab, reduc_uplus_optab): New optabs.
(vec_shl_optab, vec_shr_optab): New optabs.
(expand_vec_shift_expr): New function declaration.
* tree.def (VEC_LSHIFT_EXPR, VEC_RSHIFT_EXPR): New tree-codes.
* tree-inline.c (estimate_num_insns_1): Handle new tree-codes.
* expr.c (expand_expr_real_1): Handle new tree-codes.
* tree-pretty-print.c (dump_generic_node, op_symbol, op_prio): Likewise.
* tree-vect-generic.c (expand_vector_operations_1): Add assert.
* tree-vect-transform.c (vect_create_epilog_for_reduction): Add two
alternatives for generating reduction epilog code.
(vectorizable_reduction): Don't fail if direct reduction support is
not available.
(vectorizable_target_reduction_pattern): Likewise.
* config/rs6000/altivec.md (reduc_smax_v4si, reduc_smax_v4sf,
reduc_umax_v4si, reduc_smin_v4si, reduc_smin_v4sf, reduc_umin_v4si,
reduc_plus_v4si, reduc_plus_v4sf): Removed.
(vec_shl_<mode>, vec_shr_<mode>, altivec_vsumsws_nomode,
reduc_splus_<mode>, reduc_uplus_v16qi): New.
From-SVN: r101231
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/rs6000/altivec.md | 195 |
1 files changed, 69 insertions, 126 deletions
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 3b20447..7bfd5d9 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -1825,157 +1825,100 @@ operands[3] = gen_reg_rtx (GET_MODE (operands[0])); }) -;; Reduction - -(define_expand "reduc_smax_v4si" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))] +;; Vector shift left in bits. Currently supported ony for shift +;; amounts that can be expressed as byte shifts (divisible by 8). +;; General shift amounts can be supported using vslo + vsl. We're +;; not expecting to see these yet (the vectorizer currently +;; generates only shifts divisible by byte_size). +(define_expand "vec_shl_<mode>" + [(set (match_operand:V 0 "register_operand" "=v") + (unspec:V [(match_operand:V 1 "register_operand" "v") + (match_operand:QI 2 "reg_or_short_operand" "")] 219 ))] "TARGET_ALTIVEC" " -{ - rtx vtmp1 = gen_reg_rtx (V4SImode); - rtx vtmp2 = gen_reg_rtx (V4SImode); - rtx vtmp3 = gen_reg_rtx (V4SImode); - - emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1], - gen_rtx_CONST_INT (SImode, 8))); - emit_insn (gen_smaxv4si3 (vtmp2, operands[1], vtmp1)); - emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2, - gen_rtx_CONST_INT (SImode, 4))); - emit_insn (gen_smaxv4si3 (operands[0], vtmp2, vtmp3)); +{ + rtx bitshift = operands[2]; + rtx byteshift = gen_reg_rtx (QImode); + HOST_WIDE_INT bitshift_val; + HOST_WIDE_INT byteshift_val; + + if (! 
CONSTANT_P (bitshift)) + FAIL; + bitshift_val = INTVAL (bitshift); + if (bitshift_val & 0x7) + FAIL; + byteshift_val = bitshift_val >> 3; + byteshift = gen_rtx_CONST_INT (QImode, byteshift_val); + emit_insn (gen_altivec_vsldoi_<mode> (operands[0], operands[1], operands[1], + byteshift)); DONE; }") -(define_expand "reduc_smax_v4sf" - [(set (match_operand:V4SF 0 "register_operand" "=v") - (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] 217))] +;; Vector shift left in bits. Currently supported ony for shift +;; amounts that can be expressed as byte shifts (divisible by 8). +;; General shift amounts can be supported using vsro + vsr. We're +;; not expecting to see these yet (the vectorizer currently +;; generates only shifts divisible by byte_size). +(define_expand "vec_shr_<mode>" + [(set (match_operand:V 0 "register_operand" "=v") + (unspec:V [(match_operand:V 1 "register_operand" "v") + (match_operand:QI 2 "reg_or_short_operand" "")] 219 ))] "TARGET_ALTIVEC" " -{ - rtx vtmp1 = gen_reg_rtx (V4SFmode); - rtx vtmp2 = gen_reg_rtx (V4SFmode); - rtx vtmp3 = gen_reg_rtx (V4SFmode); - - emit_insn (gen_altivec_vsldoi_v4sf (vtmp1, operands[1], operands[1], - gen_rtx_CONST_INT (SImode, 8))); - emit_insn (gen_smaxv4sf3 (vtmp2, operands[1], vtmp1)); - emit_insn (gen_altivec_vsldoi_v4sf (vtmp3, vtmp2, vtmp2, - gen_rtx_CONST_INT (SImode, 4))); - emit_insn (gen_smaxv4sf3 (operands[0], vtmp2, vtmp3)); +{ + rtx bitshift = operands[2]; + rtx byteshift = gen_reg_rtx (QImode); + HOST_WIDE_INT bitshift_val; + HOST_WIDE_INT byteshift_val; + + if (! 
CONSTANT_P (bitshift)) + FAIL; + bitshift_val = INTVAL (bitshift); + if (bitshift_val & 0x7) + FAIL; + byteshift_val = 16 - (bitshift_val >> 3); + byteshift = gen_rtx_CONST_INT (QImode, byteshift_val); + emit_insn (gen_altivec_vsldoi_<mode> (operands[0], operands[1], operands[1], + byteshift)); DONE; }") -(define_expand "reduc_umax_v4si" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))] +(define_insn "altivec_vsumsws_nomode" + [(set (match_operand 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] 135)) + (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] "TARGET_ALTIVEC" - " -{ - rtx vtmp1 = gen_reg_rtx (V4SImode); - rtx vtmp2 = gen_reg_rtx (V4SImode); - rtx vtmp3 = gen_reg_rtx (V4SImode); - - emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1], - gen_rtx_CONST_INT (SImode, 8))); - emit_insn (gen_umaxv4si3 (vtmp2, operands[1], vtmp1)); - emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2, - gen_rtx_CONST_INT (SImode, 4))); - emit_insn (gen_umaxv4si3 (operands[0], vtmp2, vtmp3)); - DONE; -}") + "vsumsws %0,%1,%2" + [(set_attr "type" "veccomplex")]) -(define_expand "reduc_smin_v4si" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))] +(define_expand "reduc_splus_<mode>" + [(set (match_operand:VIshort 0 "register_operand" "=v") + (unspec:VIshort [(match_operand:VIshort 1 "register_operand" "v")] 217))] "TARGET_ALTIVEC" " { + rtx vzero = gen_reg_rtx (V4SImode); rtx vtmp1 = gen_reg_rtx (V4SImode); - rtx vtmp2 = gen_reg_rtx (V4SImode); - rtx vtmp3 = gen_reg_rtx (V4SImode); - - emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1], - gen_rtx_CONST_INT (SImode, 8))); - emit_insn (gen_sminv4si3 (vtmp2, operands[1], vtmp1)); - emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2, - 
gen_rtx_CONST_INT (SImode, 4))); - emit_insn (gen_sminv4si3 (operands[0], vtmp2, vtmp3)); - DONE; -}") -(define_expand "reduc_smin_v4sf" - [(set (match_operand:V4SF 0 "register_operand" "=v") - (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] 217))] - "TARGET_ALTIVEC" - " -{ - rtx vtmp1 = gen_reg_rtx (V4SFmode); - rtx vtmp2 = gen_reg_rtx (V4SFmode); - rtx vtmp3 = gen_reg_rtx (V4SFmode); - - emit_insn (gen_altivec_vsldoi_v4sf (vtmp1, operands[1], operands[1], - gen_rtx_CONST_INT (SImode, 8))); - emit_insn (gen_sminv4sf3 (vtmp2, operands[1], vtmp1)); - emit_insn (gen_altivec_vsldoi_v4sf (vtmp3, vtmp2, vtmp2, - gen_rtx_CONST_INT (SImode, 4))); - emit_insn (gen_sminv4sf3 (operands[0], vtmp2, vtmp3)); + emit_insn (gen_altivec_vspltisw (vzero, const0_rtx)); + emit_insn (gen_altivec_vsum4s<VI_char>s (vtmp1, operands[1], vzero)); + emit_insn (gen_altivec_vsumsws_nomode (operands[0], vtmp1, vzero)); DONE; }") -(define_expand "reduc_umin_v4si" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))] +(define_expand "reduc_uplus_v16qi" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")] 217))] "TARGET_ALTIVEC" " { + rtx vzero = gen_reg_rtx (V4SImode); rtx vtmp1 = gen_reg_rtx (V4SImode); - rtx vtmp2 = gen_reg_rtx (V4SImode); - rtx vtmp3 = gen_reg_rtx (V4SImode); - - emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1], - gen_rtx_CONST_INT (SImode, 8))); - emit_insn (gen_uminv4si3 (vtmp2, operands[1], vtmp1)); - emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2, - gen_rtx_CONST_INT (SImode, 4))); - emit_insn (gen_uminv4si3 (operands[0], vtmp2, vtmp3)); - DONE; -}") -(define_expand "reduc_plus_v4si" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))] - "TARGET_ALTIVEC" - " -{ - rtx vtmp1 = gen_reg_rtx (V4SImode); - rtx vtmp2 = gen_reg_rtx 
(V4SImode); - rtx vtmp3 = gen_reg_rtx (V4SImode); - - emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1], - gen_rtx_CONST_INT (SImode, 8))); - emit_insn (gen_addv4si3 (vtmp2, operands[1], vtmp1)); - emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2, - gen_rtx_CONST_INT (SImode, 4))); - emit_insn (gen_addv4si3 (operands[0], vtmp2, vtmp3)); - DONE; -}") - -(define_expand "reduc_plus_v4sf" - [(set (match_operand:V4SF 0 "register_operand" "=v") - (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] 217))] - "TARGET_ALTIVEC" - " -{ - rtx vtmp1 = gen_reg_rtx (V4SFmode); - rtx vtmp2 = gen_reg_rtx (V4SFmode); - rtx vtmp3 = gen_reg_rtx (V4SFmode); - - emit_insn (gen_altivec_vsldoi_v4sf (vtmp1, operands[1], operands[1], - gen_rtx_CONST_INT (SImode, 8))); - emit_insn (gen_addv4sf3 (vtmp2, operands[1], vtmp1)); - emit_insn (gen_altivec_vsldoi_v4sf (vtmp3, vtmp2, vtmp2, - gen_rtx_CONST_INT (SImode, 4))); - emit_insn (gen_addv4sf3 (operands[0], vtmp2, vtmp3)); + emit_insn (gen_altivec_vspltisw (vzero, const0_rtx)); + emit_insn (gen_altivec_vsum4ubs (vtmp1, operands[1], vzero)); + emit_insn (gen_altivec_vsumsws_nomode (operands[0], vtmp1, vzero)); DONE; }") |