Diffstat (limited to 'gcc/config/rs6000/altivec.md')
-rw-r--r--  gcc/config/rs6000/altivec.md | 195
1 file changed, 69 insertions(+), 126 deletions(-)
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 3b20447..7bfd5d9 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -1825,157 +1825,100 @@
operands[3] = gen_reg_rtx (GET_MODE (operands[0]));
})
-;; Reduction
-
-(define_expand "reduc_smax_v4si"
- [(set (match_operand:V4SI 0 "register_operand" "=v")
- (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))]
+;; Vector shift left in bits.  Currently supported only for shift
+;; amounts that can be expressed as byte shifts (divisible by 8).
+;; General shift amounts can be supported using vslo + vsl. We're
+;; not expecting to see these yet (the vectorizer currently
+;; generates only shifts divisible by byte_size).
+(define_expand "vec_shl_<mode>"
+ [(set (match_operand:V 0 "register_operand" "=v")
+ (unspec:V [(match_operand:V 1 "register_operand" "v")
+ (match_operand:QI 2 "reg_or_short_operand" "")] 219))]
"TARGET_ALTIVEC"
"
-{
- rtx vtmp1 = gen_reg_rtx (V4SImode);
- rtx vtmp2 = gen_reg_rtx (V4SImode);
- rtx vtmp3 = gen_reg_rtx (V4SImode);
-
- emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1],
- gen_rtx_CONST_INT (SImode, 8)));
- emit_insn (gen_smaxv4si3 (vtmp2, operands[1], vtmp1));
- emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2,
- gen_rtx_CONST_INT (SImode, 4)));
- emit_insn (gen_smaxv4si3 (operands[0], vtmp2, vtmp3));
+{
+ rtx bitshift = operands[2];
+ rtx byteshift;
+ HOST_WIDE_INT bitshift_val;
+ HOST_WIDE_INT byteshift_val;
+
+ if (! CONSTANT_P (bitshift))
+ FAIL;
+ bitshift_val = INTVAL (bitshift);
+ if (bitshift_val & 0x7)
+ FAIL;
+ byteshift_val = bitshift_val >> 3;
+ byteshift = gen_rtx_CONST_INT (QImode, byteshift_val);
+ emit_insn (gen_altivec_vsldoi_<mode> (operands[0], operands[1], operands[1],
+ byteshift));
DONE;
}")
-(define_expand "reduc_smax_v4sf"
- [(set (match_operand:V4SF 0 "register_operand" "=v")
- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] 217))]
+;; Vector shift right in bits.  Currently supported only for shift
+;; amounts that can be expressed as byte shifts (divisible by 8).
+;; General shift amounts can be supported using vsro + vsr. We're
+;; not expecting to see these yet (the vectorizer currently
+;; generates only shifts divisible by byte_size).
+(define_expand "vec_shr_<mode>"
+ [(set (match_operand:V 0 "register_operand" "=v")
+ (unspec:V [(match_operand:V 1 "register_operand" "v")
+ (match_operand:QI 2 "reg_or_short_operand" "")] 219))]
"TARGET_ALTIVEC"
"
-{
- rtx vtmp1 = gen_reg_rtx (V4SFmode);
- rtx vtmp2 = gen_reg_rtx (V4SFmode);
- rtx vtmp3 = gen_reg_rtx (V4SFmode);
-
- emit_insn (gen_altivec_vsldoi_v4sf (vtmp1, operands[1], operands[1],
- gen_rtx_CONST_INT (SImode, 8)));
- emit_insn (gen_smaxv4sf3 (vtmp2, operands[1], vtmp1));
- emit_insn (gen_altivec_vsldoi_v4sf (vtmp3, vtmp2, vtmp2,
- gen_rtx_CONST_INT (SImode, 4)));
- emit_insn (gen_smaxv4sf3 (operands[0], vtmp2, vtmp3));
+{
+ rtx bitshift = operands[2];
+ rtx byteshift;
+ HOST_WIDE_INT bitshift_val;
+ HOST_WIDE_INT byteshift_val;
+
+ if (! CONSTANT_P (bitshift))
+ FAIL;
+ bitshift_val = INTVAL (bitshift);
+ if (bitshift_val & 0x7)
+ FAIL;
+ byteshift_val = 16 - (bitshift_val >> 3);
+ byteshift = gen_rtx_CONST_INT (QImode, byteshift_val);
+ emit_insn (gen_altivec_vsldoi_<mode> (operands[0], operands[1], operands[1],
+ byteshift));
DONE;
}")
-(define_expand "reduc_umax_v4si"
- [(set (match_operand:V4SI 0 "register_operand" "=v")
- (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))]
+(define_insn "altivec_vsumsws_nomode"
+ [(set (match_operand 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")] 135))
+ (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
"TARGET_ALTIVEC"
- "
-{
- rtx vtmp1 = gen_reg_rtx (V4SImode);
- rtx vtmp2 = gen_reg_rtx (V4SImode);
- rtx vtmp3 = gen_reg_rtx (V4SImode);
-
- emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1],
- gen_rtx_CONST_INT (SImode, 8)));
- emit_insn (gen_umaxv4si3 (vtmp2, operands[1], vtmp1));
- emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2,
- gen_rtx_CONST_INT (SImode, 4)));
- emit_insn (gen_umaxv4si3 (operands[0], vtmp2, vtmp3));
- DONE;
-}")
+ "vsumsws %0,%1,%2"
+ [(set_attr "type" "veccomplex")])
-(define_expand "reduc_smin_v4si"
- [(set (match_operand:V4SI 0 "register_operand" "=v")
- (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))]
+(define_expand "reduc_splus_<mode>"
+ [(set (match_operand:VIshort 0 "register_operand" "=v")
+ (unspec:VIshort [(match_operand:VIshort 1 "register_operand" "v")] 217))]
"TARGET_ALTIVEC"
"
{
+ rtx vzero = gen_reg_rtx (V4SImode);
rtx vtmp1 = gen_reg_rtx (V4SImode);
- rtx vtmp2 = gen_reg_rtx (V4SImode);
- rtx vtmp3 = gen_reg_rtx (V4SImode);
-
- emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1],
- gen_rtx_CONST_INT (SImode, 8)));
- emit_insn (gen_sminv4si3 (vtmp2, operands[1], vtmp1));
- emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2,
- gen_rtx_CONST_INT (SImode, 4)));
- emit_insn (gen_sminv4si3 (operands[0], vtmp2, vtmp3));
- DONE;
-}")
-(define_expand "reduc_smin_v4sf"
- [(set (match_operand:V4SF 0 "register_operand" "=v")
- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] 217))]
- "TARGET_ALTIVEC"
- "
-{
- rtx vtmp1 = gen_reg_rtx (V4SFmode);
- rtx vtmp2 = gen_reg_rtx (V4SFmode);
- rtx vtmp3 = gen_reg_rtx (V4SFmode);
-
- emit_insn (gen_altivec_vsldoi_v4sf (vtmp1, operands[1], operands[1],
- gen_rtx_CONST_INT (SImode, 8)));
- emit_insn (gen_sminv4sf3 (vtmp2, operands[1], vtmp1));
- emit_insn (gen_altivec_vsldoi_v4sf (vtmp3, vtmp2, vtmp2,
- gen_rtx_CONST_INT (SImode, 4)));
- emit_insn (gen_sminv4sf3 (operands[0], vtmp2, vtmp3));
+ emit_insn (gen_altivec_vspltisw (vzero, const0_rtx));
+ emit_insn (gen_altivec_vsum4s<VI_char>s (vtmp1, operands[1], vzero));
+ emit_insn (gen_altivec_vsumsws_nomode (operands[0], vtmp1, vzero));
DONE;
}")
-(define_expand "reduc_umin_v4si"
- [(set (match_operand:V4SI 0 "register_operand" "=v")
- (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))]
+(define_expand "reduc_uplus_v16qi"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")] 217))]
"TARGET_ALTIVEC"
"
{
+ rtx vzero = gen_reg_rtx (V4SImode);
rtx vtmp1 = gen_reg_rtx (V4SImode);
- rtx vtmp2 = gen_reg_rtx (V4SImode);
- rtx vtmp3 = gen_reg_rtx (V4SImode);
-
- emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1],
- gen_rtx_CONST_INT (SImode, 8)));
- emit_insn (gen_uminv4si3 (vtmp2, operands[1], vtmp1));
- emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2,
- gen_rtx_CONST_INT (SImode, 4)));
- emit_insn (gen_uminv4si3 (operands[0], vtmp2, vtmp3));
- DONE;
-}")
-(define_expand "reduc_plus_v4si"
- [(set (match_operand:V4SI 0 "register_operand" "=v")
- (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))]
- "TARGET_ALTIVEC"
- "
-{
- rtx vtmp1 = gen_reg_rtx (V4SImode);
- rtx vtmp2 = gen_reg_rtx (V4SImode);
- rtx vtmp3 = gen_reg_rtx (V4SImode);
-
- emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1],
- gen_rtx_CONST_INT (SImode, 8)));
- emit_insn (gen_addv4si3 (vtmp2, operands[1], vtmp1));
- emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2,
- gen_rtx_CONST_INT (SImode, 4)));
- emit_insn (gen_addv4si3 (operands[0], vtmp2, vtmp3));
- DONE;
-}")
-
-(define_expand "reduc_plus_v4sf"
- [(set (match_operand:V4SF 0 "register_operand" "=v")
- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] 217))]
- "TARGET_ALTIVEC"
- "
-{
- rtx vtmp1 = gen_reg_rtx (V4SFmode);
- rtx vtmp2 = gen_reg_rtx (V4SFmode);
- rtx vtmp3 = gen_reg_rtx (V4SFmode);
-
- emit_insn (gen_altivec_vsldoi_v4sf (vtmp1, operands[1], operands[1],
- gen_rtx_CONST_INT (SImode, 8)));
- emit_insn (gen_addv4sf3 (vtmp2, operands[1], vtmp1));
- emit_insn (gen_altivec_vsldoi_v4sf (vtmp3, vtmp2, vtmp2,
- gen_rtx_CONST_INT (SImode, 4)));
- emit_insn (gen_addv4sf3 (operands[0], vtmp2, vtmp3));
+ emit_insn (gen_altivec_vspltisw (vzero, const0_rtx));
+ emit_insn (gen_altivec_vsum4ubs (vtmp1, operands[1], vzero));
+ emit_insn (gen_altivec_vsumsws_nomode (operands[0], vtmp1, vzero));
DONE;
}")