aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jakub Jelinek <jakub@redhat.com> 2011-09-16 21:15:45 +0200
committer Jakub Jelinek <jakub@gcc.gnu.org> 2011-09-16 21:15:45 +0200
commit c0b0ee6f188f16485717954d20d82a293299ff01 (patch)
tree 09d69cbd98459ab74099f74ced05945923ef2646
parent 6e2cb3913952ef1b7fd31f4f5b8754f55f689d9a (diff)
download gcc-c0b0ee6f188f16485717954d20d82a293299ff01.zip
gcc-c0b0ee6f188f16485717954d20d82a293299ff01.tar.gz
gcc-c0b0ee6f188f16485717954d20d82a293299ff01.tar.bz2
i386.c (ix86_expand_reduc_v4sf): Rename to ...
* config/i386/i386.c (ix86_expand_reduc_v4sf): Rename to ...
(ix86_expand_reduc): ... this. Handle also V8SFmode and V4DFmode.
* config/i386/sse.md (reduc_splus_v4sf, reduc_smax_v4sf,
reduc_smin_v4sf): Adjust callers.
(reduc_smax_v8sf, reduc_smin_v8sf, reduc_smax_v4df, reduc_smin_v4df):
New expanders.

* gcc.dg/vect/vect-reduc-10.c: New test.
* gcc.target/i386/avx-reduc-1.c: New test.

From-SVN: r178916
-rw-r--r-- gcc/ChangeLog | 7
-rw-r--r-- gcc/config/i386/i386-protos.h | 2
-rw-r--r-- gcc/config/i386/i386.c | 47
-rw-r--r-- gcc/config/i386/sse.md | 42
-rw-r--r-- gcc/testsuite/ChangeLog | 3
-rw-r--r-- gcc/testsuite/gcc.dg/vect/vect-reduc-10.c | 51
-rw-r--r-- gcc/testsuite/gcc.target/i386/avx-reduc-1.c | 48
7 files changed, 183 insertions, 17 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 834ae64..94a61ec 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,12 @@
2011-09-16 Jakub Jelinek <jakub@redhat.com>
+ * config/i386/i386.c (ix86_expand_reduc_v4sf): Rename to ...
+ (ix86_expand_reduc): ... this. Handle also V8SFmode and V4DFmode.
+ * config/i386/sse.md (reduc_splus_v4sf, reduc_smax_v4sf,
+ reduc_smin_v4sf): Adjust callers.
+ (reduc_smax_v8sf, reduc_smin_v8sf, reduc_smax_v4df, reduc_smin_v4df):
+ New expanders.
+
* config/i386/sse.md (vec_extract_hi_<mode>,
vec_extract_hi_v16hi, vec_extract_hi_v32qi): Use
vextracti128 instead of vextractf128 for -mavx2 and
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 900d1c5..707f217 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -211,7 +211,7 @@ extern rtx ix86_tls_module_base (void);
extern void ix86_expand_vector_init (bool, rtx, rtx);
extern void ix86_expand_vector_set (bool, rtx, rtx, int);
extern void ix86_expand_vector_extract (bool, rtx, rtx, int);
-extern void ix86_expand_reduc_v4sf (rtx (*)(rtx, rtx, rtx), rtx, rtx);
+extern void ix86_expand_reduc (rtx (*)(rtx, rtx, rtx), rtx, rtx);
extern void ix86_expand_vec_extract_even_odd (rtx, rtx, rtx, unsigned);
extern bool ix86_expand_pinsr (rtx *);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 6e7bcd9..d643839 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -32696,24 +32696,45 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
}
}
-/* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
- pattern to reduce; DEST is the destination; IN is the input vector. */
+/* Expand a vector reduction. FN is the binary pattern to reduce;
+ DEST is the destination; IN is the input vector. */
void
-ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
+ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
- rtx tmp1, tmp2, tmp3;
+ rtx tmp1, tmp2, tmp3, tmp4, tmp5;
+ enum machine_mode mode = GET_MODE (in);
- tmp1 = gen_reg_rtx (V4SFmode);
- tmp2 = gen_reg_rtx (V4SFmode);
- tmp3 = gen_reg_rtx (V4SFmode);
+ tmp1 = gen_reg_rtx (mode);
+ tmp2 = gen_reg_rtx (mode);
+ tmp3 = gen_reg_rtx (mode);
- emit_insn (gen_sse_movhlps (tmp1, in, in));
- emit_insn (fn (tmp2, tmp1, in));
-
- emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
- const1_rtx, const1_rtx,
- GEN_INT (1+4), GEN_INT (1+4)));
+ switch (mode)
+ {
+ case V4SFmode:
+ emit_insn (gen_sse_movhlps (tmp1, in, in));
+ emit_insn (fn (tmp2, tmp1, in));
+ emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
+ const1_rtx, const1_rtx,
+ GEN_INT (1+4), GEN_INT (1+4)));
+ break;
+ case V8SFmode:
+ tmp4 = gen_reg_rtx (mode);
+ tmp5 = gen_reg_rtx (mode);
+ emit_insn (gen_avx_vperm2f128v8sf3 (tmp4, in, in, const1_rtx));
+ emit_insn (fn (tmp5, tmp4, in));
+ emit_insn (gen_avx_shufps256 (tmp1, tmp5, tmp5, GEN_INT (2+12)));
+ emit_insn (fn (tmp2, tmp1, tmp5));
+ emit_insn (gen_avx_shufps256 (tmp3, tmp2, tmp2, const1_rtx));
+ break;
+ case V4DFmode:
+ emit_insn (gen_avx_vperm2f128v4df3 (tmp1, in, in, const1_rtx));
+ emit_insn (fn (tmp2, tmp1, in));
+ emit_insn (gen_avx_shufpd256 (tmp3, tmp2, tmp2, const1_rtx));
+ break;
+ default:
+ gcc_unreachable ();
+ }
emit_insn (fn (dest, tmp2, tmp3));
}
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 01edc4e..8d46247 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1253,7 +1253,7 @@
emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
}
else
- ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
+ ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
DONE;
})
@@ -1263,7 +1263,7 @@
(match_operand:V4SF 1 "register_operand" "")]
"TARGET_SSE"
{
- ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
+ ix86_expand_reduc (gen_smaxv4sf3, operands[0], operands[1]);
DONE;
})
@@ -1272,7 +1272,43 @@
(match_operand:V4SF 1 "register_operand" "")]
"TARGET_SSE"
{
- ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
+ ix86_expand_reduc (gen_sminv4sf3, operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "reduc_smax_v8sf"
+ [(match_operand:V8SF 0 "register_operand" "")
+ (match_operand:V8SF 1 "register_operand" "")]
+ "TARGET_AVX"
+{
+ ix86_expand_reduc (gen_smaxv8sf3, operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "reduc_smin_v8sf"
+ [(match_operand:V8SF 0 "register_operand" "")
+ (match_operand:V8SF 1 "register_operand" "")]
+ "TARGET_AVX"
+{
+ ix86_expand_reduc (gen_sminv8sf3, operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "reduc_smax_v4df"
+ [(match_operand:V4DF 0 "register_operand" "")
+ (match_operand:V4DF 1 "register_operand" "")]
+ "TARGET_AVX"
+{
+ ix86_expand_reduc (gen_smaxv4df3, operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "reduc_smin_v4df"
+ [(match_operand:V4DF 0 "register_operand" "")
+ (match_operand:V4DF 1 "register_operand" "")]
+ "TARGET_AVX"
+{
+ ix86_expand_reduc (gen_sminv4df3, operands[0], operands[1]);
DONE;
})
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index b9d52b4..010f028 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,8 @@
2011-09-16 Jakub Jelinek <jakub@redhat.com>
+ * gcc.dg/vect/vect-reduc-10.c: New test.
+ * gcc.target/i386/avx-reduc-1.c: New test.
+
* gcc.target/i386/sse2-extract-1.c: New test.
* gcc.target/i386/avx-extract-1.c: New test.
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-10.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-10.c
new file mode 100644
index 0000000..ad21999
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-10.c
@@ -0,0 +1,51 @@
+#include "tree-vect.h"
+
+extern void abort (void);
+double ad[1024];
+float af[1024];
+short as[1024];
+int ai[1024];
+long long all[1024];
+unsigned short aus[1024];
+unsigned int au[1024];
+unsigned long long aull[1024];
+
+#define F(var) \
+__attribute__((noinline, noclone)) __typeof (var[0]) \
+f##var (void) \
+{ \
+ int i; \
+ __typeof (var[0]) r = 0; \
+ for (i = 0; i < 1024; i++) \
+ r = r > var[i] ? r : var[i]; \
+ return r; \
+}
+
+#define TESTS \
+F (ad) F (af) F (as) F (ai) F (all) F (aus) F (au) F (aull)
+
+TESTS
+
+int
+main ()
+{
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < 1024; i++)
+ {
+#undef F
+#define F(var) var[i] = i;
+ TESTS
+ }
+ for (i = 1023; i < 32 * 1024; i += 1024 + 271)
+ {
+#undef F
+#define F(var) var[i & 1023] = i; if (f##var () != i) abort ();
+ TESTS
+ }
+ return 0;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx-reduc-1.c b/gcc/testsuite/gcc.target/i386/avx-reduc-1.c
new file mode 100644
index 0000000..1df1ee0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-reduc-1.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -mavx" } */
+/* { dg-require-effective-target avx_runtime } */
+
+extern void abort (void);
+double ad[1024];
+float af[1024];
+short as[1024];
+int ai[1024];
+long long all[1024];
+unsigned short aus[1024];
+unsigned int au[1024];
+unsigned long long aull[1024];
+
+#define F(var) \
+__attribute__((noinline, noclone)) __typeof (var[0]) \
+f##var (void) \
+{ \
+ int i; \
+ __typeof (var[0]) r = 0; \
+ for (i = 0; i < 1024; i++) \
+ r = r > var[i] ? r : var[i]; \
+ return r; \
+}
+
+#define TESTS \
+F (ad) F (af) F (as) F (ai) F (all) F (aus) F (au) F (aull)
+
+TESTS
+
+int
+main ()
+{
+ int i;
+ for (i = 0; i < 1024; i++)
+ {
+#undef F
+#define F(var) var[i] = i;
+ TESTS
+ }
+ for (i = 1023; i < 32 * 1024; i += 1024 + 271)
+ {
+#undef F
+#define F(var) var[i & 1023] = i; if (f##var () != i) abort ();
+ TESTS
+ }
+ return 0;
+}