diff options
author | Jakub Jelinek <jakub@redhat.com> | 2011-09-16 21:15:45 +0200 |
---|---|---|
committer | Jakub Jelinek <jakub@gcc.gnu.org> | 2011-09-16 21:15:45 +0200 |
commit | c0b0ee6f188f16485717954d20d82a293299ff01 (patch) | |
tree | 09d69cbd98459ab74099f74ced05945923ef2646 | |
parent | 6e2cb3913952ef1b7fd31f4f5b8754f55f689d9a (diff) | |
download | gcc-c0b0ee6f188f16485717954d20d82a293299ff01.zip gcc-c0b0ee6f188f16485717954d20d82a293299ff01.tar.gz gcc-c0b0ee6f188f16485717954d20d82a293299ff01.tar.bz2 |
i386.c (ix86_expand_reduc_v4sf): Rename to ...
* config/i386/i386.c (ix86_expand_reduc_v4sf): Rename to ...
(ix86_expand_reduc): ... this. Handle also V8SFmode and V4DFmode.
* config/i386/sse.md (reduc_splus_v4sf, reduc_smax_v4sf,
reduc_smin_v4sf): Adjust callers.
(reduc_smax_v8sf, reduc_smin_v8sf, reduc_smax_v4df, reduc_smin_v4df):
New expanders.
* gcc.dg/vect/vect-reduc-10.c: New test.
* gcc.target/i386/avx-reduc-1.c: New test.
From-SVN: r178916
-rw-r--r-- | gcc/ChangeLog | 7 | ||||
-rw-r--r-- | gcc/config/i386/i386-protos.h | 2 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 47 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 42 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 3 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-reduc-10.c | 51 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx-reduc-1.c | 48 |
7 files changed, 183 insertions, 17 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 834ae64..94a61ec 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,12 @@ 2011-09-16 Jakub Jelinek <jakub@redhat.com> + * config/i386/i386.c (ix86_expand_reduc_v4sf): Rename to ... + (ix86_expand_reduc): ... this. Handle also V8SFmode and V4DFmode. + * config/i386/sse.md (reduc_splus_v4sf, reduc_smax_v4sf, + reduc_smin_v4sf): Adjust callers. + (reduc_smax_v8sf, reduc_smin_v8sf, reduc_smax_v4df, reduc_smin_v4df): + New expanders. + * config/i386/sse.md (vec_extract_hi_<mode>, vec_extract_hi_v16hi, vec_extract_hi_v32qi): Use vextracti128 instead of vextractf128 for -mavx2 and diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 900d1c5..707f217 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -211,7 +211,7 @@ extern rtx ix86_tls_module_base (void); extern void ix86_expand_vector_init (bool, rtx, rtx); extern void ix86_expand_vector_set (bool, rtx, rtx, int); extern void ix86_expand_vector_extract (bool, rtx, rtx, int); -extern void ix86_expand_reduc_v4sf (rtx (*)(rtx, rtx, rtx), rtx, rtx); +extern void ix86_expand_reduc (rtx (*)(rtx, rtx, rtx), rtx, rtx); extern void ix86_expand_vec_extract_even_odd (rtx, rtx, rtx, unsigned); extern bool ix86_expand_pinsr (rtx *); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 6e7bcd9..d643839 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -32696,24 +32696,45 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) } } -/* Expand a vector reduction on V4SFmode for SSE1. FN is the binary - pattern to reduce; DEST is the destination; IN is the input vector. */ +/* Expand a vector reduction. FN is the binary pattern to reduce; + DEST is the destination; IN is the input vector. */ void -ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in) +ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in) { - rtx tmp1, tmp2, tmp3; + rtx tmp1, tmp2, tmp3, tmp4, tmp5; + enum machine_mode mode = GET_MODE (in); - tmp1 = gen_reg_rtx (V4SFmode); - tmp2 = gen_reg_rtx (V4SFmode); - tmp3 = gen_reg_rtx (V4SFmode); + tmp1 = gen_reg_rtx (mode); + tmp2 = gen_reg_rtx (mode); + tmp3 = gen_reg_rtx (mode); - emit_insn (gen_sse_movhlps (tmp1, in, in)); - emit_insn (fn (tmp2, tmp1, in)); - - emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2, - const1_rtx, const1_rtx, - GEN_INT (1+4), GEN_INT (1+4))); + switch (mode) + { + case V4SFmode: + emit_insn (gen_sse_movhlps (tmp1, in, in)); + emit_insn (fn (tmp2, tmp1, in)); + emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2, + const1_rtx, const1_rtx, + GEN_INT (1+4), GEN_INT (1+4))); + break; + case V8SFmode: + tmp4 = gen_reg_rtx (mode); + tmp5 = gen_reg_rtx (mode); + emit_insn (gen_avx_vperm2f128v8sf3 (tmp4, in, in, const1_rtx)); + emit_insn (fn (tmp5, tmp4, in)); + emit_insn (gen_avx_shufps256 (tmp1, tmp5, tmp5, GEN_INT (2+12))); + emit_insn (fn (tmp2, tmp1, tmp5)); + emit_insn (gen_avx_shufps256 (tmp3, tmp2, tmp2, const1_rtx)); + break; + case V4DFmode: + emit_insn (gen_avx_vperm2f128v4df3 (tmp1, in, in, const1_rtx)); + emit_insn (fn (tmp2, tmp1, in)); + emit_insn (gen_avx_shufpd256 (tmp3, tmp2, tmp2, const1_rtx)); + break; + default: + gcc_unreachable (); + } emit_insn (fn (dest, tmp2, tmp3)); } diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 01edc4e..8d46247 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1253,7 +1253,7 @@ emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp)); } else - ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]); + ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]); DONE; }) @@ -1263,7 +1263,7 @@ (match_operand:V4SF 1 "register_operand" "")] "TARGET_SSE" { - ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]); + ix86_expand_reduc (gen_smaxv4sf3, operands[0], operands[1]); DONE; }) @@ -1272,7 +1272,43 @@ (match_operand:V4SF 1 "register_operand" "")] "TARGET_SSE" { - ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]); + ix86_expand_reduc (gen_sminv4sf3, operands[0], operands[1]); + DONE; +}) + +(define_expand "reduc_smax_v8sf" + [(match_operand:V8SF 0 "register_operand" "") + (match_operand:V8SF 1 "register_operand" "")] + "TARGET_AVX" +{ + ix86_expand_reduc (gen_smaxv8sf3, operands[0], operands[1]); + DONE; +}) + +(define_expand "reduc_smin_v8sf" + [(match_operand:V8SF 0 "register_operand" "") + (match_operand:V8SF 1 "register_operand" "")] + "TARGET_AVX" +{ + ix86_expand_reduc (gen_sminv8sf3, operands[0], operands[1]); + DONE; +}) + +(define_expand "reduc_smax_v4df" + [(match_operand:V4DF 0 "register_operand" "") + (match_operand:V4DF 1 "register_operand" "")] + "TARGET_AVX" +{ + ix86_expand_reduc (gen_smaxv4df3, operands[0], operands[1]); + DONE; +}) + +(define_expand "reduc_smin_v4df" + [(match_operand:V4DF 0 "register_operand" "") + (match_operand:V4DF 1 "register_operand" "")] + "TARGET_AVX" +{ + ix86_expand_reduc (gen_sminv4df3, operands[0], operands[1]); DONE; }) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index b9d52b4..010f028 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,8 @@ 2011-09-16 Jakub Jelinek <jakub@redhat.com> + * gcc.dg/vect/vect-reduc-10.c: New test. + * gcc.target/i386/avx-reduc-1.c: New test. + * gcc.target/i386/sse2-extract-1.c: New test. * gcc.target/i386/avx-extract-1.c: New test. diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-10.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-10.c new file mode 100644 index 0000000..ad21999 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-10.c @@ -0,0 +1,51 @@ +#include "tree-vect.h" + +extern void abort (void); +double ad[1024]; +float af[1024]; +short as[1024]; +int ai[1024]; +long long all[1024]; +unsigned short aus[1024]; +unsigned int au[1024]; +unsigned long long aull[1024]; + +#define F(var) \ +__attribute__((noinline, noclone)) __typeof (var[0]) \ +f##var (void) \ +{ \ + int i; \ + __typeof (var[0]) r = 0; \ + for (i = 0; i < 1024; i++) \ + r = r > var[i] ? r : var[i]; \ + return r; \ +} + +#define TESTS \ +F (ad) F (af) F (as) F (ai) F (all) F (aus) F (au) F (aull) + +TESTS + +int +main () +{ + int i; + + check_vect (); + + for (i = 0; i < 1024; i++) + { +#undef F +#define F(var) var[i] = i; + TESTS + } + for (i = 1023; i < 32 * 1024; i += 1024 + 271) + { +#undef F +#define F(var) var[i & 1023] = i; if (f##var () != i) abort (); + TESTS + } + return 0; +} + +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx-reduc-1.c b/gcc/testsuite/gcc.target/i386/avx-reduc-1.c new file mode 100644 index 0000000..1df1ee0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-reduc-1.c @@ -0,0 +1,48 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -mavx" } */ +/* { dg-require-effective-target avx_runtime } */ + +extern void abort (void); +double ad[1024]; +float af[1024]; +short as[1024]; +int ai[1024]; +long long all[1024]; +unsigned short aus[1024]; +unsigned int au[1024]; +unsigned long long aull[1024]; + +#define F(var) \ +__attribute__((noinline, noclone)) __typeof (var[0]) \ +f##var (void) \ +{ \ + int i; \ + __typeof (var[0]) r = 0; \ + for (i = 0; i < 1024; i++) \ + r = r > var[i] ? r : var[i]; \ + return r; \ +} + +#define TESTS \ +F (ad) F (af) F (as) F (ai) F (all) F (aus) F (au) F (aull) + +TESTS + +int +main () +{ + int i; + for (i = 0; i < 1024; i++) + { +#undef F +#define F(var) var[i] = i; + TESTS + } + for (i = 1023; i < 32 * 1024; i += 1024 + 271) + { +#undef F +#define F(var) var[i & 1023] = i; if (f##var () != i) abort (); + TESTS + } + return 0; +} |