diff options
| field | value | date |
|---|---|---|
| author | Jakub Jelinek <jakub@redhat.com> | 2011-10-13 18:08:04 +0200 |
| committer | Jakub Jelinek <jakub@gcc.gnu.org> | 2011-10-13 18:08:04 +0200 |
| commit | a520f3c39ddff5c64769c158480de8255e919c85 (patch) | |
| tree | 4294158cd9016ef9a0d5e6b259c6ef0c407cd9a4 /gcc | |
| parent | 35f5b1c1b98f5f7f6b8882614d5b37fe51c845f6 (diff) | |
| download | gcc-a520f3c39ddff5c64769c158480de8255e919c85.zip gcc-a520f3c39ddff5c64769c158480de8255e919c85.tar.gz gcc-a520f3c39ddff5c64769c158480de8255e919c85.tar.bz2 | |
sse.md (reduc_umin_v8hi): New pattern.

gcc/
	* config/i386/sse.md (reduc_umin_v8hi): New pattern.
	* config/i386/i386.c (ix86_build_const_vector): Handle
	also V32QI, V16QI, V16HI and V8HI modes.
	(emit_reduc_half): New function.
	(ix86_expand_reduc): Use phminposuw insn for V8HImode UMIN.
	Use emit_reduc_half helper function.

gcc/testsuite/
	* gcc.target/i386/sse4_1-phminposuw-2.c: New test.
	* gcc.target/i386/sse4_1-phminposuw-3.c: New test.
	* gcc.target/i386/avx-vphminposuw-2.c: New test.
	* gcc.target/i386/avx-vphminposuw-3.c: New test.

From-SVN: r179929
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 9 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 130 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 10 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 7 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx-vphminposuw-2.c | 8 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx-vphminposuw-3.c | 6 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse4_1-phminposuw-2.c | 78 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse4_1-phminposuw-3.c | 6 |
8 files changed, 205 insertions, 49 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 88cbf20..c5567d1 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2011-10-13 Jakub Jelinek <jakub@redhat.com> + + * config/i386/sse.md (reduc_umin_v8hi): New pattern. + * config/i386/i386.c (ix86_build_const_vector): Handle + also V32QI, V16QI, V16HI and V8HI modes. + (emit_reduc_half): New function. + (ix86_expand_reduc): Use phminposuw insn for V8HImode UMIN. + Use emit_reduc_half helper function. + 2011-10-13 Lawrence Crowl <crowl@google.com> Diego Novillo <dnovillo@google.com> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 3a53829..a81292b 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -17008,6 +17008,10 @@ ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value) switch (mode) { + case V32QImode: + case V16QImode: + case V16HImode: + case V8HImode: case V8SImode: case V4SImode: case V4DImode: @@ -33250,72 +33254,100 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) } } -/* Expand a vector reduction. FN is the binary pattern to reduce; - DEST is the destination; IN is the input vector. */ +/* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC + to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode. + The upper bits of DEST are undefined, though they shouldn't cause + exceptions (some bits from src or all zeros are ok). 
*/ -void -ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in) +static void +emit_reduc_half (rtx dest, rtx src, int i) { - rtx tmp1, tmp2, tmp3, tmp4, tmp5; - enum machine_mode mode = GET_MODE (in); - int i; - - tmp1 = gen_reg_rtx (mode); - tmp2 = gen_reg_rtx (mode); - tmp3 = gen_reg_rtx (mode); - - switch (mode) + rtx tem; + switch (GET_MODE (src)) { case V4SFmode: - emit_insn (gen_sse_movhlps (tmp1, in, in)); - emit_insn (fn (tmp2, tmp1, in)); - emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2, - const1_rtx, const1_rtx, - GEN_INT (1+4), GEN_INT (1+4))); + if (i == 128) + tem = gen_sse_movhlps (dest, src, src); + else + tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx, + GEN_INT (1 + 4), GEN_INT (1 + 4)); + break; + case V2DFmode: + tem = gen_vec_interleave_highv2df (dest, src, src); + break; + case V16QImode: + case V8HImode: + case V4SImode: + case V2DImode: + tem = gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, dest), + gen_lowpart (V1TImode, src), + GEN_INT (i / 2)); break; case V8SFmode: - tmp4 = gen_reg_rtx (mode); - tmp5 = gen_reg_rtx (mode); - emit_insn (gen_avx_vperm2f128v8sf3 (tmp4, in, in, const1_rtx)); - emit_insn (fn (tmp5, tmp4, in)); - emit_insn (gen_avx_shufps256 (tmp1, tmp5, tmp5, GEN_INT (2+12))); - emit_insn (fn (tmp2, tmp1, tmp5)); - emit_insn (gen_avx_shufps256 (tmp3, tmp2, tmp2, const1_rtx)); + if (i == 256) + tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx); + else + tem = gen_avx_shufps256 (dest, src, src, + GEN_INT (i == 128 ? 
2 + (3 << 2) : 1)); break; case V4DFmode: - emit_insn (gen_avx_vperm2f128v4df3 (tmp1, in, in, const1_rtx)); - emit_insn (fn (tmp2, tmp1, in)); - emit_insn (gen_avx_shufpd256 (tmp3, tmp2, tmp2, const1_rtx)); + if (i == 256) + tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx); + else + tem = gen_avx_shufpd256 (dest, src, src, const1_rtx); break; case V32QImode: case V16HImode: case V8SImode: case V4DImode: - emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, tmp1), - gen_lowpart (V4DImode, in), - gen_lowpart (V4DImode, in), - const1_rtx)); - tmp4 = in; - tmp5 = tmp1; - for (i = 64; i >= GET_MODE_BITSIZE (GET_MODE_INNER (mode)); i >>= 1) - { - if (i != 64) - { - tmp2 = gen_reg_rtx (mode); - tmp3 = gen_reg_rtx (mode); - } - emit_insn (fn (tmp2, tmp4, tmp5)); - emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, tmp3), - gen_lowpart (V2TImode, tmp2), - GEN_INT (i))); - tmp4 = tmp2; - tmp5 = tmp3; - } + if (i == 256) + tem = gen_avx2_permv2ti (gen_lowpart (V4DImode, dest), + gen_lowpart (V4DImode, src), + gen_lowpart (V4DImode, src), + const1_rtx); + else + tem = gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, dest), + gen_lowpart (V2TImode, src), + GEN_INT (i / 2)); break; default: gcc_unreachable (); } - emit_insn (fn (dest, tmp2, tmp3)); + emit_insn (tem); +} + +/* Expand a vector reduction. FN is the binary pattern to reduce; + DEST is the destination; IN is the input vector. */ + +void +ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in) +{ + rtx half, dst, vec = in; + enum machine_mode mode = GET_MODE (in); + int i; + + /* SSE4 has a special instruction for V8HImode UMIN reduction. 
*/ + if (TARGET_SSE4_1 + && mode == V8HImode + && fn == gen_uminv8hi3) + { + emit_insn (gen_sse4_1_phminposuw (dest, in)); + return; + } + + for (i = GET_MODE_BITSIZE (mode); + i > GET_MODE_BITSIZE (GET_MODE_INNER (mode)); + i >>= 1) + { + half = gen_reg_rtx (mode); + emit_reduc_half (half, vec, i); + if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2) + dst = dest; + else + dst = gen_reg_rtx (mode); + emit_insn (fn (dst, half, vec)); + vec = dst; + } } /* Target hook for scalar_mode_supported_p. */ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 3073ab2..748a66c 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1303,6 +1303,16 @@ DONE; }) +(define_expand "reduc_umin_v8hi" + [(umin:V8HI + (match_operand:V8HI 0 "register_operand" "") + (match_operand:V8HI 1 "register_operand" ""))] + "TARGET_SSE4_1" +{ + ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]); + DONE; +}) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel floating point comparisons diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 41bacce..9f06ab4 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2011-10-13 Jakub Jelinek <jakub@redhat.com> + + * gcc.target/i386/sse4_1-phminposuw-2.c: New test. + * gcc.target/i386/sse4_1-phminposuw-3.c: New test. + * gcc.target/i386/avx-vphminposuw-2.c: New test. + * gcc.target/i386/avx-vphminposuw-3.c: New test. + 2011-10-13 H.J. Lu <hongjiu.lu@intel.com> * gcc.target/i386/pr50712.c: Check ia32 instead of ilp32. 
diff --git a/gcc/testsuite/gcc.target/i386/avx-vphminposuw-2.c b/gcc/testsuite/gcc.target/i386/avx-vphminposuw-2.c new file mode 100644 index 0000000..3ae122c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vphminposuw-2.c @@ -0,0 +1,8 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx } */ +/* { dg-options "-O3 -mavx -mno-avx2" } */ + +#define CHECK_H "avx-check.h" +#define TEST avx_test + +#include "sse4_1-phminposuw-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx-vphminposuw-3.c b/gcc/testsuite/gcc.target/i386/avx-vphminposuw-3.c new file mode 100644 index 0000000..4a37ba5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vphminposuw-3.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx -mno-avx2" } */ + +#include "avx-vphminposuw-2.c" + +/* { dg-final { scan-assembler "vphminposuw\[^\n\r\]*xmm" } } */ diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-phminposuw-2.c b/gcc/testsuite/gcc.target/i386/sse4_1-phminposuw-2.c new file mode 100644 index 0000000..c9f9c1c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-phminposuw-2.c @@ -0,0 +1,78 @@ +/* { dg-do run } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O3 -msse4.1 -mno-avx2" } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +extern void abort (void); + +#define N 1024 +short a[N], c, e; +unsigned short b[N], d, f; + +__attribute__((noinline)) short +vecsmax (void) +{ + int i; + short r = -32768; + for (i = 0; i < N; ++i) + if (r < a[i]) r = a[i]; + return r; +} + +__attribute__((noinline)) unsigned short +vecumax (void) +{ + int i; + unsigned short r = 0; + for (i = 0; i < N; ++i) + if (r < b[i]) r = b[i]; + return r; +} + +__attribute__((noinline)) short +vecsmin (void) +{ + int i; + short r = 32767; + for (i = 0; i < N; ++i) + if (r > a[i]) r = a[i]; + return r; +} + +__attribute__((noinline)) unsigned short +vecumin (void) +{ + int i; + unsigned 
short r = 65535; + for (i = 0; i < N; ++i) + if (r > b[i]) r = b[i]; + return r; +} + +static void +TEST (void) +{ + int i; + for (i = 0; i < N; ++i) + { + a[i] = i - N / 2; + b[i] = i + 32768 - N / 2; + } + a[N / 3] = N; + a[2 * N / 3] = -N; + b[N / 5] = 32768 + N; + b[4 * N / 5] = 32768 - N; + if (vecsmax () != N || vecsmin () != -N) + abort (); + if (vecumax () != 32768 + N || vecumin () != 32768 - N) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-phminposuw-3.c b/gcc/testsuite/gcc.target/i386/sse4_1-phminposuw-3.c new file mode 100644 index 0000000..95c5f05 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-phminposuw-3.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -msse4.1 -mno-avx2" } */ + +#include "sse4_1-phminposuw-2.c" + +/* { dg-final { scan-assembler "phminposuw\[^\n\r\]*xmm" } } */ |