author    | Jakub Jelinek <jakub@redhat.com>  | 2012-06-15 13:07:47 +0200
committer | Jakub Jelinek <jakub@gcc.gnu.org> | 2012-06-15 13:07:47 +0200
commit    | 079c527f5e7593f8563e3415ad04cf1f0e5d9269
tree      | 5c72c952d5e64996ab124de5c892988b2985f3d4
parent    | c55224dc4d22bbb0536f2386371f362fe066527e
re PR tree-optimization/51581 (Integer division by constant is not vectorized)
PR tree-optimization/51581
* expr.h (choose_multiplier): New prototype.
* expmed.c (choose_multiplier): No longer static.
Change multiplier_ptr from rtx * to UHWI *.
(expand_divmod): Adjust callers.
* tree-vect-patterns.c (vect_recog_sdivmod_pow2_pattern):
Renamed to...
(vect_recog_divmod_pattern): ... this. Pass bb_vinfo as last
argument to new_stmt_vec_info. Attempt to optimize also divisions
by non-pow2 constants if integer vector division isn't supported.
* tree-vect-stmts.c (vect_analyze_stmt): If node != NULL,
don't look at pattern stmts and sequences.
* gcc.c-torture/execute/pr51581-1.c: New test.
* gcc.c-torture/execute/pr51581-2.c: New test.
* gcc.dg/vect/pr51581-1.c: New test.
* gcc.dg/vect/pr51581-2.c: New test.
* gcc.dg/vect/pr51581-3.c: New test.
* gcc.target/i386/avx-pr51581-1.c: New test.
* gcc.target/i386/avx-pr51581-2.c: New test.
* gcc.target/i386/avx2-pr51581-1.c: New test.
* gcc.target/i386/avx2-pr51581-2.c: New test.
* gcc.dg/vect/slp-26.c (main1): Divide by 0x8031 instead of 3.
From-SVN: r188656
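The core of the fix: a division by a constant can be rewritten as a widening multiplication by a precomputed "magic" multiplier followed by shifts, which vectorizes even when the target has no vector integer division. The new execute tests encode the expected expansions as scalar reference functions; as a minimal standalone sketch of the division-by-3 cases, mirroring f7 and f8 in pr51581-1.c below (constants taken from that test; assumes 32-bit int and 64-bit long long):

/* Multiply-highpart expansion of x / 3; choose_multiplier computes
   the magic constants in general.  */
#include <assert.h>
#include <limits.h>

static unsigned int
udiv3 (unsigned int x)
{
  /* ceil (2^33 / 3) = 0xAAAAAAAB; take the high 32 bits of the 64-bit
     product, then shift right by post_shift = 1.  */
  return (unsigned int) ((x * 0xAAAAAAABULL) >> 32) >> 1;
}

static int
sdiv3 (int x)
{
  /* High 32 bits of x * 0x55555556, then subtract the sign of x
     (x >> 31 is -1 for negative x, 0 otherwise) so the quotient
     truncates towards zero.  */
  return (int) (((long long) x * 0x55555556LL) >> 32) - (x >> 31);
}

int
main (void)
{
  int i;
  for (i = -100000; i < 100000; i++)
    assert (sdiv3 (i) == i / 3
	    && udiv3 ((unsigned int) i) == (unsigned int) i / 3);
  assert (sdiv3 (INT_MIN) == INT_MIN / 3);
  assert (udiv3 (~0U) == ~0U / 3);
  return 0;
}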
gcc/ChangeLog                                   |  15
gcc/expmed.c                                    |  33
gcc/expr.h                                      |   7
gcc/testsuite/ChangeLog                         |  14
gcc/testsuite/gcc.c-torture/execute/pr51581-1.c | 157
gcc/testsuite/gcc.c-torture/execute/pr51581-2.c | 173
gcc/testsuite/gcc.dg/vect/pr51581-1.c           |  18
gcc/testsuite/gcc.dg/vect/pr51581-2.c           |  18
gcc/testsuite/gcc.dg/vect/pr51581-3.c           | 118
gcc/testsuite/gcc.dg/vect/slp-26.c              |   6
gcc/testsuite/gcc.target/i386/avx-pr51581-1.c   |  23
gcc/testsuite/gcc.target/i386/avx-pr51581-2.c   |  23
gcc/testsuite/gcc.target/i386/avx2-pr51581-1.c  |   9
gcc/testsuite/gcc.target/i386/avx2-pr51581-2.c  |   9
gcc/tree-vect-patterns.c                        | 510
gcc/tree-vect-stmts.c                           |   6
16 files changed, 1031 insertions(+), 108 deletions(-)
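The tree-vect-patterns.c hunk below implements three expansions: the pre-existing power-of-two paths, an unsigned path, and a signed path. For an odd divisor whose multiplier fits the word, the signed path boils down to a widening high multiply, an arithmetic post-shift, and subtracting the sign of the dividend. A hedged scalar model for d = 19 (the constant 0x6bca1af3 and post-shift of 3 mirror f11 in pr51581-1.c; assumes 32-bit int, 64-bit long long):

#include <assert.h>
#include <limits.h>

static int
sdiv19 (int x)
{
  /* t1..t4 in the pattern: high half of the widening multiply,
     then the arithmetic shift by post_shift = 3.  */
  int t4 = (int) (((long long) x * 0x6bca1af3LL) >> 32) >> 3;
  /* t7 = sign of the dividend; q = t6 - t7.  */
  return t4 - (x >> 31);
}

int
main (void)
{
  int i;
  for (i = -100000; i < 100000; i++)
    assert (sdiv19 (i) == i / 19);
  assert (sdiv19 (INT_MIN) == INT_MIN / 19
	  && sdiv19 (INT_MAX) == INT_MAX / 19);
  return 0;
}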
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 0322fbf..00cffd6 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +2012-06-15 Jakub Jelinek <jakub@redhat.com> + + PR tree-optimization/51581 + * expr.h (choose_multiplier): New prototype. + * expmed.c (choose_multiplier): No longer static. + Change multiplier_ptr from rtx * to UHWI *. + (expand_divmod): Adjust callers. + * tree-vect-patterns.c (vect_recog_sdivmod_pow2_pattern): + Renamed to... + (vect_recog_divmod_pattern): ... this. Pass bb_vinfo as last + argument to new_stmt_vec_info. Attempt to optimize also divisions + by non-pow2 constants if integer vector division isn't supported. + * tree-vect-stmts.c (vect_analyze_stmt): If node != NULL, + don't look at pattern stmts and sequences. + 2012-06-15 Eric Botcazou <ebotcazou@adacore.com> PR middle-end/53590 diff --git a/gcc/expmed.c b/gcc/expmed.c index 98f7c09..b456bac 100644 --- a/gcc/expmed.c +++ b/gcc/expmed.c @@ -2363,8 +2363,6 @@ static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT, struct algorithm *, enum mult_variant *, int); static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx, const struct algorithm *, enum mult_variant); -static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int, - int, rtx *, int *, int *); static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int); static rtx extract_high_half (enum machine_mode, rtx); static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int); @@ -3293,10 +3291,10 @@ ceil_log2 (unsigned HOST_WIDE_INT x) Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR), where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */ -static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision, - rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr) + unsigned HOST_WIDE_INT *multiplier_ptr, + int *post_shift_ptr, int *lgup_ptr) { HOST_WIDE_INT mhigh_hi, mlow_hi; unsigned HOST_WIDE_INT mhigh_lo, mlow_lo; @@ -3368,12 +3366,12 @@ choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision, if (n < HOST_BITS_PER_WIDE_INT) { unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1; - *multiplier_ptr = GEN_INT (mhigh_lo & mask); + *multiplier_ptr = mhigh_lo & mask; return mhigh_lo >= mask; } else { - *multiplier_ptr = GEN_INT (mhigh_lo); + *multiplier_ptr = mhigh_lo; return mhigh_hi; } } @@ -4053,10 +4051,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, { if (unsignedp) { - unsigned HOST_WIDE_INT mh; + unsigned HOST_WIDE_INT mh, ml; int pre_shift, post_shift; int dummy; - rtx ml; unsigned HOST_WIDE_INT d = (INTVAL (op1) & GET_MODE_MASK (compute_mode)); @@ -4118,7 +4115,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, = (shift_cost[speed][compute_mode][post_shift - 1] + shift_cost[speed][compute_mode][1] + 2 * add_cost[speed][compute_mode]); - t1 = expand_mult_highpart (compute_mode, op0, ml, + t1 = expand_mult_highpart (compute_mode, op0, + GEN_INT (ml), NULL_RTX, 1, max_cost - extra_cost); if (t1 == 0) @@ -4149,7 +4147,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, extra_cost = (shift_cost[speed][compute_mode][pre_shift] + shift_cost[speed][compute_mode][post_shift]); - t2 = expand_mult_highpart (compute_mode, t1, ml, + t2 = expand_mult_highpart (compute_mode, t1, + GEN_INT (ml), NULL_RTX, 1, max_cost - extra_cost); if (t2 == 0) @@ -4262,8 +4261,7 @@ expand_divmod (int rem_flag, enum tree_code 
code, enum machine_mode mode, else if (size <= HOST_BITS_PER_WIDE_INT) { choose_multiplier (abs_d, size, size - 1, - &mlr, &post_shift, &lgup); - ml = (unsigned HOST_WIDE_INT) INTVAL (mlr); + &ml, &post_shift, &lgup); if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1)) { rtx t1, t2, t3; @@ -4275,8 +4273,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, extra_cost = (shift_cost[speed][compute_mode][post_shift] + shift_cost[speed][compute_mode][size - 1] + add_cost[speed][compute_mode]); - t1 = expand_mult_highpart (compute_mode, op0, mlr, - NULL_RTX, 0, + t1 = expand_mult_highpart (compute_mode, op0, + GEN_INT (ml), NULL_RTX, 0, max_cost - extra_cost); if (t1 == 0) goto fail1; @@ -4356,10 +4354,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, /* We will come here only for signed operations. */ if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size) { - unsigned HOST_WIDE_INT mh; + unsigned HOST_WIDE_INT mh, ml; int pre_shift, lgup, post_shift; HOST_WIDE_INT d = INTVAL (op1); - rtx ml; if (d > 0) { @@ -4399,8 +4396,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, extra_cost = (shift_cost[speed][compute_mode][post_shift] + shift_cost[speed][compute_mode][size - 1] + 2 * add_cost[speed][compute_mode]); - t3 = expand_mult_highpart (compute_mode, t2, ml, - NULL_RTX, 1, + t3 = expand_mult_highpart (compute_mode, t2, + GEN_INT (ml), NULL_RTX, 1, max_cost - extra_cost); if (t3 != 0) { @@ -243,6 +243,13 @@ extern rtx emit_store_flag (rtx, enum rtx_code, rtx, rtx, enum machine_mode, /* Like emit_store_flag, but always succeeds. */ extern rtx emit_store_flag_force (rtx, enum rtx_code, rtx, rtx, enum machine_mode, int, int); + +/* Choose a minimal N + 1 bit approximation to 1/D that can be used to + replace division by D, and put the least significant N bits of the result + in *MULTIPLIER_PTR and return the most significant bit. */ +extern unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int, + int, unsigned HOST_WIDE_INT *, + int *, int *); /* Functions from builtins.c: */ extern rtx expand_builtin (tree, rtx, rtx, enum machine_mode, int); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index cbf9c02..f3d10e3 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,17 @@ +2012-06-15 Jakub Jelinek <jakub@redhat.com> + + PR tree-optimization/51581 + * gcc.c-torture/execute/pr51581-1.c: New test. + * gcc.c-torture/execute/pr51581-2.c: New test. + * gcc.dg/vect/pr51581-1.c: New test. + * gcc.dg/vect/pr51581-2.c: New test. + * gcc.dg/vect/pr51581-3.c: New test. + * gcc.target/i386/avx-pr51581-1.c: New test. + * gcc.target/i386/avx-pr51581-2.c: New test. + * gcc.target/i386/avx2-pr51581-1.c: New test. + * gcc.target/i386/avx2-pr51581-2.c: New test. + * gcc.dg/vect/slp-26.c (main1): Divide by 0x8031 instead of 3. + 2012-06-15 Richard Guenther <rguenther@suse.de> * gcc.c-torture/execute/20120615-1.c: New testcase. 
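The expmed.c and expr.h hunks above export choose_multiplier and change it to return the multiplier through an unsigned HOST_WIDE_INT pointer instead of an rtx, so the vectorizer can call it on trees. As a reference, here is a simplified model of the selection it performs, specialized to 32-bit precision so plain 64-bit arithmetic suffices (the real function juggles HOST_WIDE_INT pairs and also returns lgup); the helper name is illustrative, not GCC's, and it assumes 2 <= d <= 2^31:

#include <assert.h>
#include <stdint.h>

/* Pick ml and post_shift such that x / d == high32 (x * m) >> post_shift,
   where m is the (up to 33-bit) multiplier whose low 32 bits land in *ml.
   Returns the 33rd bit ("mh"); when it is set, callers must emit the
   add-and-shift fixup sequence instead of a plain high multiply.  */
static int
choose_multiplier32 (uint32_t d, uint32_t *ml, int *post_shift)
{
  int lgup = 64 - __builtin_clzll ((uint64_t) d - 1);	/* ceil (log2 (d)) */
  uint64_t mlow = (1ULL << (32 + lgup)) / d;
  uint64_t mhigh = ((1ULL << (32 + lgup)) + (1ULL << lgup)) / d;
  int shift = lgup;

  /* Reduce to lowest terms while the two approximations still agree.  */
  while ((mlow >> 1) < (mhigh >> 1) && shift > 0)
    {
      mlow >>= 1;
      mhigh >>= 1;
      shift--;
    }

  *ml = (uint32_t) mhigh;
  *post_shift = shift;
  return (int) (mhigh >> 32);
}

int
main (void)
{
  uint32_t ml, x;
  int sh;

  /* d = 10: the multiplier fits in 32 bits (0xCCCCCCCD, shift 3).  */
  assert (choose_multiplier32 (10, &ml, &sh) == 0
	  && ml == 0xCCCCCCCDU && sh == 3);
  for (x = 0; x < 1000000; x++)
    assert ((uint32_t) (((uint64_t) x * ml) >> 32) >> sh == x / 10);

  /* d = 7 overflows into the 33rd bit; see the fixup sketch at the end.  */
  assert (choose_multiplier32 (7, &ml, &sh) == 1);
  return 0;
}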
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr51581-1.c b/gcc/testsuite/gcc.c-torture/execute/pr51581-1.c new file mode 100644 index 0000000..396b7aa --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/pr51581-1.c @@ -0,0 +1,157 @@ +/* PR tree-optimization/51581 */ + +extern void abort (void); + +#define N 4096 +int a[N], c[N]; +unsigned int b[N], d[N]; + +__attribute__((noinline, noclone)) void +f1 (void) +{ + int i; + for (i = 0; i < N; i++) + c[i] = a[i] / 3; +} + +__attribute__((noinline, noclone)) void +f2 (void) +{ + int i; + for (i = 0; i < N; i++) + d[i] = b[i] / 3; +} + +__attribute__((noinline, noclone)) void +f3 (void) +{ + int i; + for (i = 0; i < N; i++) + c[i] = a[i] / 18; +} + +__attribute__((noinline, noclone)) void +f4 (void) +{ + int i; + for (i = 0; i < N; i++) + d[i] = b[i] / 18; +} + +__attribute__((noinline, noclone)) void +f5 (void) +{ + int i; + for (i = 0; i < N; i++) + c[i] = a[i] / 19; +} + +__attribute__((noinline, noclone)) void +f6 (void) +{ + int i; + for (i = 0; i < N; i++) + d[i] = b[i] / 19; +} + +#if __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8 +__attribute__((noinline, noclone)) void +f7 (void) +{ + int i; + for (i = 0; i < N; i++) + c[i] = (int) ((unsigned long long) (a[i] * 0x55555556LL) >> 32) - (a[i] >> 31); +} + +__attribute__((noinline, noclone)) void +f8 (void) +{ + int i; + for (i = 0; i < N; i++) + d[i] = ((unsigned int) ((b[i] * 0xaaaaaaabULL) >> 32) >> 1); +} + +__attribute__((noinline, noclone)) void +f9 (void) +{ + int i; + for (i = 0; i < N; i++) + c[i] = (((int) ((unsigned long long) (a[i] * 0x38e38e39LL) >> 32)) >> 2) - (a[i] >> 31); +} + +__attribute__((noinline, noclone)) void +f10 (void) +{ + int i; + for (i = 0; i < N; i++) + d[i] = (unsigned int) ((b[i] * 0x38e38e39ULL) >> 32) >> 2; +} + +__attribute__((noinline, noclone)) void +f11 (void) +{ + int i; + for (i = 0; i < N; i++) + c[i] = (((int) ((unsigned long long) (a[i] * 0x6bca1af3LL) >> 32)) >> 3) - (a[i] >> 31); +} + +__attribute__((noinline, noclone)) void +f12 (void) +{ + int i; + for (i = 0; i < N; i++) + { + unsigned int tmp = (b[i] * 0xaf286bcbULL) >> 32; + d[i] = (((b[i] - tmp) >> 1) + tmp) >> 4; + } +} +#endif + +int +main () +{ + int i; + for (i = 0; i < N; i++) + { + asm (""); + a[i] = i - N / 2; + b[i] = i; + } + a[0] = -__INT_MAX__ - 1; + a[1] = -__INT_MAX__; + a[N - 1] = __INT_MAX__; + b[N - 1] = ~0; + f1 (); + f2 (); + for (i = 0; i < N; i++) + if (c[i] != a[i] / 3 || d[i] != b[i] / 3) + abort (); + f3 (); + f4 (); + for (i = 0; i < N; i++) + if (c[i] != a[i] / 18 || d[i] != b[i] / 18) + abort (); + f5 (); + f6 (); + for (i = 0; i < N; i++) + if (c[i] != a[i] / 19 || d[i] != b[i] / 19) + abort (); +#if __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8 + f7 (); + f8 (); + for (i = 0; i < N; i++) + if (c[i] != a[i] / 3 || d[i] != b[i] / 3) + abort (); + f9 (); + f10 (); + for (i = 0; i < N; i++) + if (c[i] != a[i] / 18 || d[i] != b[i] / 18) + abort (); + f11 (); + f12 (); + for (i = 0; i < N; i++) + if (c[i] != a[i] / 19 || d[i] != b[i] / 19) + abort (); +#endif + return 0; +} diff --git a/gcc/testsuite/gcc.c-torture/execute/pr51581-2.c b/gcc/testsuite/gcc.c-torture/execute/pr51581-2.c new file mode 100644 index 0000000..dc111c4 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/pr51581-2.c @@ -0,0 +1,173 @@ +/* PR tree-optimization/51581 */ + +extern void abort (void); + +#define N 4096 +int a[N], c[N]; +unsigned int b[N], d[N]; + +__attribute__((noinline, noclone)) void +f1 (void) +{ + int i; + for (i = 0; i < N; i++) + c[i] = a[i] % 3; +} + 
+__attribute__((noinline, noclone)) void +f2 (void) +{ + int i; + for (i = 0; i < N; i++) + d[i] = b[i] % 3; +} + +__attribute__((noinline, noclone)) void +f3 (void) +{ + int i; + for (i = 0; i < N; i++) + c[i] = a[i] % 18; +} + +__attribute__((noinline, noclone)) void +f4 (void) +{ + int i; + for (i = 0; i < N; i++) + d[i] = b[i] % 18; +} + +__attribute__((noinline, noclone)) void +f5 (void) +{ + int i; + for (i = 0; i < N; i++) + c[i] = a[i] % 19; +} + +__attribute__((noinline, noclone)) void +f6 (void) +{ + int i; + for (i = 0; i < N; i++) + d[i] = b[i] % 19; +} + +#if __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8 +__attribute__((noinline, noclone)) void +f7 (void) +{ + int i; + for (i = 0; i < N; i++) + { + int x = (int) ((unsigned long long) (a[i] * 0x55555556LL) >> 32) - (a[i] >> 31); + c[i] = a[i] - x * 3; + } +} + +__attribute__((noinline, noclone)) void +f8 (void) +{ + int i; + for (i = 0; i < N; i++) + { + unsigned int x = ((unsigned int) ((b[i] * 0xaaaaaaabULL) >> 32) >> 1); + d[i] = b[i] - x * 3; + } +} + +__attribute__((noinline, noclone)) void +f9 (void) +{ + int i; + for (i = 0; i < N; i++) + { + int x = (((int) ((unsigned long long) (a[i] * 0x38e38e39LL) >> 32)) >> 2) - (a[i] >> 31); + c[i] = a[i] - x * 18; + } +} + +__attribute__((noinline, noclone)) void +f10 (void) +{ + int i; + for (i = 0; i < N; i++) + { + unsigned int x = (unsigned int) ((b[i] * 0x38e38e39ULL) >> 32) >> 2; + d[i] = b[i] - x * 18; + } +} + +__attribute__((noinline, noclone)) void +f11 (void) +{ + int i; + for (i = 0; i < N; i++) + { + int x = (((int) ((unsigned long long) (a[i] * 0x6bca1af3LL) >> 32)) >> 3) - (a[i] >> 31); + c[i] = a[i] - x * 19; + } +} + +__attribute__((noinline, noclone)) void +f12 (void) +{ + int i; + for (i = 0; i < N; i++) + { + unsigned int tmp = (b[i] * 0xaf286bcbULL) >> 32; + unsigned int x = (((b[i] - tmp) >> 1) + tmp) >> 4; + d[i] = b[i] - x * 19; + } +} +#endif + +int +main () +{ + int i; + for (i = 0; i < N; i++) + { + asm (""); + a[i] = i - N / 2; + b[i] = i; + } + a[0] = -__INT_MAX__ - 1; + a[1] = -__INT_MAX__; + a[N - 1] = __INT_MAX__; + b[N - 1] = ~0; + f1 (); + f2 (); + for (i = 0; i < N; i++) + if (c[i] != a[i] % 3 || d[i] != b[i] % 3) + abort (); + f3 (); + f4 (); + for (i = 0; i < N; i++) + if (c[i] != a[i] % 18 || d[i] != b[i] % 18) + abort (); + f5 (); + f6 (); + for (i = 0; i < N; i++) + if (c[i] != a[i] % 19 || d[i] != b[i] % 19) + abort (); +#if __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8 + f7 (); + f8 (); + for (i = 0; i < N; i++) + if (c[i] != a[i] % 3 || d[i] != b[i] % 3) + abort (); + f9 (); + f10 (); + for (i = 0; i < N; i++) + if (c[i] != a[i] % 18 || d[i] != b[i] % 18) + abort (); + f11 (); + f12 (); + for (i = 0; i < N; i++) + if (c[i] != a[i] % 19 || d[i] != b[i] % 19) + abort (); +#endif + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/pr51581-1.c b/gcc/testsuite/gcc.dg/vect/pr51581-1.c new file mode 100644 index 0000000..d8d61be --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr51581-1.c @@ -0,0 +1,18 @@ +/* PR tree-optimization/51581 */ + +#include "tree-vect.h" + +#define main main1 +#include "../../gcc.c-torture/execute/pr51581-1.c" +#undef main + +int +main () +{ + int i; + check_vect (); + asm (""); + return main1 (); +} + +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr51581-2.c b/gcc/testsuite/gcc.dg/vect/pr51581-2.c new file mode 100644 index 0000000..13b8ba9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr51581-2.c @@ -0,0 +1,18 @@ +/* PR tree-optimization/51581 */ + +#include 
"tree-vect.h" + +#define main main1 +#include "../../gcc.c-torture/execute/pr51581-2.c" +#undef main + +int +main () +{ + int i; + check_vect (); + asm (""); + return main1 (); +} + +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr51581-3.c b/gcc/testsuite/gcc.dg/vect/pr51581-3.c new file mode 100644 index 0000000..a478136 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr51581-3.c @@ -0,0 +1,118 @@ +/* PR tree-optimization/51581 */ + +#include "tree-vect.h" + +int a[8], b[8]; +unsigned int c[8], d[8]; + +void +f1 (void) +{ + a[0] = b[0] / 8; + a[1] = b[1] / 8; + a[2] = b[2] / 8; + a[3] = b[3] / 8; + a[4] = b[4] / 8; + a[5] = b[5] / 8; + a[6] = b[6] / 8; + a[7] = b[7] / 8; +} + +void +f2 (void) +{ + c[0] = d[0] / 3; + c[1] = d[1] / 3; + c[2] = d[2] / 3; + c[3] = d[3] / 3; + c[4] = d[4] / 3; + c[5] = d[5] / 3; + c[6] = d[6] / 3; + c[7] = d[7] / 3; +} + +void +f3 (void) +{ + a[0] = b[0] / 8; + a[1] = b[1] / 4; + a[2] = b[2] / 8; + a[3] = b[3] / 4; + a[4] = b[4] / 8; + a[5] = b[5] / 4; + a[6] = b[6] / 8; + a[7] = b[7] / 4; +} + +void +f4 (void) +{ + c[0] = d[0] / 3; + c[1] = d[1] / 5; + c[2] = d[2] / 3; + c[3] = d[3] / 5; + c[4] = d[4] / 3; + c[5] = d[5] / 5; + c[6] = d[6] / 3; + c[7] = d[7] / 5; +} + +void +f5 (void) +{ + a[0] = b[0] / 14; + a[1] = b[1] / 15; + a[2] = b[2] / 14; + a[3] = b[3] / 15; + a[4] = b[4] / 14; + a[5] = b[5] / 15; + a[6] = b[6] / 14; + a[7] = b[7] / 15; +} + +void +f6 (void) +{ + c[0] = d[0] / 6; + c[1] = d[1] / 5; + c[2] = d[2] / 6; + c[3] = d[3] / 5; + c[4] = d[4] / 6; + c[5] = d[5] / 5; + c[6] = d[6] / 13; + c[7] = d[7] / 5; +} + +int +main () +{ + int i; + check_vect (); + asm (""); + for (i = 0; i < 8; i++) + { + asm (""); + b[i] = i - 4; + d[i] = i - 4; + } + f1 (); + f2 (); + for (i = 0; i < 8; i++) + if (a[i] != b[i] / 8 || c[i] != d[i] / 3) + abort (); + f3 (); + f4 (); + for (i = 0; i < 8; i+= 2) + if (a[i] != b[i] / 8 || a[i + 1] != b[i + 1] / 4 + || c[i] != d[i] / 3 || c[i + 1] != d[i + 1] / 5) + abort (); + f5 (); + f6 (); + for (i = 0; i < 8; i+= 2) + if (a[i] != b[i] / 14 || a[i + 1] != b[i + 1] / 15 + || c[i] != d[i] / (i == 6 ? 13 : 6) || c[i + 1] != d[i + 1] / 5) + abort (); + return 0; +} + +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-26.c b/gcc/testsuite/gcc.dg/vect/slp-26.c index 6821b2c..09a1ecd 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-26.c +++ b/gcc/testsuite/gcc.dg/vect/slp-26.c @@ -10,7 +10,7 @@ main1 () { int i; unsigned short in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; - unsigned short out[N*8], a[N], b[N] = {3,6,9,12,15,18,21,24}; + unsigned short out[N*8], a[N], b[N] = {3,0x8031,0x7fff,0x8032,0xffff,0,0x8030,0x8000}; /* Partial SLP is not supported. 
*/ for (i = 0; i < N; i++) @@ -20,7 +20,7 @@ main1 () out[i*4 + 2] = in[i*4 + 2]; out[i*4 + 3] = in[i*4 + 3]; - a[i] = b[i] / 3; + a[i] = b[i] / 0x8031; } /* check results: */ @@ -30,7 +30,7 @@ main1 () || out[i*4 + 1] != in[i*4 + 1] || out[i*4 + 2] != in[i*4 + 2] || out[i*4 + 3] != in[i*4 + 3] - || a[i] != b[i] / 3) + || a[i] != b[i] / 0x8031) abort (); } diff --git a/gcc/testsuite/gcc.target/i386/avx-pr51581-1.c b/gcc/testsuite/gcc.target/i386/avx-pr51581-1.c new file mode 100644 index 0000000..a1d84bf --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-pr51581-1.c @@ -0,0 +1,23 @@ +/* PR tree-optimization/51581 */ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -mavx -fno-vect-cost-model" } */ +/* { dg-require-effective-target avx } */ + +#ifndef CHECK_H +#define CHECK_H "avx-check.h" +#endif +#ifndef TEST +#define TEST avx_test +#endif + +#define main main1 +#include "../../gcc.c-torture/execute/pr51581-1.c" +#undef main + +#include CHECK_H + +static void +TEST (void) +{ + main1 (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-pr51581-2.c b/gcc/testsuite/gcc.target/i386/avx-pr51581-2.c new file mode 100644 index 0000000..6ff54d9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-pr51581-2.c @@ -0,0 +1,23 @@ +/* PR tree-optimization/51581 */ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -mavx -fno-vect-cost-model" } */ +/* { dg-require-effective-target avx } */ + +#ifndef CHECK_H +#define CHECK_H "avx-check.h" +#endif +#ifndef TEST +#define TEST avx_test +#endif + +#define main main1 +#include "../../gcc.c-torture/execute/pr51581-2.c" +#undef main + +#include CHECK_H + +static void +TEST (void) +{ + main1 (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx2-pr51581-1.c b/gcc/testsuite/gcc.target/i386/avx2-pr51581-1.c new file mode 100644 index 0000000..74d507f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx2-pr51581-1.c @@ -0,0 +1,9 @@ +/* PR tree-optimization/51581 */ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -mavx2 -fno-vect-cost-model" } */ +/* { dg-require-effective-target avx2 } */ + +#define CHECK_H "avx2-check.h" +#define TEST avx2_test + +#include "avx-pr51581-1.c" diff --git a/gcc/testsuite/gcc.target/i386/avx2-pr51581-2.c b/gcc/testsuite/gcc.target/i386/avx2-pr51581-2.c new file mode 100644 index 0000000..bf063c2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx2-pr51581-2.c @@ -0,0 +1,9 @@ +/* PR tree-optimization/51581 */ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -mavx2 -fno-vect-cost-model" } */ +/* { dg-require-effective-target avx2 } */ + +#define CHECK_H "avx2-check.h" +#define TEST avx2_test + +#include "avx-pr51581-2.c" diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c index b4fadf8..aeb7eb2 100644 --- a/gcc/tree-vect-patterns.c +++ b/gcc/tree-vect-patterns.c @@ -53,8 +53,8 @@ static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **, tree *, tree *); static gimple vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **, tree *, tree *); -static gimple vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **, - tree *, tree *); +static gimple vect_recog_divmod_pattern (VEC (gimple, heap) **, + tree *, tree *); static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **, tree *, tree *); static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *); @@ -66,7 +66,7 @@ static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = { vect_recog_widen_shift_pattern, vect_recog_over_widening_pattern, 
vect_recog_vector_vector_shift_pattern, - vect_recog_sdivmod_pow2_pattern, + vect_recog_divmod_pattern, vect_recog_mixed_size_cond_pattern, vect_recog_bool_pattern}; @@ -1585,29 +1585,30 @@ vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **stmts, return pattern_stmt; } -/* Detect a signed division by power of two constant that wouldn't be +/* Detect a signed division by a constant that wouldn't be otherwise vectorized: type a_t, b_t; S1 a_t = b_t / N; - where type 'type' is a signed integral type and N is a constant positive - power of two. + where type 'type' is an integral type and N is a constant. - Similarly handle signed modulo by power of two constant: + Similarly handle modulo by a constant: S4 a_t = b_t % N; Input/Output: * STMTS: Contains a stmt from which the pattern search begins, - i.e. the division stmt. S1 is replaced by: + i.e. the division stmt. S1 is replaced by if N is a power + of two constant and type is signed: S3 y_t = b_t < 0 ? N - 1 : 0; S2 x_t = b_t + y_t; S1' a_t = x_t >> log2 (N); - S4 is replaced by (where *_T temporaries have unsigned type): + S4 is replaced if N is a power of two constant and + type is signed by (where *_T temporaries have unsigned type): S9 y_T = b_t < 0 ? -1U : 0U; S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N)); S7 z_t = (type) z_T; @@ -1625,16 +1626,22 @@ vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **stmts, S1 or modulo S4 stmt. */ static gimple -vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts, - tree *type_in, tree *type_out) +vect_recog_divmod_pattern (VEC (gimple, heap) **stmts, + tree *type_in, tree *type_out) { gimple last_stmt = VEC_pop (gimple, *stmts); - tree oprnd0, oprnd1, vectype, itype, cond; + tree oprnd0, oprnd1, vectype, itype, witype, vecwtype, cond; gimple pattern_stmt, def_stmt; enum tree_code rhs_code; stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); + bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); optab optab; + tree dummy, q; + enum tree_code dummy_code; + int dummy_int, prec; + VEC (tree, heap) *dummy_vec; + stmt_vec_info def_stmt_vinfo; if (!is_gimple_assign (last_stmt)) return NULL; @@ -1658,10 +1665,7 @@ vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts, if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST || TREE_CODE (itype) != INTEGER_TYPE - || TYPE_UNSIGNED (itype) - || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)) - || !integer_pow2p (oprnd1) - || tree_int_cst_sgn (oprnd1) != 1) + || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype))) return NULL; vectype = get_vectype_for_scalar_type (itype); @@ -1680,104 +1684,438 @@ vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts, return NULL; } - /* Pattern detected. 
*/ - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "vect_recog_sdivmod_pow2_pattern: detected: "); - - cond = build2 (LT_EXPR, boolean_type_node, oprnd0, build_int_cst (itype, 0)); - if (rhs_code == TRUNC_DIV_EXPR) + prec = TYPE_PRECISION (itype); + if (integer_pow2p (oprnd1)) { - tree var = vect_recog_temp_ssa_var (itype, NULL); - def_stmt - = gimple_build_assign_with_ops3 (COND_EXPR, var, cond, - fold_build2 (MINUS_EXPR, itype, - oprnd1, - build_int_cst (itype, - 1)), - build_int_cst (itype, 0)); - new_pattern_def_seq (stmt_vinfo, def_stmt); - var = vect_recog_temp_ssa_var (itype, NULL); - def_stmt - = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0, - gimple_assign_lhs (def_stmt)); - append_pattern_def_seq (stmt_vinfo, def_stmt); + if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1) + return NULL; - pattern_stmt - = gimple_build_assign_with_ops (RSHIFT_EXPR, - vect_recog_temp_ssa_var (itype, NULL), - var, - build_int_cst (itype, - tree_log2 (oprnd1))); + /* Pattern detected. */ + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "vect_recog_divmod_pattern: detected: "); + + cond = build2 (LT_EXPR, boolean_type_node, oprnd0, + build_int_cst (itype, 0)); + if (rhs_code == TRUNC_DIV_EXPR) + { + tree var = vect_recog_temp_ssa_var (itype, NULL); + tree shift; + def_stmt + = gimple_build_assign_with_ops3 (COND_EXPR, var, cond, + fold_build2 (MINUS_EXPR, itype, + oprnd1, + build_int_cst (itype, + 1)), + build_int_cst (itype, 0)); + new_pattern_def_seq (stmt_vinfo, def_stmt); + var = vect_recog_temp_ssa_var (itype, NULL); + def_stmt + = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0, + gimple_assign_lhs (def_stmt)); + append_pattern_def_seq (stmt_vinfo, def_stmt); + + shift = build_int_cst (itype, tree_log2 (oprnd1)); + pattern_stmt + = gimple_build_assign_with_ops (RSHIFT_EXPR, + vect_recog_temp_ssa_var (itype, + NULL), + var, shift); + } + else + { + tree signmask; + STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL; + if (compare_tree_int (oprnd1, 2) == 0) + { + signmask = vect_recog_temp_ssa_var (itype, NULL); + def_stmt + = gimple_build_assign_with_ops3 (COND_EXPR, signmask, cond, + build_int_cst (itype, 1), + build_int_cst (itype, 0)); + append_pattern_def_seq (stmt_vinfo, def_stmt); + } + else + { + tree utype + = build_nonstandard_integer_type (prec, 1); + tree vecutype = get_vectype_for_scalar_type (utype); + tree shift + = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype)) + - tree_log2 (oprnd1)); + tree var = vect_recog_temp_ssa_var (utype, NULL); + + def_stmt + = gimple_build_assign_with_ops3 (COND_EXPR, var, cond, + build_int_cst (utype, -1), + build_int_cst (utype, 0)); + def_stmt_vinfo + = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); + set_vinfo_for_stmt (def_stmt, def_stmt_vinfo); + STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype; + append_pattern_def_seq (stmt_vinfo, def_stmt); + var = vect_recog_temp_ssa_var (utype, NULL); + def_stmt + = gimple_build_assign_with_ops (RSHIFT_EXPR, var, + gimple_assign_lhs (def_stmt), + shift); + def_stmt_vinfo + = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); + set_vinfo_for_stmt (def_stmt, def_stmt_vinfo); + STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype; + append_pattern_def_seq (stmt_vinfo, def_stmt); + signmask = vect_recog_temp_ssa_var (itype, NULL); + def_stmt + = gimple_build_assign_with_ops (NOP_EXPR, signmask, var, + NULL_TREE); + append_pattern_def_seq (stmt_vinfo, def_stmt); + } + def_stmt + = gimple_build_assign_with_ops (PLUS_EXPR, + vect_recog_temp_ssa_var 
(itype, + NULL), + oprnd0, signmask); + append_pattern_def_seq (stmt_vinfo, def_stmt); + def_stmt + = gimple_build_assign_with_ops (BIT_AND_EXPR, + vect_recog_temp_ssa_var (itype, + NULL), + gimple_assign_lhs (def_stmt), + fold_build2 (MINUS_EXPR, itype, + oprnd1, + build_int_cst (itype, + 1))); + append_pattern_def_seq (stmt_vinfo, def_stmt); + + pattern_stmt + = gimple_build_assign_with_ops (MINUS_EXPR, + vect_recog_temp_ssa_var (itype, + NULL), + gimple_assign_lhs (def_stmt), + signmask); + } + + if (vect_print_dump_info (REPORT_DETAILS)) + print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); + + VEC_safe_push (gimple, heap, *stmts, last_stmt); + + *type_in = vectype; + *type_out = vectype; + return pattern_stmt; } - else + + if (!host_integerp (oprnd1, TYPE_UNSIGNED (itype)) + || integer_zerop (oprnd1) + || prec > HOST_BITS_PER_WIDE_INT) + return NULL; + + witype = build_nonstandard_integer_type (prec * 2, + TYPE_UNSIGNED (itype)); + vecwtype = get_vectype_for_scalar_type (witype); + if (vecwtype == NULL_TREE) + return NULL; + + if (!supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt, + vecwtype, vectype, + &dummy, &dummy, &dummy_code, + &dummy_code, &dummy_int, &dummy_vec)) + return NULL; + + STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL; + + if (TYPE_UNSIGNED (itype)) { - tree signmask; - STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL; - if (compare_tree_int (oprnd1, 2) == 0) + unsigned HOST_WIDE_INT mh, ml; + int pre_shift, post_shift; + unsigned HOST_WIDE_INT d = tree_low_cst (oprnd1, 1) + & GET_MODE_MASK (TYPE_MODE (itype)); + tree t1, t2, t3, t4, t5, t6; + + if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1))) + /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */ + return NULL; + + /* Find a suitable multiplier and right shift count + instead of multiplying with D. */ + mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int); + + /* If the suggested multiplier is more than SIZE bits, we can do better + for even divisors, using an initial right shift. 
*/ + if (mh != 0 && (d & 1) == 0) + { + pre_shift = floor_log2 (d & -d); + mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift, + &ml, &post_shift, &dummy_int); + gcc_assert (!mh); + } + else + pre_shift = 0; + + if (mh != 0) { - signmask = vect_recog_temp_ssa_var (itype, NULL); + if (post_shift - 1 >= prec) + return NULL; + + /* t1 = oprnd0 w* ml; + t2 = t1 >> prec; + t3 = (type) t2; + t4 = oprnd0 - t3; + t5 = t4 >> 1; + t6 = t3 + t5; + q = t6 >> (post_shift - 1); */ + t1 = vect_recog_temp_ssa_var (witype, NULL); def_stmt - = gimple_build_assign_with_ops3 (COND_EXPR, signmask, cond, - build_int_cst (itype, 1), - build_int_cst (itype, 0)); + = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, t1, oprnd0, + build_int_cst (itype, ml)); + append_pattern_def_seq (stmt_vinfo, def_stmt); + def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); + set_vinfo_for_stmt (def_stmt, def_stmt_vinfo); + STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecwtype; + + t2 = vect_recog_temp_ssa_var (witype, NULL); + def_stmt + = gimple_build_assign_with_ops (RSHIFT_EXPR, t2, t1, + build_int_cst (itype, prec)); + append_pattern_def_seq (stmt_vinfo, def_stmt); + def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); + set_vinfo_for_stmt (def_stmt, def_stmt_vinfo); + STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecwtype; + + t3 = vect_recog_temp_ssa_var (itype, NULL); + def_stmt + = gimple_build_assign_with_ops (NOP_EXPR, t3, t2, NULL_TREE); + append_pattern_def_seq (stmt_vinfo, def_stmt); + + t4 = vect_recog_temp_ssa_var (itype, NULL); + def_stmt + = gimple_build_assign_with_ops (MINUS_EXPR, t4, oprnd0, t3); append_pattern_def_seq (stmt_vinfo, def_stmt); + + t5 = vect_recog_temp_ssa_var (itype, NULL); + def_stmt + = gimple_build_assign_with_ops (RSHIFT_EXPR, t5, t4, + integer_one_node); + append_pattern_def_seq (stmt_vinfo, def_stmt); + + t6 = vect_recog_temp_ssa_var (itype, NULL); + def_stmt + = gimple_build_assign_with_ops (PLUS_EXPR, t6, t3, t5); + + if (post_shift != 1) + { + append_pattern_def_seq (stmt_vinfo, def_stmt); + + q = vect_recog_temp_ssa_var (witype, NULL); + pattern_stmt + = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t6, + build_int_cst (itype, + post_shift + - 1)); + } + else + { + q = t6; + pattern_stmt = def_stmt; + } } else { - tree utype - = build_nonstandard_integer_type (TYPE_PRECISION (itype), 1); - tree vecutype = get_vectype_for_scalar_type (utype); - tree shift - = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype)) - - tree_log2 (oprnd1)); - tree var = vect_recog_temp_ssa_var (utype, NULL); - stmt_vec_info def_stmt_vinfo; + if (pre_shift >= prec || post_shift >= prec) + return NULL; + + /* t1 = oprnd0 >> pre_shift; + t2 = t1 w* ml; + t3 = t2 >> (prec + post_shift); + q = (type) t3; */ + if (pre_shift) + { + t1 = vect_recog_temp_ssa_var (itype, NULL); + def_stmt + = gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0, + build_int_cst (NULL, + pre_shift)); + append_pattern_def_seq (stmt_vinfo, def_stmt); + } + else + t1 = oprnd0; + t2 = vect_recog_temp_ssa_var (witype, NULL); def_stmt - = gimple_build_assign_with_ops3 (COND_EXPR, var, cond, - build_int_cst (utype, -1), - build_int_cst (utype, 0)); - def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL); + = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, t2, t1, + build_int_cst (itype, ml)); + append_pattern_def_seq (stmt_vinfo, def_stmt); + def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); set_vinfo_for_stmt (def_stmt, def_stmt_vinfo); - STMT_VINFO_VECTYPE (def_stmt_vinfo) = 
vecutype; + STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecwtype; + + t3 = vect_recog_temp_ssa_var (witype, NULL); + def_stmt + = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2, + build_int_cst (itype, post_shift + + prec)); append_pattern_def_seq (stmt_vinfo, def_stmt); - var = vect_recog_temp_ssa_var (utype, NULL); + def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); + set_vinfo_for_stmt (def_stmt, def_stmt_vinfo); + STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecwtype; + + q = vect_recog_temp_ssa_var (itype, NULL); + pattern_stmt + = gimple_build_assign_with_ops (NOP_EXPR, q, t3, NULL_TREE); + } + } + else + { + unsigned HOST_WIDE_INT ml; + int lgup, post_shift; + HOST_WIDE_INT d = tree_low_cst (oprnd1, 0); + unsigned HOST_WIDE_INT abs_d; + bool add = false; + tree uwitype = NULL, vecuwtype = NULL; + tree t1, t2, t3, t4, t5, t6, t7; + + /* Give up for -1. */ + if (d == -1) + return NULL; + + if (!vect_supportable_shift (RSHIFT_EXPR, witype)) + { + uwitype = build_nonstandard_integer_type (prec * 2, 1); + vecuwtype = get_vectype_for_scalar_type (uwitype); + if (vecuwtype == NULL_TREE) + return NULL; + } + + /* Since d might be INT_MIN, we have to cast to + unsigned HOST_WIDE_INT before negating to avoid + undefined signed overflow. */ + abs_d = (d >= 0 + ? (unsigned HOST_WIDE_INT) d + : - (unsigned HOST_WIDE_INT) d); + + /* n rem d = n rem -d */ + if (rhs_code == TRUNC_MOD_EXPR && d < 0) + { + d = abs_d; + oprnd1 = build_int_cst (itype, abs_d); + } + else if (HOST_BITS_PER_WIDE_INT >= prec + && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1)) + /* This case is not handled correctly below. */ + return NULL; + + choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &lgup); + if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1)) + { + add = true; + ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1); + } + if (post_shift >= prec) + return NULL; + + /* t1 = oprnd1 w* ml; */ + t1 = vect_recog_temp_ssa_var (witype, NULL); + def_stmt + = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, t1, oprnd0, + build_int_cst (itype, ml)); + append_pattern_def_seq (stmt_vinfo, def_stmt); + def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); + set_vinfo_for_stmt (def_stmt, def_stmt_vinfo); + STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecwtype; + + if (vecuwtype != NULL) + { + /* t2 = (uwtype) t1; */ + t2 = vect_recog_temp_ssa_var (uwitype, NULL); def_stmt - = gimple_build_assign_with_ops (RSHIFT_EXPR, var, - gimple_assign_lhs (def_stmt), - shift); - def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL); + = gimple_build_assign_with_ops (NOP_EXPR, t2, t1, NULL_TREE); + append_pattern_def_seq (stmt_vinfo, def_stmt); + def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); set_vinfo_for_stmt (def_stmt, def_stmt_vinfo); - STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype; + STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecuwtype; + } + else + t2 = t1; + + /* t3 = t2 >> prec; or t3 = t2 >> (prec + post_shift); */ + t3 = vect_recog_temp_ssa_var (vecuwtype ? uwitype : witype, NULL); + def_stmt + = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2, + build_int_cst (itype, + prec + + (!add + && vecuwtype == NULL + ? post_shift : 0))); + append_pattern_def_seq (stmt_vinfo, def_stmt); + def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); + set_vinfo_for_stmt (def_stmt, def_stmt_vinfo); + STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecuwtype ? 
vecuwtype : vecwtype; + + /* t4 = (type) t3; */ + t4 = vect_recog_temp_ssa_var (itype, NULL); + def_stmt + = gimple_build_assign_with_ops (NOP_EXPR, t4, t3, NULL_TREE); + append_pattern_def_seq (stmt_vinfo, def_stmt); + + if (add) + { + /* t5 = t4 + oprnd0; */ + t5 = vect_recog_temp_ssa_var (itype, NULL); + def_stmt + = gimple_build_assign_with_ops (PLUS_EXPR, t5, t4, oprnd0); append_pattern_def_seq (stmt_vinfo, def_stmt); - signmask = vect_recog_temp_ssa_var (itype, NULL); + } + else + t5 = t4; + + if ((add || vecuwtype != NULL) && post_shift) + { + /* t6 = t5 >> post_shift; */ + t6 = vect_recog_temp_ssa_var (itype, NULL); def_stmt - = gimple_build_assign_with_ops (NOP_EXPR, signmask, var, - NULL_TREE); + = gimple_build_assign_with_ops (RSHIFT_EXPR, t6, t5, + build_int_cst (itype, post_shift)); append_pattern_def_seq (stmt_vinfo, def_stmt); } + else + t6 = t5; + + /* t7 = oprnd0 >> (prec - 1); */ + t7 = vect_recog_temp_ssa_var (itype, NULL); def_stmt - = gimple_build_assign_with_ops (PLUS_EXPR, - vect_recog_temp_ssa_var (itype, NULL), - oprnd0, signmask); + = gimple_build_assign_with_ops (RSHIFT_EXPR, t7, oprnd0, + build_int_cst (itype, prec - 1)); append_pattern_def_seq (stmt_vinfo, def_stmt); + + /* q = t6 - t7; or q = t7 - t6; */ + q = vect_recog_temp_ssa_var (itype, NULL); + pattern_stmt + = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t7 : t6, + d < 0 ? t6 : t7); + } + + if (rhs_code == TRUNC_MOD_EXPR) + { + tree r, t1; + + /* We divided. Now finish by: + t1 = q * oprnd1; + r = oprnd0 - t1; */ + append_pattern_def_seq (stmt_vinfo, pattern_stmt); + + t1 = vect_recog_temp_ssa_var (itype, NULL); def_stmt - = gimple_build_assign_with_ops (BIT_AND_EXPR, - vect_recog_temp_ssa_var (itype, NULL), - gimple_assign_lhs (def_stmt), - fold_build2 (MINUS_EXPR, itype, - oprnd1, - build_int_cst (itype, - 1))); + = gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1); append_pattern_def_seq (stmt_vinfo, def_stmt); + r = vect_recog_temp_ssa_var (itype, NULL); pattern_stmt - = gimple_build_assign_with_ops (MINUS_EXPR, - vect_recog_temp_ssa_var (itype, NULL), - gimple_assign_lhs (def_stmt), - signmask); + = gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1); } + /* Pattern detected. */ + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "vect_recog_divmod_pattern: detected: "); + if (vect_print_dump_info (REPORT_DETAILS)) print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 9fa57e1..5853d4f 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -5361,7 +5361,9 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node) Pattern statement needs to be analyzed instead of the original statement if the original statement is not relevant. Otherwise, we analyze both - statements. */ + statements. In basic blocks we are called from some SLP instance + traversal, don't analyze pattern stmts instead, the pattern stmts + already will be part of SLP instance. 
*/ pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); if (!STMT_VINFO_RELEVANT_P (stmt_info) @@ -5390,6 +5392,7 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node) } } else if (STMT_VINFO_IN_PATTERN_P (stmt_info) + && node == NULL && pattern_stmt && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) @@ -5406,6 +5409,7 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node) } if (is_pattern_stmt_p (stmt_info) + && node == NULL && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info))) { gimple_stmt_iterator si; |
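Two notes on the hunks above. In tree-vect-stmts.c, the node == NULL guards keep vect_analyze_stmt from analyzing pattern statements a second time during basic-block SLP, where they are already part of the SLP instance. In slp-26.c, the divisor 0x8031 looks deliberately chosen: at 16-bit precision it is >= 2^15, so the new pattern still rejects it (the "FIXME" bail-out above) and the test keeps exercising the unvectorized partial-SLP case, while the new b[] values probe quotients of 0 and 1 on both sides of the divisor.

Finally, a scalar model of the unsigned mh != 0 fixup (the t1..t6 comment block in the tree-vect-patterns.c hunk), assuming the standard 33-bit multiplier for d = 7, whose low 32 bits are 0x24924925 with post_shift = 3:

#include <assert.h>
#include <stdint.h>

static uint32_t
udiv7 (uint32_t x)
{
  /* t1, t2, t3: widening multiply by the low multiplier bits,
     keep the high word.  */
  uint32_t t3 = (uint32_t) (((uint64_t) x * 0x24924925ULL) >> 32);
  /* t4, t5: subtract and halve to recover the missing 33rd bit.  */
  uint32_t t5 = (x - t3) >> 1;
  /* t6, then q = t6 >> (post_shift - 1).  */
  return (t3 + t5) >> 2;
}

int
main (void)
{
  uint32_t x;
  for (x = 0; x < 1000000; x++)
    assert (udiv7 (x) == x / 7);
  assert (udiv7 (0xFFFFFFFFU) == 0xFFFFFFFFU / 7);
  return 0;
}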