aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Jakub Jelinek <jakub@gcc.gnu.org> 2013-12-16 19:24:15 +0100
committer Jakub Jelinek <jakub@gcc.gnu.org> 2013-12-16 19:24:15 +0100
commit 7670d795d5fe6685c133a43ce15f5842c1d6aa25 (patch)
tree 107508aba445e74a0f561829718b487976861963 /gcc
parent d5be9028803017ddc2e3e6471eb7c167e5458dc8 (diff)
download gcc-7670d795d5fe6685c133a43ce15f5842c1d6aa25.zip
gcc-7670d795d5fe6685c133a43ce15f5842c1d6aa25.tar.gz
gcc-7670d795d5fe6685c133a43ce15f5842c1d6aa25.tar.bz2
tree-vectorizer.h (struct _loop_vec_info): Add scalar_loop field.
* tree-vectorizer.h (struct _loop_vec_info): Add scalar_loop field. (LOOP_VINFO_SCALAR_LOOP): Define. (slpeel_tree_duplicate_loop_to_edge_cfg): Add scalar_loop argument. * config/i386/sse.md (maskload<mode>, maskstore<mode>): New expanders. * tree-data-ref.c (get_references_in_stmt): Handle MASK_LOAD and MASK_STORE. * internal-fn.def (LOOP_VECTORIZED, MASK_LOAD, MASK_STORE): New internal fns. * tree-if-conv.c: Include expr.h, optabs.h, tree-ssa-loop-ivopts.h and tree-ssa-address.h. (release_bb_predicate): New function. (free_bb_predicate): Use it. (reset_bb_predicate): Likewise. Don't unallocate bb->aux just to immediately allocate it again. (add_to_predicate_list): Add loop argument. If basic blocks that dominate loop->latch don't insert any predicate. (add_to_dst_predicate_list): Adjust caller. (if_convertible_phi_p): Add any_mask_load_store argument, if true, handle it like flag_tree_loop_if_convert_stores. (insert_gimplified_predicates): Likewise. (ifcvt_can_use_mask_load_store): New function. (if_convertible_gimple_assign_stmt_p): Add any_mask_load_store argument, check if some conditional loads or stores can't be converted into MASK_LOAD or MASK_STORE. (if_convertible_stmt_p): Add any_mask_load_store argument, pass it down to if_convertible_gimple_assign_stmt_p. (predicate_bbs): Don't return bool, only check if the last stmt of a basic block is GIMPLE_COND and handle that. Adjust add_to_predicate_list caller. (if_convertible_loop_p_1): Only call predicate_bbs if flag_tree_loop_if_convert_stores and free_bb_predicate in that case afterwards, check gimple_code of stmts here. Replace is_predicated check with dominance check. Add any_mask_load_store argument, pass it down to if_convertible_stmt_p and if_convertible_phi_p, call if_convertible_phi_p only after all if_convertible_stmt_p calls. (if_convertible_loop_p): Add any_mask_load_store argument, pass it down to if_convertible_loop_p_1. (predicate_mem_writes): Emit MASK_LOAD and/or MASK_STORE calls. 
(combine_blocks): Add any_mask_load_store argument, pass it down to insert_gimplified_predicates and call predicate_mem_writes if it is set. Call predicate_bbs. (version_loop_for_if_conversion): New function. (tree_if_conversion): Adjust if_convertible_loop_p and combine_blocks calls. Return todo flags instead of bool, call version_loop_for_if_conversion if if-conversion should be just for the vectorized loops and nothing else. (main_tree_if_conversion): Adjust caller. Don't call tree_if_conversion for dont_vectorize loops if if-conversion isn't explicitly enabled. * tree-vect-data-refs.c (vect_check_gather): Handle MASK_LOAD/MASK_STORE. (vect_analyze_data_refs, vect_supportable_dr_alignment): Likewise. * gimple.h (gimple_expr_type): Handle MASK_STORE. * internal-fn.c (expand_LOOP_VECTORIZED, expand_MASK_LOAD, expand_MASK_STORE): New functions. * tree-vectorizer.c: Include tree-cfg.h and gimple-fold.h. (vect_loop_vectorized_call, fold_loop_vectorized_call): New functions. (vectorize_loops): Don't try to vectorize loops with loop->dont_vectorize set. Set LOOP_VINFO_SCALAR_LOOP for if-converted loops, fold LOOP_VECTORIZED internal call depending on if loop has been vectorized or not. * tree-vect-loop-manip.c (slpeel_duplicate_current_defs_from_edges): New function. (slpeel_tree_duplicate_loop_to_edge_cfg): Add scalar_loop argument. If non-NULL, copy basic blocks from scalar_loop instead of loop, but still to loop's entry or exit edge. (slpeel_tree_peel_loop_to_edge): Add scalar_loop argument, pass it down to slpeel_tree_duplicate_loop_to_edge_cfg. (vect_do_peeling_for_loop_bound, vect_do_peeling_for_loop_alignment): Adjust callers. (vect_loop_versioning): If LOOP_VINFO_SCALAR_LOOP, perform loop versioning from that loop instead of LOOP_VINFO_LOOP, move it to the right place in the CFG afterwards. * tree-vect-loop.c (vect_determine_vectorization_factor): Handle MASK_STORE. * cfgloop.h (struct loop): Add dont_vectorize field. 
* tree-loop-distribution.c (copy_loop_before): Adjust slpeel_tree_duplicate_loop_to_edge_cfg caller. * optabs.def (maskload_optab, maskstore_optab): New optabs. * passes.def: Add a note that pass_vectorize must immediately follow pass_if_conversion. * tree-predcom.c (split_data_refs_to_components): Give up if DR_STMT is a call. * tree-vect-stmts.c (vect_mark_relevant): Don't crash if lhs is NULL. (exist_non_indexing_operands_for_use_p): Handle MASK_LOAD and MASK_STORE. (vectorizable_mask_load_store): New function. (vectorizable_call): Call it for MASK_LOAD or MASK_STORE. (vect_transform_stmt): Handle MASK_STORE. * tree-ssa-phiopt.c (cond_if_else_store_replacement): Ignore DR_STMT where lhs is NULL. * optabs.h (can_vec_perm_p): Fix up comment typo. (can_vec_mask_load_store_p): New prototype. * optabs.c (can_vec_mask_load_store_p): New function. * gcc.dg/vect/vect-cond-11.c: New test. * gcc.target/i386/vect-cond-1.c: New test. * gcc.target/i386/avx2-gather-5.c: New test. * gcc.target/i386/avx2-gather-6.c: New test. * gcc.dg/vect/vect-mask-loadstore-1.c: New test. * gcc.dg/vect/vect-mask-load-1.c: New test. From-SVN: r206022
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/testsuite/gcc.dg/vect/vect-cond-11.c 116
-rw-r--r-- gcc/testsuite/gcc.dg/vect/vect-mask-load-1.c 52
-rw-r--r-- gcc/testsuite/gcc.dg/vect/vect-mask-loadstore-1.c 50
-rw-r--r-- gcc/testsuite/gcc.target/i386/avx2-gather-5.c 47
-rw-r--r-- gcc/testsuite/gcc.target/i386/avx2-gather-6.c 7
-rw-r--r-- gcc/testsuite/gcc.target/i386/vect-cond-1.c 21
6 files changed, 293 insertions, 0 deletions
diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-11.c b/gcc/testsuite/gcc.dg/vect/vect-cond-11.c
new file mode 100644
index 0000000..0301c89
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-cond-11.c
@@ -0,0 +1,116 @@
+#include "tree-vect.h"
+
+#define N 1024 /* Trip count of the loops in foo and bar.  */
+typedef int V __attribute__((vector_size (4)));
+unsigned int a[N * 2] __attribute__((aligned)); /* Twice N so main can pass interior pointers with slack on both sides.  */
+unsigned int b[N * 2] __attribute__((aligned)); /* Same layout as a.  */
+V c[N]; /* Updated only by bar.  */
+
+__attribute__((noinline, noclone)) unsigned int
+foo (unsigned int *a, unsigned int *b) /* Rewrites a[0 .. N - 1] and b[0 .. N - 1] in place and returns the sum of the new a[] values.  */
+{
+ int i;
+ unsigned int r = 0;
+ for (i = 0; i < N; i++)
+ {
+ unsigned int x = a[i], y = b[i];
+ if (x < 32) /* Both branches assign x and y, so the stores below happen on every iteration.  */
+ {
+ x = x + 127;
+ y = y * 2;
+ }
+ else
+ {
+ x = x - 16;
+ y = y + 1;
+ }
+ a[i] = x;
+ b[i] = y;
+ r += x; /* Unsigned accumulation: the sum wraps rather than overflowing.  */
+ }
+ return r;
+}
+
+__attribute__((noinline, noclone)) unsigned int
+bar (unsigned int *a, unsigned int *b) /* Same transformation and return value as foo, plus an update of the global array c.  */
+{
+ int i;
+ unsigned int r = 0;
+ for (i = 0; i < N; i++)
+ {
+ unsigned int x = a[i], y = b[i];
+ if (x < 32) /* Both branches assign x and y, so the stores below happen on every iteration.  */
+ {
+ x = x + 127;
+ y = y * 2;
+ }
+ else
+ {
+ x = x - 16;
+ y = y + 1;
+ }
+ a[i] = x;
+ b[i] = y;
+ c[i] = c[i] + 1; /* The only statement that differs from foo: touches the vector-typed global c.  */
+ r += x; /* Unsigned accumulation: the sum wraps rather than overflowing.  */
+ }
+ return r;
+}
+
+void
+baz (unsigned int *a, unsigned int *b,
+ unsigned int (*fn) (unsigned int *, unsigned int *)) /* Fills a[-64 .. N + 63] and b[-64 .. N + 63], calls fn (a, b), then aborts unless the result and every element match.  */
+{
+ int i;
+ for (i = -64; i < 0; i++) /* Lower guard zone; verified after the call to be unchanged.  */
+ {
+ a[i] = 19;
+ b[i] = 17;
+ }
+ for (; i < N; i++) /* The N elements that foo and bar operate on.  */
+ {
+ a[i] = i - 512;
+ b[i] = i;
+ }
+ for (; i < N + 64; i++) /* Upper guard zone; verified after the call to be unchanged.  */
+ {
+ a[i] = 27;
+ b[i] = 19;
+ }
+ if (fn (a, b) != -512U - (N - 32) * 16U + 32 * 127U) /* Sum of (i - 512) over 0 .. N - 1 is -512; 32 elements gain 127, the other N - 32 lose 16.  */
+ __builtin_abort ();
+ for (i = -64; i < 0; i++)
+ if (a[i] != 19 || b[i] != 17)
+ __builtin_abort ();
+ for (; i < N; i++)
+ if (a[i] != (i - 512U < 32U ? i - 512U + 127 : i - 512U - 16) /* Recomputes the per-element result in unsigned arithmetic.  */
+ || b[i] != (i - 512U < 32U ? i * 2U : i + 1U))
+ __builtin_abort ();
+ for (; i < N + 64; i++)
+ if (a[i] != 27 || b[i] != 19)
+ __builtin_abort ();
+}
+
+int
+main ()
+{
+ int i;
+ check_vect ();
+ baz (a + 512, b + 512, foo); /* Base offset 512 keeps the [-64, N + 64) window used by baz inside the 2 * N element arrays.  */
+ baz (a + 512, b + 512, bar);
+ baz (a + 512 + 1, b + 512 + 1, foo); /* Both pointers shifted by the same extra amount (1, then 31 elements).  */
+ baz (a + 512 + 1, b + 512 + 1, bar);
+ baz (a + 512 + 31, b + 512 + 31, foo);
+ baz (a + 512 + 31, b + 512 + 31, bar);
+ baz (a + 512 + 1, b + 512, foo); /* Only a shifted; presumably to give a and b different relative alignment.  */
+ baz (a + 512 + 1, b + 512, bar);
+ baz (a + 512 + 31, b + 512, foo);
+ baz (a + 512 + 31, b + 512, bar);
+ baz (a + 512, b + 512 + 1, foo); /* Only b shifted.  */
+ baz (a + 512, b + 512 + 1, bar);
+ baz (a + 512, b + 512 + 31, foo);
+ baz (a + 512, b + 512 + 31, bar);
+ return 0;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-mask-load-1.c b/gcc/testsuite/gcc.dg/vect/vect-mask-load-1.c
new file mode 100644
index 0000000..2c30830
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-mask-load-1.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-additional-options "-Ofast -fno-common" } */
+/* { dg-additional-options "-Ofast -fno-common -mavx" { target avx_runtime } } */
+
+#include <stdlib.h>
+#include "tree-vect.h"
+
+__attribute__((noinline, noclone)) void
+foo (double *x, double *y) /* Rewrites x[0 .. 1023]; reads y[i] only when x[i] < 0.0 and x[1024 + i] only otherwise.  */
+{
+ double *p = __builtin_assume_aligned (x, 16);
+ double *q = __builtin_assume_aligned (y, 16);
+ double z, h;
+ int i;
+ for (i = 0; i < 1024; i++)
+ {
+ if (p[i] < 0.0)
+ z = q[i], h = q[i] * 7.0 + 3.0; /* q[i] is loaded only on this path.  */
+ else
+ z = p[i] + 6.0, h = p[1024 + i]; /* p[1024 + i] is loaded only on this path.  */
+ p[i] = z + 2.0 * h; /* The store itself is unconditional.  */
+ }
+}
+
+double a[2048] __attribute__((aligned (16))); /* Passed as x to foo, which reads a[i] and a[1024 + i].  */
+double b[1024] __attribute__((aligned (16))); /* Passed as y to foo; main checks it is unchanged.  */
+
+int
+main ()
+{
+ int i;
+ check_vect ();
+ for (i = 0; i < 1024; i++)
+ {
+ a[i] = (i & 1) ? -i : 2 * i; /* Negative for odd i, so foo takes its p[i] < 0.0 path exactly there.  */
+ a[i + 1024] = i;
+ b[i] = 7 * i;
+ asm (""); /* Empty asm; presumably keeps this init loop from being vectorized so only foo's loop is counted.  */
+ }
+ foo (a, b);
+ for (i = 0; i < 1024; i++)
+ if (a[i] != ((i & 1)
+ ? 7 * i + 2.0 * (7 * i * 7.0 + 3.0) /* Odd i: z = b[i], h = b[i] * 7.0 + 3.0.  */
+ : 2 * i + 6.0 + 2.0 * i) /* Even i: z = a[i] + 6.0, h = a[1024 + i].  */
+ || b[i] != 7 * i
+ || a[i + 1024] != i)
+ abort ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops" 1 "vect" { target avx_runtime } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-mask-loadstore-1.c b/gcc/testsuite/gcc.dg/vect/vect-mask-loadstore-1.c
new file mode 100644
index 0000000..ecc164f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-mask-loadstore-1.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-additional-options "-Ofast -fno-common" } */
+/* { dg-additional-options "-Ofast -fno-common -mavx" { target avx_runtime } } */
+
+#include <stdlib.h>
+#include "tree-vect.h"
+
+__attribute__((noinline, noclone)) void
+foo (float *__restrict x, float *__restrict y, float *__restrict z) /* Per element: stores to y[i] when x[i] < 0.0f, otherwise overwrites x[i] from z[i].  */
+{
+ float *__restrict p = __builtin_assume_aligned (x, 32);
+ float *__restrict q = __builtin_assume_aligned (y, 32);
+ float *__restrict r = __builtin_assume_aligned (z, 32);
+ int i;
+ for (i = 0; i < 1024; i++)
+ {
+ if (p[i] < 0.0f) /* Each branch stores to a different array, so both stores are conditional.  */
+ q[i] = p[i] + 2.0f;
+ else
+ p[i] = r[i] + 3.0f; /* r[i] is loaded only on this path.  */
+ }
+}
+
+float a[1024] __attribute__((aligned (32))); /* 32-byte alignment matches the __builtin_assume_aligned (..., 32) calls in foo.  */
+float b[1024] __attribute__((aligned (32)));
+float c[1024] __attribute__((aligned (32)));
+
+int
+main ()
+{
+ int i;
+ check_vect ();
+ for (i = 0; i < 1024; i++)
+ {
+ a[i] = (i & 1) ? -i : i; /* Negative for odd i, so foo stores to b[i] exactly there.  */
+ b[i] = 7 * i;
+ c[i] = a[i] - 3.0f; /* Chosen so that r[i] + 3.0f in foo reproduces a[i].  */
+ asm (""); /* Empty asm; presumably keeps this init loop from being vectorized so only foo's loop is counted.  */
+ }
+ foo (a, b, c);
+ for (i = 0; i < 1024; i++)
+ if (a[i] != ((i & 1) ? -i : i) /* a[] must end up with its initial values.  */
+ || b[i] != ((i & 1) ? a[i] + 2.0f : 7 * i) /* b[i] changes only for odd i.  */
+ || c[i] != a[i] - 3.0f) /* c[] is read-only in foo.  */
+ abort ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops" 1 "vect" { target avx_runtime } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx2-gather-5.c b/gcc/testsuite/gcc.target/i386/avx2-gather-5.c
new file mode 100644
index 0000000..892a200
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-gather-5.c
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx2 } */
+/* { dg-options "-O3 -mavx2 -fno-common" } */
+
+#include "avx2-check.h"
+
+#define N 1024 /* Trip count of the loop in foo.  */
+float vf1[N+16], vf2[N], vf3[N]; /* vf1 is the indexed-load source and has 16 more elements than the others.  */
+int k[N]; /* Index into vf1 for each iteration of foo.  */
+
+__attribute__((noinline, noclone)) void
+foo (void) /* Sets vf2[i] to vf1[k[i]] when vf3[i] < 0.0f and to 7.0f otherwise, for i in 0 .. N - 1.  */
+{
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ float f;
+ if (vf3[i] < 0.0f)
+ f = vf1[k[i]]; /* Indexed load through k[i]; performed only on this path.  */
+ else
+ f = 7.0f;
+ vf2[i] = f; /* Unconditional store.  */
+ }
+}
+
+static void
+avx2_test (void) /* Initializes the globals, runs foo, and aborts unless vf2 holds the expected values and the other arrays are unchanged.  */
+{
+ int i;
+ for (i = 0; i < N + 16; i++)
+ {
+ vf1[i] = 5.5f * i;
+ if (i >= N) /* Only vf1 has N + 16 elements; the remaining arrays stop at N.  */
+ continue;
+ vf2[i] = 2.0f;
+ vf3[i] = (i & 1) ? i : -i - 1; /* Negative exactly for even i, so foo does the indexed load only there.  */
+ k[i] = (i & 1) ? ((i & 2) ? -i : N / 2 + i) : (i * 7) % N; /* Even i: index in [0, N).  Odd i: index may be negative or past the end of vf1, and foo never loads through it.  */
+ asm (""); /* Empty asm; presumably keeps this init loop from being vectorized.  */
+ }
+ foo ();
+ for (i = 0; i < N; i++)
+ if (vf1[i] != 5.5 * i
+ || vf2[i] != ((i & 1) ? 7.0f : 5.5f * ((i * 7) % N)) /* Even i: the value loaded from vf1[(i * 7) % N].  */
+ || vf3[i] != ((i & 1) ? i : -i - 1)
+ || k[i] != ((i & 1) ? ((i & 2) ? -i : N / 2 + i) : ((i * 7) % N)))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx2-gather-6.c b/gcc/testsuite/gcc.target/i386/avx2-gather-6.c
new file mode 100644
index 0000000..38e2009
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-gather-6.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx2 -fno-common -fdump-tree-vect-details" } */
+
+#include "avx2-gather-5.c"
+
+/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops in function" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-cond-1.c b/gcc/testsuite/gcc.target/i386/vect-cond-1.c
new file mode 100644
index 0000000..12ae771
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-cond-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -mavx2" { target avx2 } } */
+
+int a[1024]; /* Read and rewritten element by element in foo.  */
+
+int
+foo (int *p) /* Replaces a[i] with *p when a[i] < 30 and with a[i] + 12 otherwise, for i in 0 .. 1023.  */
+{
+ int i;
+ for (i = 0; i < 1024; i++)
+ {
+ int t;
+ if (a[i] < 30)
+ t = *p; /* *p is dereferenced only on this path.  */
+ else
+ t = a[i] + 12;
+ a[i] = t; /* Unconditional store.  */
+ }
+} /* NOTE(review): no return statement although foo is declared int; nothing here calls it and the file is dg-do compile only -- confirm this is intentional.  */
+
+/* { dg-final { cleanup-tree-dump "vect" } } */