diff options
-rw-r--r-- | gcc/omp-expand.c | 110 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-simd-17.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-simd-18.c | 40 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-simd-19.c | 40 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-simd-20.c | 43 |
5 files changed, 230 insertions, 5 deletions
diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c index 9160022..99cb4f9 100644 --- a/gcc/omp-expand.c +++ b/gcc/omp-expand.c @@ -6452,6 +6452,56 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) } else expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1)); + tree altv = NULL_TREE, altn2 = NULL_TREE; + if (fd->collapse == 1 + && !broken_loop + && TREE_CODE (fd->loops[0].step) != INTEGER_CST) + { + /* The vectorizer currently punts on loops with non-constant steps + for the main IV (can't compute number of iterations and gives up + because of that). As for OpenMP loops it is always possible to + compute the number of iterations upfront, use an alternate IV + as the loop iterator: + altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0; + for (i = n1, altv = 0; altv < altn2; altv++, i += step) */ + altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v))); + expand_omp_build_assign (&gsi, altv, build_zero_cst (TREE_TYPE (altv))); + tree itype = TREE_TYPE (fd->loop.v); + if (POINTER_TYPE_P (itype)) + itype = signed_type_for (itype); + t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? 
-1 : 1)); + t = fold_build2 (PLUS_EXPR, itype, + fold_convert (itype, fd->loop.step), t); + t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2)); + t = fold_build2 (MINUS_EXPR, itype, t, + fold_convert (itype, fd->loop.v)); + if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR) + t = fold_build2 (TRUNC_DIV_EXPR, itype, + fold_build1 (NEGATE_EXPR, itype, t), + fold_build1 (NEGATE_EXPR, itype, + fold_convert (itype, fd->loop.step))); + else + t = fold_build2 (TRUNC_DIV_EXPR, itype, t, + fold_convert (itype, fd->loop.step)); + t = fold_convert (TREE_TYPE (altv), t); + altn2 = create_tmp_var (TREE_TYPE (altv)); + expand_omp_build_assign (&gsi, altn2, t); + tree t2 = fold_convert (TREE_TYPE (fd->loop.v), n2); + t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE, + true, GSI_SAME_STMT); + t2 = fold_build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t2); + gassign *g = gimple_build_assign (altn2, COND_EXPR, t2, altn2, + build_zero_cst (TREE_TYPE (altv))); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + } + else if (fd->collapse > 1 + && !broken_loop + && !gimple_omp_for_combined_into_p (fd->for_stmt) + && TREE_CODE (fd->loops[fd->collapse - 1].step) != INTEGER_CST) + { + altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v))); + altn2 = create_tmp_var (TREE_TYPE (altv)); + } if (cond_var) { if (POINTER_TYPE_P (type) @@ -6486,6 +6536,12 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) } else if (TREE_CODE (n2) != INTEGER_CST) expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type)); + if (altv) + { + t = fold_build2 (PLUS_EXPR, TREE_TYPE (altv), altv, + build_one_cst (TREE_TYPE (altv))); + expand_omp_build_assign (&gsi, altv, t); + } if (fd->collapse > 1) { @@ -6525,9 +6581,11 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) /* Emit the condition in L1_BB. 
*/ gsi = gsi_start_bb (l1_bb); - if (fd->collapse > 1 - && !gimple_omp_for_combined_into_p (fd->for_stmt) - && !broken_loop) + if (altv) + t = build2 (LT_EXPR, boolean_type_node, altv, altn2); + else if (fd->collapse > 1 + && !gimple_omp_for_combined_into_p (fd->for_stmt) + && !broken_loop) { i = fd->collapse - 1; tree itype = TREE_TYPE (fd->loops[i].v); @@ -6704,7 +6762,7 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t); if (fd->loops[i + 1].m2) { - if (i + 2 == fd->collapse && n2var) + if (i + 2 == fd->collapse && (n2var || altv)) { gcc_assert (n2v == NULL_TREE); n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v)); @@ -6761,6 +6819,50 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t); expand_omp_build_assign (&gsi, n2var, t); } + if (i + 2 == fd->collapse && altv) + { + /* The vectorizer currently punts on loops with non-constant + steps for the main IV (can't compute number of iterations + and gives up because of that). As for OpenMP loops it is + always possible to compute the number of iterations upfront, + use an alternate IV as the loop iterator. */ + expand_omp_build_assign (&gsi, altv, + build_zero_cst (TREE_TYPE (altv))); + tree itype = TREE_TYPE (fd->loops[i + 1].v); + if (POINTER_TYPE_P (itype)) + itype = signed_type_for (itype); + t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR + ? -1 : 1)); + t = fold_build2 (PLUS_EXPR, itype, + fold_convert (itype, fd->loops[i + 1].step), t); + t = fold_build2 (PLUS_EXPR, itype, t, + fold_convert (itype, + fd->loops[i + 1].m2 + ? 
n2v : fd->loops[i + 1].n2)); + t = fold_build2 (MINUS_EXPR, itype, t, + fold_convert (itype, fd->loops[i + 1].v)); + tree step = fold_convert (itype, fd->loops[i + 1].step); + if (TYPE_UNSIGNED (itype) + && fd->loops[i + 1].cond_code == GT_EXPR) + t = fold_build2 (TRUNC_DIV_EXPR, itype, + fold_build1 (NEGATE_EXPR, itype, t), + fold_build1 (NEGATE_EXPR, itype, step)); + else + t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); + t = fold_convert (TREE_TYPE (altv), t); + expand_omp_build_assign (&gsi, altn2, t); + tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v), + fd->loops[i + 1].m2 + ? n2v : fd->loops[i + 1].n2); + t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE, + true, GSI_SAME_STMT); + t2 = fold_build2 (fd->loops[i + 1].cond_code, boolean_type_node, + fd->loops[i + 1].v, t2); + gassign *g + = gimple_build_assign (altn2, COND_EXPR, t2, altn2, + build_zero_cst (TREE_TYPE (altv))); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + } n2v = nextn2v; make_edge (init_bb, last_bb, EDGE_FALLTHRU); diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-17.c b/gcc/testsuite/gcc.dg/vect/vect-simd-17.c index 9330aaa..951ba3a 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-simd-17.c +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-17.c @@ -1,6 +1,6 @@ /* { dg-additional-options "-fopenmp-simd -fno-tree-vectorize" } */ /* { dg-additional-options "-mavx" { target avx_runtime } } */ -/* { dg-final { scan-tree-dump "vectorized \(\[4-9]\|1\[0-2]\) loops" "vect" { target i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump "vectorized 1\[1-2] loops" "vect" { target i?86-*-* x86_64-*-* } } } */ #include "tree-vect.h" diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-18.c b/gcc/testsuite/gcc.dg/vect/vect-simd-18.c new file mode 100644 index 0000000..b25f5a5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-18.c @@ -0,0 +1,40 @@ +/* { dg-additional-options "-fopenmp-simd -fno-tree-vectorize" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ +/* { dg-final { 
scan-tree-dump "vectorized 1 loops" "vect" { target i?86-*-* x86_64-*-* } } } */
+
+#include "tree-vect.h"
+
+__attribute__((noipa)) int
+foo (int s, int *p)
+{
+  int r = 0, l = 0, i;
+  #pragma omp simd reduction (+:r) linear(l)
+  for (i = 0; i < 10000; i += s)
+    {
+      p[l++] = i;
+      r += i * 3;
+    }
+  return r;
+}
+
+int p[10000 / 78 + 1];	/* s == 78 stores 129 elements: i = 0, 78, ..., 9984.  */
+
+int
+main ()
+{
+  int i, r;
+  check_vect ();
+  r = foo (78, p);
+  for (i = 0; i < 10000 / 78; i++)
+    if (p[i] != 78 * i)
+      abort ();
+  if (r != (10000 / 78) * (10000 / 78 + 1) / 2 * 78 * 3)
+    abort ();
+  r = foo (87, p);
+  for (i = 0; i < 10000 / 87; i++)
+    if (p[i] != 87 * i)
+      abort ();
+  if (r != (10000 / 87) * (10000 / 87 + 1) / 2 * 87 * 3)
+    abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-19.c b/gcc/testsuite/gcc.dg/vect/vect-simd-19.c
new file mode 100644
index 0000000..a71dfa6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-19.c
@@ -0,0 +1,40 @@
+/* { dg-additional-options "-fopenmp-simd -fno-tree-vectorize" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target i?86-*-* x86_64-*-* } } } */
+
+#include "tree-vect.h"
+
+__attribute__((noipa)) int
+foo (int s, int m, int n, int *p)
+{
+  int r = 0, l = 0, i;
+  #pragma omp simd reduction (+:r) linear(l)
+  for (i = m; i < n; i += s)
+    {
+      p[l++] = i;
+      r += i * 3;
+    }
+  return r;
+}
+
+int p[10000 / 78 + 1];	/* s == 78 stores 129 elements: i = 0, 78, ..., 9984.  */
+
+int
+main ()
+{
+  int i, r;
+  check_vect ();
+  r = foo (78, 0, 10000, p);
+  for (i = 0; i < 10000 / 78; i++)
+    if (p[i] != 78 * i)
+      abort ();
+  if (r != (10000 / 78) * (10000 / 78 + 1) / 2 * 78 * 3)
+    abort ();
+  r = foo (87, 0, 10000, p);
+  for (i = 0; i < 10000 / 87; i++)
+    if (p[i] != 87 * i)
+      abort ();
+  if (r != (10000 / 87) * (10000 / 87 + 1) / 2 * 87 * 3)
+    abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-20.c b/gcc/testsuite/gcc.dg/vect/vect-simd-20.c
new file mode 100644
index 0000000..c85f05f
--- /dev/null
+++ 
b/gcc/testsuite/gcc.dg/vect/vect-simd-20.c
@@ -0,0 +1,43 @@
+/* { dg-additional-options "-fopenmp-simd -fno-tree-vectorize" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target i?86-*-* x86_64-*-* } } } */
+
+#include "tree-vect.h"
+
+__attribute__((noipa)) int
+foo (int s, int m, int n, int *p)
+{
+  int r = 0, l = 0, i, j;
+  #pragma omp simd reduction (+:r) linear(l) collapse(2)
+  for (j = 0; j < 7; j++)
+    for (i = m; i < n; i += s)
+      {
+        p[l++] = i;
+        r += i * 3;
+      }
+  return r;
+}
+
+int p[(10000 / 78 + 1) * 7];	/* s == 78 stores 129 elements per j, 903 in total.  */
+
+int
+main ()
+{
+  int i, j, r;
+  check_vect ();
+  r = foo (78, 0, 10000, p);
+  for (j = 0; j < 7; j++)
+    for (i = 0; i < 10000 / 78; i++)
+      if (p[j * (10000 / 78 + 1) + i] != 78 * i)
+        abort ();
+  if (r != (10000 / 78) * (10000 / 78 + 1) / 2 * 78 * 3 * 7)
+    abort ();
+  r = foo (87, 0, 10000, p);
+  for (j = 0; j < 7; j++)
+    for (i = 0; i < 10000 / 87; i++)
+      if (p[j * (10000 / 87 + 1) + i] != 87 * i)
+        abort ();
+  if (r != (10000 / 87) * (10000 / 87 + 1) / 2 * 87 * 3 * 7)
+    abort ();
+  return 0;
+}
|