diff options
-rw-r--r-- | gcc/ChangeLog | 19 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 7 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/vcond_10.c | 36 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/vcond_10_run.c | 24 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/vcond_11.c | 36 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/vcond_11_run.c | 28 | ||||
-rw-r--r-- | gcc/tree-vect-loop.c | 529 | ||||
-rw-r--r-- | gcc/tree-vect-slp.c | 121 | ||||
-rw-r--r-- | gcc/tree-vect-stmts.c | 308 | ||||
-rw-r--r-- | gcc/tree-vectorizer.h | 2 |
10 files changed, 637 insertions, 473 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 1f67c9b..95e966a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,22 @@ +2018-05-16 Richard Sandiford <richard.sandiford@linaro.org> + + * tree-vectorizer.h (vect_get_vector_types_for_stmt): Declare. + (vect_get_mask_type_for_stmt): Likewise. + * tree-vect-slp.c (vect_two_operations_perm_ok_p): New function, + split out from... + (vect_build_slp_tree_1): ...here. Use vect_get_vector_types_for_stmt + to determine the statement's vector type and the vector type that + should be used for calculating nunits. Deal with cases in which + the type has to be deferred. + (vect_slp_analyze_node_operations): Use vect_get_vector_types_for_stmt + and vect_get_mask_type_for_stmt to calculate STMT_VINFO_VECTYPE. + * tree-vect-loop.c (vect_determine_vf_for_stmt_1) + (vect_determine_vf_for_stmt): New functions, split out from... + (vect_determine_vectorization_factor): ...here. + * tree-vect-stmts.c (vect_get_vector_types_for_stmt) + (vect_get_mask_type_for_stmt): New functions, split out from + vect_determine_vectorization_factor. + 2018-05-16 Richard Biener <rguenther@suse.de> * tree-cfg.c (verify_gimple_assign_ternary): Properly diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 00f3c62..7e32f60 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2018-05-16 Richard Sandiford <richard.sandiford@linaro.org> + + * gcc.target/aarch64/sve/vcond_10.c: New test. + * gcc.target/aarch64/sve/vcond_10_run.c: Likewise. + * gcc.target/aarch64/sve/vcond_11.c: Likewise. + * gcc.target/aarch64/sve/vcond_11_run.c: Likewise. + 2018-05-15 Martin Sebor <msebor@redhat.com> PR tree-optimization/85753 diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_10.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_10.c new file mode 100644 index 0000000..76babbd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_10.c @@ -0,0 +1,36 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include <stdint.h> + +#define DEF_LOOP(TYPE) \ + void __attribute__ ((noinline, noclone)) \ + test_##TYPE (TYPE *a, TYPE a1, TYPE a2, TYPE a3, TYPE a4, int n) \ + { \ + for (int i = 0; i < n; i += 2) \ + { \ + a[i] = a[i] >= 1 && a[i] != 3 ? a1 : a2; \ + a[i + 1] = a[i + 1] >= 1 && a[i + 1] != 3 ? a3 : a4; \ + } \ + } + +#define FOR_EACH_TYPE(T) \ + T (int8_t) \ + T (uint8_t) \ + T (int16_t) \ + T (uint16_t) \ + T (int32_t) \ + T (uint32_t) \ + T (int64_t) \ + T (uint64_t) \ + T (_Float16) \ + T (float) \ + T (double) + +FOR_EACH_TYPE (DEF_LOOP) + +/* { dg-final { scan-assembler-times {\tld1b\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1w\t} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1d\t} 3 } } */ +/* { dg-final { scan-assembler-times {\tsel\tz[0-9]} 11 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_10_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_10_run.c new file mode 100644 index 0000000..3e841f1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_10_run.c @@ -0,0 +1,24 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "vcond_10.c" + +#define N 133 + +#define TEST_LOOP(TYPE) \ + { \ + TYPE a[N]; \ + for (int i = 0; i < N; ++i) \ + a[i] = i % 7; \ + test_##TYPE (a, 10, 11, 12, 13, N); \ + for (int i = 0; i < N; ++i) \ + if (a[i] != 10 + (i & 1) * 2 + (i % 7 == 0 || i % 7 == 3)) \ + __builtin_abort (); \ + } + +int +main (void) +{ + FOR_EACH_TYPE (TEST_LOOP); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_11.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_11.c new file mode 100644 index 0000000..3c9e340 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_11.c @@ -0,0 +1,36 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include <stdint.h> + +#define DEF_LOOP(TYPE) \ + void __attribute__ ((noinline, noclone)) \ + test_##TYPE (int *restrict a, TYPE *restrict b, int a1, int a2, \ + int a3, int a4, int n) \ + { \ + for (int i = 0; i < n; i += 2) \ + { \ + a[i] = a[i] >= 1 & b[i] != 3 ? a1 : a2; \ + a[i + 1] = a[i + 1] >= 1 & b[i + 1] != 3 ? a3 : a4; \ + } \ + } + +#define FOR_EACH_TYPE(T) \ + T (int8_t) \ + T (uint8_t) \ + T (int16_t) \ + T (uint16_t) \ + T (int64_t) \ + T (uint64_t) \ + T (double) + +FOR_EACH_TYPE (DEF_LOOP) + +/* { dg-final { scan-assembler-times {\tld1b\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 2 } } */ +/* 4 for each 8-bit function, 2 for each 16-bit function, 1 for + each 64-bit function. */ +/* { dg-final { scan-assembler-times {\tld1w\t} 15 } } */ +/* 3 64-bit functions * 2 64-bit vectors per 32-bit vector. */ +/* { dg-final { scan-assembler-times {\tld1d\t} 6 } } */ +/* { dg-final { scan-assembler-times {\tsel\tz[0-9]} 15 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_11_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_11_run.c new file mode 100644 index 0000000..9a4edb8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_11_run.c @@ -0,0 +1,28 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "vcond_11.c" + +#define N 133 + +#define TEST_LOOP(TYPE) \ + { \ + int a[N]; \ + TYPE b[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + a[i] = i % 5; \ + b[i] = i % 7; \ + } \ + test_##TYPE (a, b, 10, 11, 12, 13, N); \ + for (int i = 0; i < N; ++i) \ + if (a[i] != 10 + (i & 1) * 2 + (i % 5 == 0 || i % 7 == 3)) \ + __builtin_abort (); \ + } + +int +main (void) +{ + FOR_EACH_TYPE (TEST_LOOP); + return 0; +} diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 4ce721ed..fb217b85 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -155,6 +155,108 @@ along with GCC; see the file COPYING3. If not see static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *); +/* Subroutine of vect_determine_vf_for_stmt that handles only one + statement. VECTYPE_MAYBE_SET_P is true if STMT_VINFO_VECTYPE + may already be set for general statements (not just data refs). */ + +static bool +vect_determine_vf_for_stmt_1 (stmt_vec_info stmt_info, + bool vectype_maybe_set_p, + poly_uint64 *vf, + vec<stmt_vec_info > *mask_producers) +{ + gimple *stmt = stmt_info->stmt; + + if ((!STMT_VINFO_RELEVANT_P (stmt_info) + && !STMT_VINFO_LIVE_P (stmt_info)) + || gimple_clobber_p (stmt)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, "skip.\n"); + return true; + } + + tree stmt_vectype, nunits_vectype; + if (!vect_get_vector_types_for_stmt (stmt_info, &stmt_vectype, + &nunits_vectype)) + return false; + + if (stmt_vectype) + { + if (STMT_VINFO_VECTYPE (stmt_info)) + /* The only case when a vectype had been already set is for stmts + that contain a data ref, or for "pattern-stmts" (stmts generated + by the vectorizer to represent/replace a certain idiom). */ + gcc_assert ((STMT_VINFO_DATA_REF (stmt_info) + || vectype_maybe_set_p) + && STMT_VINFO_VECTYPE (stmt_info) == stmt_vectype); + else if (stmt_vectype == boolean_type_node) + mask_producers->safe_push (stmt_info); + else + STMT_VINFO_VECTYPE (stmt_info) = stmt_vectype; + } + + if (nunits_vectype) + vect_update_max_nunits (vf, nunits_vectype); + + return true; +} + +/* Subroutine of vect_determine_vectorization_factor. Set the vector + types of STMT_INFO and all attached pattern statements and update + the vectorization factor VF accordingly. If some of the statements + produce a mask result whose vector type can only be calculated later, + add them to MASK_PRODUCERS. Return true on success or false if + something prevented vectorization. */ + +static bool +vect_determine_vf_for_stmt (stmt_vec_info stmt_info, poly_uint64 *vf, + vec<stmt_vec_info > *mask_producers) +{ + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: "); + dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_info->stmt, 0); + } + if (!vect_determine_vf_for_stmt_1 (stmt_info, false, vf, mask_producers)) + return false; + + if (STMT_VINFO_IN_PATTERN_P (stmt_info) + && STMT_VINFO_RELATED_STMT (stmt_info)) + { + stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)); + + /* If a pattern statement has def stmts, analyze them too. */ + gimple *pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info); + for (gimple_stmt_iterator si = gsi_start (pattern_def_seq); + !gsi_end_p (si); gsi_next (&si)) + { + stmt_vec_info def_stmt_info = vinfo_for_stmt (gsi_stmt (si)); + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, + "==> examining pattern def stmt: "); + dump_gimple_stmt (MSG_NOTE, TDF_SLIM, + def_stmt_info->stmt, 0); + } + if (!vect_determine_vf_for_stmt_1 (def_stmt_info, true, + vf, mask_producers)) + return false; + } + + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, + "==> examining pattern statement: "); + dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_info->stmt, 0); + } + if (!vect_determine_vf_for_stmt_1 (stmt_info, true, vf, mask_producers)) + return false; + } + + return true; +} + /* Function vect_determine_vectorization_factor Determine the vectorization factor (VF). VF is the number of data elements @@ -192,12 +294,6 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo) tree vectype; stmt_vec_info stmt_info; unsigned i; - HOST_WIDE_INT dummy; - gimple *stmt, *pattern_stmt = NULL; - gimple_seq pattern_def_seq = NULL; - gimple_stmt_iterator pattern_def_si = gsi_none (); - bool analyze_pattern_stmt = false; - bool bool_result; auto_vec<stmt_vec_info> mask_producers; if (dump_enabled_p ()) @@ -269,304 +365,13 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo) } } - for (gimple_stmt_iterator si = gsi_start_bb (bb); - !gsi_end_p (si) || analyze_pattern_stmt;) - { - tree vf_vectype; - - if (analyze_pattern_stmt) - stmt = pattern_stmt; - else - stmt = gsi_stmt (si); - - stmt_info = vinfo_for_stmt (stmt); - - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_NOTE, vect_location, - "==> examining statement: "); - dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0); - } - - gcc_assert (stmt_info); - - /* Skip stmts which do not need to be vectorized. */ - if ((!STMT_VINFO_RELEVANT_P (stmt_info) - && !STMT_VINFO_LIVE_P (stmt_info)) - || gimple_clobber_p (stmt)) - { - if (STMT_VINFO_IN_PATTERN_P (stmt_info) - && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info)) - && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) - || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) - { - stmt = pattern_stmt; - stmt_info = vinfo_for_stmt (pattern_stmt); - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_NOTE, vect_location, - "==> examining pattern statement: "); - dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0); - } - } - else - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, "skip.\n"); - gsi_next (&si); - continue; - } - } - else if (STMT_VINFO_IN_PATTERN_P (stmt_info) - && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info)) - && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) - || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) - analyze_pattern_stmt = true; - - /* If a pattern statement has def stmts, analyze them too. */ - if (is_pattern_stmt_p (stmt_info)) - { - if (pattern_def_seq == NULL) - { - pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info); - pattern_def_si = gsi_start (pattern_def_seq); - } - else if (!gsi_end_p (pattern_def_si)) - gsi_next (&pattern_def_si); - if (pattern_def_seq != NULL) - { - gimple *pattern_def_stmt = NULL; - stmt_vec_info pattern_def_stmt_info = NULL; - - while (!gsi_end_p (pattern_def_si)) - { - pattern_def_stmt = gsi_stmt (pattern_def_si); - pattern_def_stmt_info - = vinfo_for_stmt (pattern_def_stmt); - if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info) - || STMT_VINFO_LIVE_P (pattern_def_stmt_info)) - break; - gsi_next (&pattern_def_si); - } - - if (!gsi_end_p (pattern_def_si)) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_NOTE, vect_location, - "==> examining pattern def stmt: "); - dump_gimple_stmt (MSG_NOTE, TDF_SLIM, - pattern_def_stmt, 0); - } - - stmt = pattern_def_stmt; - stmt_info = pattern_def_stmt_info; - } - else - { - pattern_def_si = gsi_none (); - analyze_pattern_stmt = false; - } - } - else - analyze_pattern_stmt = false; - } - - if (gimple_get_lhs (stmt) == NULL_TREE - /* MASK_STORE has no lhs, but is ok. */ - && (!is_gimple_call (stmt) - || !gimple_call_internal_p (stmt) - || gimple_call_internal_fn (stmt) != IFN_MASK_STORE)) - { - if (is_gimple_call (stmt)) - { - /* Ignore calls with no lhs. These must be calls to - #pragma omp simd functions, and what vectorization factor - it really needs can't be determined until - vectorizable_simd_clone_call. */ - if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si)) - { - pattern_def_seq = NULL; - gsi_next (&si); - } - continue; - } - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "not vectorized: irregular stmt."); - dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, - 0); - } - return false; - } - - if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt)))) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "not vectorized: vector stmt in loop:"); - dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); - } - return false; - } - - bool_result = false; - - if (STMT_VINFO_VECTYPE (stmt_info)) - { - /* The only case when a vectype had been already set is for stmts - that contain a dataref, or for "pattern-stmts" (stmts - generated by the vectorizer to represent/replace a certain - idiom). */ - gcc_assert (STMT_VINFO_DATA_REF (stmt_info) - || is_pattern_stmt_p (stmt_info) - || !gsi_end_p (pattern_def_si)); - vectype = STMT_VINFO_VECTYPE (stmt_info); - } - else - { - gcc_assert (!STMT_VINFO_DATA_REF (stmt_info)); - if (gimple_call_internal_p (stmt, IFN_MASK_STORE)) - scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3)); - else - scalar_type = TREE_TYPE (gimple_get_lhs (stmt)); - - /* Bool ops don't participate in vectorization factor - computation. For comparison use compared types to - compute a factor. */ - if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type) - && is_gimple_assign (stmt) - && gimple_assign_rhs_code (stmt) != COND_EXPR) - { - if (STMT_VINFO_RELEVANT_P (stmt_info) - || STMT_VINFO_LIVE_P (stmt_info)) - mask_producers.safe_push (stmt_info); - bool_result = true; - - if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) - == tcc_comparison - && !VECT_SCALAR_BOOLEAN_TYPE_P - (TREE_TYPE (gimple_assign_rhs1 (stmt)))) - scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt)); - else - { - if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si)) - { - pattern_def_seq = NULL; - gsi_next (&si); - } - continue; - } - } - - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_NOTE, vect_location, - "get vectype for scalar type: "); - dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type); - dump_printf (MSG_NOTE, "\n"); - } - vectype = get_vectype_for_scalar_type (scalar_type); - if (!vectype) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "not vectorized: unsupported " - "data-type "); - dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, - scalar_type); - dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); - } - return false; - } - - if (!bool_result) - STMT_VINFO_VECTYPE (stmt_info) = vectype; - - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_NOTE, vect_location, "vectype: "); - dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype); - dump_printf (MSG_NOTE, "\n"); - } - } - - /* Don't try to compute VF out scalar types if we stmt - produces boolean vector. Use result vectype instead. */ - if (VECTOR_BOOLEAN_TYPE_P (vectype)) - vf_vectype = vectype; - else - { - /* The vectorization factor is according to the smallest - scalar type (or the largest vector size, but we only - support one vector size per loop). */ - if (!bool_result) - scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, - &dummy); - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_NOTE, vect_location, - "get vectype for scalar type: "); - dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type); - dump_printf (MSG_NOTE, "\n"); - } - vf_vectype = get_vectype_for_scalar_type (scalar_type); - } - if (!vf_vectype) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "not vectorized: unsupported data-type "); - dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, - scalar_type); - dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); - } - return false; - } - - if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)), - GET_MODE_SIZE (TYPE_MODE (vf_vectype)))) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "not vectorized: different sized vector " - "types in statement, "); - dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, - vectype); - dump_printf (MSG_MISSED_OPTIMIZATION, " and "); - dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, - vf_vectype); - dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); - } - return false; - } - - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_NOTE, vect_location, "vectype: "); - dump_generic_expr (MSG_NOTE, TDF_SLIM, vf_vectype); - dump_printf (MSG_NOTE, "\n"); - } - - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_NOTE, vect_location, "nunits = "); - dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (vf_vectype)); - dump_printf (MSG_NOTE, "\n"); - } - - vect_update_max_nunits (&vectorization_factor, vf_vectype); - - if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si)) - { - pattern_def_seq = NULL; - gsi_next (&si); - } + for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); + gsi_next (&si)) + { + stmt_info = vinfo_for_stmt (gsi_stmt (si)); + if (!vect_determine_vf_for_stmt (stmt_info, &vectorization_factor, + &mask_producers)) + return false; } } @@ -589,119 +394,11 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo) for (i = 0; i < mask_producers.length (); i++) { - tree mask_type = NULL; - - stmt = STMT_VINFO_STMT (mask_producers[i]); - - if (is_gimple_assign (stmt) - && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison - && !VECT_SCALAR_BOOLEAN_TYPE_P - (TREE_TYPE (gimple_assign_rhs1 (stmt)))) - { - scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt)); - mask_type = get_mask_type_for_scalar_type (scalar_type); - - if (!mask_type) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "not vectorized: unsupported mask\n"); - return false; - } - } - else - { - tree rhs; - ssa_op_iter iter; - gimple *def_stmt; - enum vect_def_type dt; - - FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE) - { - if (!vect_is_simple_use (rhs, mask_producers[i]->vinfo, - &def_stmt, &dt, &vectype)) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "not vectorized: can't compute mask type " - "for statement, "); - dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, - 0); - } - return false; - } - - /* No vectype probably means external definition. - Allow it in case there is another operand which - allows to determine mask type. */ - if (!vectype) - continue; - - if (!mask_type) - mask_type = vectype; - else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type), - TYPE_VECTOR_SUBPARTS (vectype))) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "not vectorized: different sized masks " - "types in statement, "); - dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, - mask_type); - dump_printf (MSG_MISSED_OPTIMIZATION, " and "); - dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, - vectype); - dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); - } - return false; - } - else if (VECTOR_BOOLEAN_TYPE_P (mask_type) - != VECTOR_BOOLEAN_TYPE_P (vectype)) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "not vectorized: mixed mask and " - "nonmask vector types in statement, "); - dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, - mask_type); - dump_printf (MSG_MISSED_OPTIMIZATION, " and "); - dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, - vectype); - dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); - } - return false; - } - } - - /* We may compare boolean value loaded as vector of integers. - Fix mask_type in such case. */ - if (mask_type - && !VECTOR_BOOLEAN_TYPE_P (mask_type) - && gimple_code (stmt) == GIMPLE_ASSIGN - && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison) - mask_type = build_same_sized_truth_vector_type (mask_type); - } - - /* No mask_type should mean loop invariant predicate. - This is probably a subject for optimization in - if-conversion. */ + stmt_info = mask_producers[i]; + tree mask_type = vect_get_mask_type_for_stmt (stmt_info); if (!mask_type) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "not vectorized: can't compute mask type " - "for statement, "); - dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, - 0); - } - return false; - } - - STMT_VINFO_VECTYPE (mask_producers[i]) = mask_type; + return false; + STMT_VINFO_VECTYPE (stmt_info) = mask_type; } return true; diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 4639bfc..aa239b9 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -608,6 +608,33 @@ vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size, return true; } +/* STMTS is a group of GROUP_SIZE SLP statements in which some + statements do the same operation as the first statement and in which + the others do ALT_STMT_CODE. Return true if we can take one vector + of the first operation and one vector of the second and permute them + to get the required result. VECTYPE is the type of the vector that + would be permuted. */ + +static bool +vect_two_operations_perm_ok_p (vec<gimple *> stmts, unsigned int group_size, + tree vectype, tree_code alt_stmt_code) +{ + unsigned HOST_WIDE_INT count; + if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count)) + return false; + + vec_perm_builder sel (count, count, 1); + for (unsigned int i = 0; i < count; ++i) + { + unsigned int elt = i; + if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code) + elt += count; + sel.quick_push (elt); + } + vec_perm_indices indices (sel, 2, count); + return can_vec_perm_const_p (TYPE_MODE (vectype), indices); +} + /* Verify if the scalar stmts STMTS are isomorphic, require data permutation or are of unsupported types of operation. Return true if they are, otherwise return false and indicate in *MATCHES @@ -636,17 +663,17 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, enum tree_code first_cond_code = ERROR_MARK; tree lhs; bool need_same_oprnds = false; - tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE; + tree vectype = NULL_TREE, first_op1 = NULL_TREE; optab optab; int icode; machine_mode optab_op2_mode; machine_mode vec_mode; - HOST_WIDE_INT dummy; gimple *first_load = NULL, *prev_first_load = NULL; /* For every stmt in NODE find its def stmt/s. */ FOR_EACH_VEC_ELT (stmts, i, stmt) { + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); swap[i] = 0; matches[i] = false; @@ -685,15 +712,19 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, return false; } - scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy); - vectype = get_vectype_for_scalar_type (scalar_type); - if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype, - max_nunits)) + tree nunits_vectype; + if (!vect_get_vector_types_for_stmt (stmt_info, &vectype, + &nunits_vectype) + || (nunits_vectype + && !vect_record_max_nunits (vinfo, stmt, group_size, + nunits_vectype, max_nunits))) { /* Fatal mismatch. */ matches[0] = false; - return false; - } + return false; + } + + gcc_assert (vectype); if (gcall *call_stmt = dyn_cast <gcall *> (stmt)) { @@ -730,6 +761,17 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, || rhs_code == LROTATE_EXPR || rhs_code == RROTATE_EXPR) { + if (vectype == boolean_type_node) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "Build SLP failed: shift of a" + " boolean.\n"); + /* Fatal mismatch. */ + matches[0] = false; + return false; + } + vec_mode = TYPE_MODE (vectype); /* First see if we have a vector/vector shift. */ @@ -973,29 +1015,12 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, /* If we allowed a two-operation SLP node verify the target can cope with the permute we are going to use. */ - poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); if (alt_stmt_code != ERROR_MARK && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference) { - unsigned HOST_WIDE_INT count; - if (!nunits.is_constant (&count)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "Build SLP failed: different operations " - "not allowed with variable-length SLP.\n"); - return false; - } - vec_perm_builder sel (count, count, 1); - for (i = 0; i < count; ++i) - { - unsigned int elt = i; - if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code) - elt += count; - sel.quick_push (elt); - } - vec_perm_indices indices (sel, 2, count); - if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices)) + if (vectype == boolean_type_node + || !vect_two_operations_perm_ok_p (stmts, group_size, + vectype, alt_stmt_code)) { for (i = 0; i < group_size; ++i) if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code) @@ -2759,36 +2784,18 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node, if (bb_vinfo && ! STMT_VINFO_DATA_REF (stmt_info)) { - gcc_assert (PURE_SLP_STMT (stmt_info)); - - tree scalar_type = TREE_TYPE (gimple_get_lhs (stmt)); - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_NOTE, vect_location, - "get vectype for scalar type: "); - dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type); - dump_printf (MSG_NOTE, "\n"); - } - - tree vectype = get_vectype_for_scalar_type (scalar_type); - if (!vectype) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "not SLPed: unsupported data-type "); - dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, - scalar_type); - dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); - } - return false; - } - - if (dump_enabled_p ()) + tree vectype, nunits_vectype; + if (!vect_get_vector_types_for_stmt (stmt_info, &vectype, + &nunits_vectype)) + /* We checked this when building the node. */ + gcc_unreachable (); + if (vectype == boolean_type_node) { - dump_printf_loc (MSG_NOTE, vect_location, "vectype: "); - dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype); - dump_printf (MSG_NOTE, "\n"); + vectype = vect_get_mask_type_for_stmt (stmt_info); + if (!vectype) + /* vect_get_mask_type_for_stmt has already explained the + failure. */ + return false; } gimple *sstmt; diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 1e8ccbc..0fd9410 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -10520,3 +10520,311 @@ vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index, gimple_seq_add_stmt (seq, call); return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp); } + +/* Try to compute the vector types required to vectorize STMT_INFO, + returning true on success and false if vectorization isn't possible. + + On success: + + - Set *STMT_VECTYPE_OUT to: + - NULL_TREE if the statement doesn't need to be vectorized; + - boolean_type_node if the statement is a boolean operation whose + vector type can only be determined once all the other vector types + are known; and + - the equivalent of STMT_VINFO_VECTYPE otherwise. + + - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum + number of units needed to vectorize STMT_INFO, or NULL_TREE if the + statement does not help to determine the overall number of units. */ + +bool +vect_get_vector_types_for_stmt (stmt_vec_info stmt_info, + tree *stmt_vectype_out, + tree *nunits_vectype_out) +{ + gimple *stmt = stmt_info->stmt; + + *stmt_vectype_out = NULL_TREE; + *nunits_vectype_out = NULL_TREE; + + if (gimple_get_lhs (stmt) == NULL_TREE + /* MASK_STORE has no lhs, but is ok. */ + && !gimple_call_internal_p (stmt, IFN_MASK_STORE)) + { + if (is_a <gcall *> (stmt)) + { + /* Ignore calls with no lhs. These must be calls to + #pragma omp simd functions, and what vectorization factor + it really needs can't be determined until + vectorizable_simd_clone_call. */ + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "defer to SIMD clone analysis.\n"); + return true; + } + + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: irregular stmt."); + dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); + } + return false; + } + + if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt)))) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: vector stmt in loop:"); + dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); + } + return false; + } + + tree vectype; + tree scalar_type = NULL_TREE; + if (STMT_VINFO_VECTYPE (stmt_info)) + *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info); + else + { + gcc_assert (!STMT_VINFO_DATA_REF (stmt_info)); + if (gimple_call_internal_p (stmt, IFN_MASK_STORE)) + scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3)); + else + scalar_type = TREE_TYPE (gimple_get_lhs (stmt)); + + /* Pure bool ops don't participate in number-of-units computation. + For comparisons use the types being compared. */ + if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type) + && is_gimple_assign (stmt) + && gimple_assign_rhs_code (stmt) != COND_EXPR) + { + *stmt_vectype_out = boolean_type_node; + + tree rhs1 = gimple_assign_rhs1 (stmt); + if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison + && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))) + scalar_type = TREE_TYPE (rhs1); + else + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "pure bool operation.\n"); + return true; + } + } + + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, + "get vectype for scalar type: "); + dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type); + dump_printf (MSG_NOTE, "\n"); + } + vectype = get_vectype_for_scalar_type (scalar_type); + if (!vectype) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: unsupported data-type "); + dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, + scalar_type); + dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); + } + return false; + } + + if (!*stmt_vectype_out) + *stmt_vectype_out = vectype; + + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, "vectype: "); + dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype); + dump_printf (MSG_NOTE, "\n"); + } + } + + /* Don't try to compute scalar types if the stmt produces a boolean + vector; use the existing vector type instead. */ + tree nunits_vectype; + if (VECTOR_BOOLEAN_TYPE_P (vectype)) + nunits_vectype = vectype; + else + { + /* The number of units is set according to the smallest scalar + type (or the largest vector size, but we only support one + vector size per vectorization). */ + if (*stmt_vectype_out != boolean_type_node) + { + HOST_WIDE_INT dummy; + scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy); + } + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, + "get vectype for scalar type: "); + dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type); + dump_printf (MSG_NOTE, "\n"); + } + nunits_vectype = get_vectype_for_scalar_type (scalar_type); + } + if (!nunits_vectype) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: unsupported data-type "); + dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type); + dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); + } + return false; + } + + if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)), + GET_MODE_SIZE (TYPE_MODE (nunits_vectype)))) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: different sized vector " + "types in statement, "); + dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype); + dump_printf (MSG_MISSED_OPTIMIZATION, " and "); + dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, nunits_vectype); + dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); + } + return false; + } + + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, "vectype: "); + dump_generic_expr (MSG_NOTE, TDF_SLIM, nunits_vectype); + dump_printf (MSG_NOTE, "\n"); + + dump_printf_loc (MSG_NOTE, vect_location, "nunits = "); + dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype)); + dump_printf (MSG_NOTE, "\n"); + } + + *nunits_vectype_out = nunits_vectype; + return true; +} + +/* Try to determine the correct vector type for STMT_INFO, which is a + statement that produces a scalar boolean result. Return the vector + type on success, otherwise return NULL_TREE. */ + +tree +vect_get_mask_type_for_stmt (stmt_vec_info stmt_info) +{ + gimple *stmt = stmt_info->stmt; + tree mask_type = NULL; + tree vectype, scalar_type; + + if (is_gimple_assign (stmt) + && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison + && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt)))) + { + scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt)); + mask_type = get_mask_type_for_scalar_type (scalar_type); + + if (!mask_type) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: unsupported mask\n"); + return NULL_TREE; + } + } + else + { + tree rhs; + ssa_op_iter iter; + gimple *def_stmt; + enum vect_def_type dt; + + FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE) + { + if (!vect_is_simple_use (rhs, stmt_info->vinfo, + &def_stmt, &dt, &vectype)) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: can't compute mask type " + "for statement, "); + dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, + 0); + } + return NULL_TREE; + } + + /* No vectype probably means external definition. + Allow it in case there is another operand which + allows to determine mask type. */ + if (!vectype) + continue; + + if (!mask_type) + mask_type = vectype; + else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type), + TYPE_VECTOR_SUBPARTS (vectype))) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: different sized masks " + "types in statement, "); + dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, + mask_type); + dump_printf (MSG_MISSED_OPTIMIZATION, " and "); + dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, + vectype); + dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); + } + return NULL_TREE; + } + else if (VECTOR_BOOLEAN_TYPE_P (mask_type) + != VECTOR_BOOLEAN_TYPE_P (vectype)) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: mixed mask and " + "nonmask vector types in statement, "); + dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, + mask_type); + dump_printf (MSG_MISSED_OPTIMIZATION, " and "); + dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, + vectype); + dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); + } + return NULL_TREE; + } + } + + /* We may compare boolean value loaded as vector of integers. + Fix mask_type in such case. */ + if (mask_type + && !VECTOR_BOOLEAN_TYPE_P (mask_type) + && gimple_code (stmt) == GIMPLE_ASSIGN + && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison) + mask_type = build_same_sized_truth_vector_type (mask_type); + } + + /* No mask_type should mean loop invariant predicate. + This is probably a subject for optimization in if-conversion. */ + if (!mask_type && dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: can't compute mask type " + "for statement, "); + dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); + } + return mask_type; +} diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 7e2b00f..049e3dd 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -1467,6 +1467,8 @@ extern tree vect_gen_perm_mask_checked (tree, const vec_perm_indices &); extern void optimize_mask_stores (struct loop*); extern gcall *vect_gen_while (tree, tree, tree); extern tree vect_gen_while_not (gimple_seq *, tree, tree, tree); +extern bool vect_get_vector_types_for_stmt (stmt_vec_info, tree *, tree *); +extern tree vect_get_mask_type_for_stmt (stmt_vec_info); /* In tree-vect-data-refs.c. */ extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int); |