aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog31
-rw-r--r--gcc/cfgloop.h3
-rw-r--r--gcc/cfgloopmanip.c1
-rw-r--r--gcc/config/aarch64/aarch64.c2
-rw-r--r--gcc/config/arc/arc.c2
-rw-r--r--gcc/config/arm/arm.c4
-rw-r--r--gcc/config/i386/i386.c13
-rw-r--r--gcc/config/mips/mips.c2
-rw-r--r--gcc/doc/tm.texi4
-rw-r--r--gcc/omp-expand.c7
-rw-r--r--gcc/omp-general.c2
-rw-r--r--gcc/omp-low.c2
-rw-r--r--gcc/optabs-query.c2
-rw-r--r--gcc/target.def4
-rw-r--r--gcc/targhooks.c2
-rw-r--r--gcc/targhooks.h2
-rw-r--r--gcc/testsuite/ChangeLog4
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-simd-1.c35
-rw-r--r--gcc/tree-vect-loop.c58
-rw-r--r--gcc/tree-vect-slp.c2
20 files changed, 160 insertions, 22 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e07f8a1..682d5f7 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,34 @@
+2019-05-20 Jakub Jelinek <jakub@redhat.com>
+
+ * cfgloop.h (struct loop): Add simdlen member.
+ * cfgloopmanip.c (copy_loop_info): Copy simdlen as well.
+ * omp-expand.c (expand_omp_simd): Set it if simdlen clause is present.
+ * tree-vect-loop.c (vect_analyze_loop): Pass loop->simdlen != 0
+ as new argument to autovectorize_vector_sizes target hook. If
+ loop->simdlen, pick up vector size where the vectorization factor
+ is equal to loop->simd, and if there is none, fall back to the first
+ successful one.
+ (vect_transform_loop): Adjust autovectorize_vector_sizes target hook
+ caller.
+ * omp-low.c (omp_clause_aligned_alignment): Likewise.
+ * omp-general.c (omp_max_vf): Likewise.
+ * optabs-query.c (can_vec_mask_load_store_p): Likewise.
+ * tree-vect-slp.c (vect_slp_bb): Likewise.
+ * target.def (autovectorize_vector_sizes): Add ALL argument and
+ document it.
+ * doc/tm.texi: Adjust documentation.
+ * targhooks.c (default_autovectorize_vector_sizes): Add bool argument.
+ * targhooks.h (default_autovectorize_vector_sizes): Likewise.
+ * config/aarch64/aarch64.c (aarch64_autovectorize_vector_sizes): Add
+ bool argument.
+ * config/arc/arc.c (arc_autovectorize_vector_sizes): Likewise.
+ * config/arm/arm.c (arm_autovectorize_vector_sizes): Likewise.
+ * config/mips/mips.c (mips_autovectorize_vector_sizes): Likewise.
+ * config/i386/i386.c (ix86_autovectorize_vector_sizes): Likewise. If
+ true and TARGET_AVX512F or TARGET_AVX, push 3 or 2 sizes even if
+ preferred vector size is not 512-bit or 256-bit, just put those
+ unpreferred ones last.
+
2019-05-20 Martin Liska <mliska@suse.cz>
* targhooks.c (default_libc_has_fast_function): New function.
diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
index e82cd7a..2f8ab10 100644
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@@ -174,6 +174,9 @@ struct GTY ((chain_next ("%h.next"))) loop {
of the loop can be safely evaluated concurrently. */
int safelen;
+ /* Preferred vectorization factor for the loop if non-zero. */
+ int simdlen;
+
/* Constraints are generally set by consumers and affect certain
semantics of niter analyzer APIs. Currently the APIs affected are
number_of_iterations_exit* functions and their callers. One typical
diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
index bfee48e..50250ec 100644
--- a/gcc/cfgloopmanip.c
+++ b/gcc/cfgloopmanip.c
@@ -1016,6 +1016,7 @@ copy_loop_info (struct loop *loop, struct loop *target)
target->nb_iterations_estimate = loop->nb_iterations_estimate;
target->estimate_state = loop->estimate_state;
target->safelen = loop->safelen;
+ target->simdlen = loop->simdlen;
target->constraints = loop->constraints;
target->can_be_parallel = loop->can_be_parallel;
target->warned_aggressive_loop_optimizations
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 971c4d0..8a290dc 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -14109,7 +14109,7 @@ aarch64_preferred_simd_mode (scalar_mode mode)
/* Return a list of possible vector sizes for the vectorizer
to iterate over. */
static void
-aarch64_autovectorize_vector_sizes (vector_sizes *sizes)
+aarch64_autovectorize_vector_sizes (vector_sizes *sizes, bool)
{
if (TARGET_SVE)
sizes->safe_push (BYTES_PER_SVE_VECTOR);
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index 1633d01..bce1899 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -480,7 +480,7 @@ arc_preferred_simd_mode (scalar_mode mode)
TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES. */
static void
-arc_autovectorize_vector_sizes (vector_sizes *sizes)
+arc_autovectorize_vector_sizes (vector_sizes *sizes, bool)
{
if (TARGET_PLUS_QMACW)
{
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 1d3be26..e3e71ea 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -288,7 +288,7 @@ static bool arm_builtin_support_vector_misalignment (machine_mode mode,
static void arm_conditional_register_usage (void);
static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
-static void arm_autovectorize_vector_sizes (vector_sizes *);
+static void arm_autovectorize_vector_sizes (vector_sizes *, bool);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
@@ -28351,7 +28351,7 @@ arm_vector_alignment (const_tree type)
}
static void
-arm_autovectorize_vector_sizes (vector_sizes *sizes)
+arm_autovectorize_vector_sizes (vector_sizes *sizes, bool)
{
if (!TARGET_NEON_VECTORIZE_DOUBLE)
{
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 384c633..696a474 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -21332,7 +21332,7 @@ ix86_preferred_simd_mode (scalar_mode mode)
256bit and 128bit vectors. */
static void
-ix86_autovectorize_vector_sizes (vector_sizes *sizes)
+ix86_autovectorize_vector_sizes (vector_sizes *sizes, bool all)
{
if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
{
@@ -21340,11 +21340,22 @@ ix86_autovectorize_vector_sizes (vector_sizes *sizes)
sizes->safe_push (32);
sizes->safe_push (16);
}
+ else if (TARGET_AVX512F && all)
+ {
+ sizes->safe_push (32);
+ sizes->safe_push (16);
+ sizes->safe_push (64);
+ }
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
{
sizes->safe_push (32);
sizes->safe_push (16);
}
+ else if (TARGET_AVX && all)
+ {
+ sizes->safe_push (16);
+ sizes->safe_push (32);
+ }
}
/* Implemenation of targetm.vectorize.get_mask_mode. */
diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index 42cafed..6eafe3d 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -13460,7 +13460,7 @@ mips_preferred_simd_mode (scalar_mode mode)
/* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES. */
static void
-mips_autovectorize_vector_sizes (vector_sizes *sizes)
+mips_autovectorize_vector_sizes (vector_sizes *sizes, bool)
{
if (ISA_HAS_MSA)
sizes->safe_push (16);
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 0941039..622e8cf 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -6021,11 +6021,13 @@ against lower halves of vectors recursively until the specified mode is
reached. The default is @var{mode} which means no splitting.
@end deftypefn
-@deftypefn {Target Hook} void TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES (vector_sizes *@var{sizes})
+@deftypefn {Target Hook} void TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES (vector_sizes *@var{sizes}, bool @var{all})
If the mode returned by @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} is not
the only one that is worth considering, this hook should add all suitable
vector sizes to @var{sizes}, in order of decreasing preference. The first
one should be the size of @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE}.
+If @var{all} is true, add suitable vector sizes even when they are generally
+not expected to be worthwhile.
The hook does not need to do anything if the vector returned by
@code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} is the only one relevant
diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c
index 7415973..0d7f104 100644
--- a/gcc/omp-expand.c
+++ b/gcc/omp-expand.c
@@ -4974,6 +4974,13 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
&& loop->safelen > 1)
{
loop->force_vectorize = true;
+ if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
+ {
+ unsigned HOST_WIDE_INT v
+ = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
+ if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
+ loop->simdlen = v;
+ }
cfun->has_force_vectorize_loops = true;
}
else if (dont_vectorize)
diff --git a/gcc/omp-general.c b/gcc/omp-general.c
index 82f0a04..4a9b15c 100644
--- a/gcc/omp-general.c
+++ b/gcc/omp-general.c
@@ -469,7 +469,7 @@ omp_max_vf (void)
return 1;
auto_vector_sizes sizes;
- targetm.vectorize.autovectorize_vector_sizes (&sizes);
+ targetm.vectorize.autovectorize_vector_sizes (&sizes, true);
if (!sizes.is_empty ())
{
poly_uint64 vf = 0;
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 04fc5f6..26ee70d 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -3600,7 +3600,7 @@ omp_clause_aligned_alignment (tree clause)
unsigned int al = 1;
opt_scalar_mode mode_iter;
auto_vector_sizes sizes;
- targetm.vectorize.autovectorize_vector_sizes (&sizes);
+ targetm.vectorize.autovectorize_vector_sizes (&sizes, true);
poly_uint64 vs = 0;
for (unsigned int i = 0; i < sizes.length (); ++i)
vs = ordered_max (vs, sizes[i]);
diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c
index 71c73fb..04c8d08 100644
--- a/gcc/optabs-query.c
+++ b/gcc/optabs-query.c
@@ -593,7 +593,7 @@ can_vec_mask_load_store_p (machine_mode mode,
return true;
auto_vector_sizes vector_sizes;
- targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
+ targetm.vectorize.autovectorize_vector_sizes (&vector_sizes, true);
for (unsigned int i = 0; i < vector_sizes.length (); ++i)
{
poly_uint64 cur = vector_sizes[i];
diff --git a/gcc/target.def b/gcc/target.def
index 23e260c..7d52102 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -1899,12 +1899,14 @@ DEFHOOK
the only one that is worth considering, this hook should add all suitable\n\
vector sizes to @var{sizes}, in order of decreasing preference. The first\n\
one should be the size of @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE}.\n\
+If @var{all} is true, add suitable vector sizes even when they are generally\n\
+not expected to be worthwhile.\n\
\n\
The hook does not need to do anything if the vector returned by\n\
@code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} is the only one relevant\n\
for autovectorization. The default implementation does nothing.",
void,
- (vector_sizes *sizes),
+ (vector_sizes *sizes, bool all),
default_autovectorize_vector_sizes)
/* Function to get a target mode for a vector mask. */
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index d820618..b271116 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -1316,7 +1316,7 @@ default_split_reduction (machine_mode mode)
is tried. */
void
-default_autovectorize_vector_sizes (vector_sizes *)
+default_autovectorize_vector_sizes (vector_sizes *, bool)
{
}
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index 810c2b1..229aacd 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -110,7 +110,7 @@ default_builtin_support_vector_misalignment (machine_mode mode,
int, bool);
extern machine_mode default_preferred_simd_mode (scalar_mode mode);
extern machine_mode default_split_reduction (machine_mode);
-extern void default_autovectorize_vector_sizes (vector_sizes *);
+extern void default_autovectorize_vector_sizes (vector_sizes *, bool);
extern opt_machine_mode default_get_mask_mode (poly_uint64, poly_uint64);
extern bool default_empty_mask_is_expensive (unsigned);
extern void *default_init_cost (struct loop *);
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 9b1e6d3..3ecff36 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2019-05-20 Jakub Jelinek <jakub@redhat.com>
+
+ * gcc.target/i386/avx512f-simd-1.c: New test.
+
2019-05-20 Christophe Lyon <christophe.lyon@linaro.org>
* gcc.target/aarch64/target_attr_10.c: Add quotes to expected
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-simd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-simd-1.c
new file mode 100644
index 0000000..235fb91
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-simd-1.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp-simd -O2 -mavx512f -masm=att" } */
+/* { dg-final { scan-assembler "vpadd\[^\n\r]*%xmm" } } */
+/* { dg-final { scan-assembler "vpadd\[^\n\r]*%ymm" } } */
+/* { dg-final { scan-assembler "vpadd\[^\n\r]*%zmm" } } */
+
+#define N 1024
+int a[N];
+
+void
+f1 (void)
+{
+ int i;
+ #pragma omp simd simdlen (4)
+ for (i = 0; i < N; ++i)
+ a[i] = a[i] + 1;
+}
+
+void
+f2 (void)
+{
+ int i;
+ #pragma omp simd simdlen (8)
+ for (i = 0; i < N; ++i)
+ a[i] = a[i] + 2;
+}
+
+void
+f3 (void)
+{
+ int i;
+ #pragma omp simd simdlen (16)
+ for (i = 0; i < N; ++i)
+ a[i] = a[i] + 3;
+}
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 5776417..e1229a5 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -2254,7 +2254,8 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo,
/* Autodetect first vector size we try. */
current_vector_size = 0;
- targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
+ targetm.vectorize.autovectorize_vector_sizes (&vector_sizes,
+ loop->simdlen != 0);
unsigned int next_size = 0;
DUMP_VECT_SCOPE ("analyze_loop_nest");
@@ -2273,6 +2274,8 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo,
unsigned n_stmts = 0;
poly_uint64 autodetected_vector_size = 0;
+ opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL);
+ poly_uint64 first_vector_size = 0;
while (1)
{
/* Check the CFG characteristics of the loop (nesting, entry/exit). */
@@ -2283,6 +2286,7 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo,
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"bad loop form.\n");
+ gcc_checking_assert (first_loop_vinfo == NULL);
return loop_vinfo;
}
@@ -2296,10 +2300,27 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo,
{
LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
- return loop_vinfo;
+ if (loop->simdlen
+ && maybe_ne (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
+ (unsigned HOST_WIDE_INT) loop->simdlen))
+ {
+ if (first_loop_vinfo == NULL)
+ {
+ first_loop_vinfo = loop_vinfo;
+ first_vector_size = current_vector_size;
+ loop->aux = NULL;
+ }
+ else
+ delete loop_vinfo;
+ }
+ else
+ {
+ delete first_loop_vinfo;
+ return loop_vinfo;
+ }
}
-
- delete loop_vinfo;
+ else
+ delete loop_vinfo;
if (next_size == 0)
autodetected_vector_size = current_vector_size;
@@ -2308,10 +2329,31 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo,
&& known_eq (vector_sizes[next_size], autodetected_vector_size))
next_size += 1;
- if (fatal
- || next_size == vector_sizes.length ()
+ if (fatal)
+ {
+ gcc_checking_assert (first_loop_vinfo == NULL);
+ return opt_loop_vec_info::propagate_failure (res);
+ }
+
+ if (next_size == vector_sizes.length ()
|| known_eq (current_vector_size, 0U))
- return opt_loop_vec_info::propagate_failure (res);
+ {
+ if (first_loop_vinfo)
+ {
+ current_vector_size = first_vector_size;
+ loop->aux = (loop_vec_info) first_loop_vinfo;
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "***** Choosing vector size ");
+ dump_dec (MSG_NOTE, current_vector_size);
+ dump_printf (MSG_NOTE, "\n");
+ }
+ return first_loop_vinfo;
+ }
+ else
+ return opt_loop_vec_info::propagate_failure (res);
+ }
/* Try the next biggest vector size. */
current_vector_size = vector_sizes[next_size++];
@@ -8670,7 +8712,7 @@ vect_transform_loop (loop_vec_info loop_vinfo)
if (epilogue)
{
auto_vector_sizes vector_sizes;
- targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
+ targetm.vectorize.autovectorize_vector_sizes (&vector_sizes, false);
unsigned int next_size = 0;
/* Note LOOP_VINFO_NITERS_KNOWN_P and LOOP_VINFO_INT_NITERS work
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 52c7b47..2810228 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -2983,7 +2983,7 @@ vect_slp_bb (basic_block bb)
/* Autodetect first vector size we try. */
current_vector_size = 0;
- targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
+ targetm.vectorize.autovectorize_vector_sizes (&vector_sizes, false);
unsigned int next_size = 0;
gsi = gsi_start_bb (bb);