aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Andrea Corallo <andrea.corallo@arm.com> 2020-08-28 16:01:15 +0100
committer Andrea Corallo <andrea.corallo@arm.com> 2020-09-16 15:17:17 +0200
commit 052204fac580b21c967e57e6285d99a9828b8fac (patch)
tree 0503459bcc84856c2f71ab5cca17878aebfb34f1 /gcc
parent 453a20c65722719b9e2d84339f215e7ec87692dc (diff)
download gcc-052204fac580b21c967e57e6285d99a9828b8fac.zip
gcc-052204fac580b21c967e57e6285d99a9828b8fac.tar.gz
gcc-052204fac580b21c967e57e6285d99a9828b8fac.tar.bz2
vec: don't select partial vectors when unnecessary
gcc/ChangeLog

2020-09-09  Andrea Corallo  <andrea.corallo@arm.com>

	* tree-vect-loop.c (vect_need_peeling_or_partial_vectors_p): New
	function.
	(vect_analyze_loop_2): Make use of it not to select partial
	vectors if no peel is required.
	(determine_peel_for_niter): Move out some logic into
	'vect_need_peeling_or_partial_vectors_p'.

gcc/testsuite/ChangeLog

2020-09-09  Andrea Corallo  <andrea.corallo@arm.com>

	* gcc.target/aarch64/sve/cost_model_10.c: New test.
	* gcc.target/aarch64/sve/clastb_8.c: Update test for new
	vectorization strategy.
	* gcc.target/aarch64/sve/cost_model_5.c: Likewise.
	* gcc.target/aarch64/sve/struct_vect_14.c: Likewise.
	* gcc.target/aarch64/sve/struct_vect_15.c: Likewise.
	* gcc.target/aarch64/sve/struct_vect_16.c: Likewise.
	* gcc.target/aarch64/sve/struct_vect_17.c: Likewise.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/clastb_8.c 5
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/cost_model_10.c 12
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/cost_model_5.c 4
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/struct_vect_14.c 8
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/struct_vect_15.c 8
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/struct_vect_16.c 8
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/struct_vect_17.c 8
-rw-r--r-- gcc/tree-vect-loop.c 85
8 files changed, 80 insertions, 58 deletions
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clastb_8.c b/gcc/testsuite/gcc.target/aarch64/sve/clastb_8.c
index 57c4208..e61ff4a 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/clastb_8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clastb_8.c
@@ -23,7 +23,4 @@ TEST_TYPE (uint64_t);
/* { dg-final { scan-assembler {\tclastb\t(h[0-9]+), p[0-7], \1, z[0-9]+\.h\n} } } */
/* { dg-final { scan-assembler {\tclastb\t(s[0-9]+), p[0-7], \1, z[0-9]+\.s\n} } } */
/* { dg-final { scan-assembler {\tclastb\t(d[0-9]+), p[0-7], \1, z[0-9]+\.d\n} } } */
-/* { dg-final { scan-assembler {\twhilelo\tp[0-9]+\.b,} } } */
-/* { dg-final { scan-assembler {\twhilelo\tp[0-9]+\.h,} } } */
-/* { dg-final { scan-assembler {\twhilelo\tp[0-9]+\.s,} } } */
-/* { dg-final { scan-assembler {\twhilelo\tp[0-9]+\.d,} } } */
+/* { dg-final { scan-assembler {\tptrue\tp[0-9]+\.b,} 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_10.c b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_10.c
new file mode 100644
index 0000000..bfac09e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_10.c
@@ -0,0 +1,12 @@
+/* { dg-options "-O3 -msve-vector-bits=256" } */
+
+void
+f (int *restrict x, int *restrict y, unsigned int n)
+{
+ for (unsigned int i = 0; i < n * 8; ++i)
+ x[i] += y[i];
+}
+
+/* { dg-final { scan-assembler-not {\twhilelo\t} } } */
+/* { dg-final { scan-assembler {\tptrue\tp} } } */
+/* { dg-final { scan-assembler {\tcmp\tx[0-9]+, x[0-9]+\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_5.c b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_5.c
index 250ca83..f3a29fc 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_5.c
@@ -9,5 +9,5 @@ vset (int *restrict dst, int *restrict src, int count)
*dst++ = 1;
}
-/* { dg-final { scan-assembler-not {\tst1w\tz} } } */
-/* { dg-final { scan-assembler-times {\tstp\tq} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz} 2 } } */
+/* { dg-final { scan-assembler-not {\tstp\tq} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_14.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_14.c
index a16a79e..45644b6 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_14.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_14.c
@@ -43,12 +43,12 @@
#undef NAME
#undef TYPE
-/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_15.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_15.c
index bc00267..814dbb3 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_15.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_15.c
@@ -3,12 +3,12 @@
#include "struct_vect_14.c"
-/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_16.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_16.c
index 9e2a549..6ecf89b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_16.c
@@ -3,12 +3,12 @@
#include "struct_vect_14.c"
-/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_17.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_17.c
index e791e2e..571c6d0 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_17.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_17.c
@@ -3,12 +3,12 @@
#include "struct_vect_14.c"
-/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 3af4cf7..3021be3 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -991,6 +991,51 @@ vect_min_prec_for_max_niters (loop_vec_info loop_vinfo, unsigned int factor)
return wi::min_precision (max_ni * factor, UNSIGNED);
}
+/* True if the loop needs peeling or partial vectors when vectorized. */
+
+static bool
+vect_need_peeling_or_partial_vectors_p (loop_vec_info loop_vinfo)
+{
+ unsigned HOST_WIDE_INT const_vf;
+ HOST_WIDE_INT max_niter
+ = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
+
+ unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
+ if (!th && LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo))
+ th = LOOP_VINFO_COST_MODEL_THRESHOLD (LOOP_VINFO_ORIG_LOOP_INFO
+ (loop_vinfo));
+
+ if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+ && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
+ {
+ /* Work out the (constant) number of iterations that need to be
+ peeled for reasons other than niters. */
+ unsigned int peel_niter = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
+ if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
+ peel_niter += 1;
+ if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter,
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
+ return true;
+ }
+ else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
+ /* ??? When peeling for gaps but not alignment, we could
+ try to check whether the (variable) niters is known to be
+ VF * N + 1. That's something of a niche case though. */
+ || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
+ || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf)
+ || ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
+ < (unsigned) exact_log2 (const_vf))
+ /* In case of versioning, check if the maximum number of
+ iterations is greater than th. If they are identical,
+ the epilogue is unnecessary. */
+ && (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
+ || ((unsigned HOST_WIDE_INT) max_niter
+ > (th / const_vf) * const_vf))))
+ return true;
+
+ return false;
+}
+
/* Each statement in LOOP_VINFO can be masked where necessary. Check
whether we can actually generate the masks required. Return true if so,
storing the type of the scalar IV in LOOP_VINFO_RGROUP_COMPARE_TYPE. */
@@ -1967,44 +2012,10 @@ determine_peel_for_niter (loop_vec_info loop_vinfo)
{
LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
- unsigned HOST_WIDE_INT const_vf;
- HOST_WIDE_INT max_niter
- = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
-
- unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
- if (!th && LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo))
- th = LOOP_VINFO_COST_MODEL_THRESHOLD (LOOP_VINFO_ORIG_LOOP_INFO
- (loop_vinfo));
-
if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
/* The main loop handles all iterations. */
LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
- else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
- && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
- {
- /* Work out the (constant) number of iterations that need to be
- peeled for reasons other than niters. */
- unsigned int peel_niter = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
- if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
- peel_niter += 1;
- if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter,
- LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
- LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
- }
- else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
- /* ??? When peeling for gaps but not alignment, we could
- try to check whether the (variable) niters is known to be
- VF * N + 1. That's something of a niche case though. */
- || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
- || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf)
- || ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
- < (unsigned) exact_log2 (const_vf))
- /* In case of versioning, check if the maximum number of
- iterations is greater than th. If they are identical,
- the epilogue is unnecessary. */
- && (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
- || ((unsigned HOST_WIDE_INT) max_niter
- > (th / const_vf) * const_vf))))
+ else if (vect_need_peeling_or_partial_vectors_p (loop_vinfo))
LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
}
@@ -2265,7 +2276,9 @@ start_over:
this vectorization factor. */
if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
{
- if (param_vect_partial_vector_usage == 0)
+ /* Don't use partial vectors if we don't need to peel the loop. */
+ if (param_vect_partial_vector_usage == 0
+ || !vect_need_peeling_or_partial_vectors_p (loop_vinfo))
LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = false;
else if (vect_verify_full_masking (loop_vinfo)
|| vect_verify_loop_lens (loop_vinfo))