aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Richard Sandiford <richard.sandiford@arm.com> 2023-01-27 17:03:50 +0000
committer Richard Sandiford <richard.sandiford@arm.com> 2023-01-27 17:03:50 +0000
commit 553f8003ba5ecfdf0574a171692843ef838226b4 (patch)
tree 5a7d5e630b51bbdf9949787a2e3638b22bf8dfbd /gcc
parent 32d1c3dd1d63399cca20567fb35d1ff90e30b655 (diff)
downloadgcc-553f8003ba5ecfdf0574a171692843ef838226b4.zip
gcc-553f8003ba5ecfdf0574a171692843ef838226b4.tar.gz
gcc-553f8003ba5ecfdf0574a171692843ef838226b4.tar.bz2
vect/aarch64: Fix various sve/cond*.c failures
Quite a few gcc.target/aarch64/sve/cond*.c tests started failing after g:68e0063397ba820e71adc220b2da0581dce29ffa, but it turns out that we were cheating passes before the patch. The tests involve comparing the cost of N wide compares, a pack sequence, and a narrow COND_EXPR with the cost of a single COND_EXPR on fewer elements. The costs for the former included all operations, but the costs for the latter didn't model the comparison embedded in the COND_EXPR. The patch made us include the comparison on both sides, making it apples-for-apples, but that's enough to tip the balance in favour of using the wider types. I think the new choice does reflect the current SVE cost model correctly. (Whether and how the model should be tweaked is a different question.) This patch therefore changes the tuning vector length to one that makes the choice more obvious. That in turn needs a tweak to compare_inside_loop_cost. The function compares body_cost1/vf1 with body_cost2/vf2, but for fully-masked loops, it limits vf to the actual number of iterations. This is so that (say) an expensive 16-element vector body doesn't win over a cheaper 8-element vector body when there are only 7 elements to process. However, the limit was applied using known_le, regardless of the tuning target. For a heuristic like this, it seems better to use the likely minimum (which is a concept that was only added after this code went in). g:68e0063397ba820e71adc220b2da0581dce29ffa also fixed vcond_4_costly.c. gcc/ * tree-vectorizer.cc (vector_costs::compare_inside_loop_cost): Use the likely minimum VF when bounding the denominators to the estimated number of iterations. gcc/testsuite/ * gcc.target/aarch64/sve/cond_asrd_1.c: Tune for a 256-bit vector length. * gcc.target/aarch64/sve/cond_cnot_4.c: Likewise. * gcc.target/aarch64/sve/cond_cnot_6.c: Likewise. * gcc.target/aarch64/sve/cond_unary_5.c: Likewise. * gcc.target/aarch64/sve/cond_unary_6.c: Likewise. * gcc.target/aarch64/sve/cond_uxt_5.c: Likewise. 
* gcc.target/aarch64/sve/vcond_4_costly.c: Remove XFAILs.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/cond_asrd_1.c 2
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4.c 2
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6.c 2
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/cond_unary_5.c 2
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/cond_unary_6.c 2
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_5.c 2
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/vcond_4_costly.c 4
-rw-r--r-- gcc/tree-vectorizer.cc 6
8 files changed, 12 insertions, 10 deletions
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_asrd_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_asrd_1.c
index 478b52a..aac06bd 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/cond_asrd_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_asrd_1.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=256" } */
#include <stdint.h>
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4.c
index 729d3f4..f627891 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=256" } */
#include <stdint.h>
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6.c
index d44e357..ef1b067 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=256" } */
#include <stdint.h>
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_5.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_5.c
index 17b3f86..03a6636 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_5.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=256" } */
#include <stdint.h>
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_6.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_6.c
index 1bd342b..c49a304 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_6.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=256" } */
#include <stdint.h>
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_5.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_5.c
index 1886628..9a2bd8f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_5.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=256" } */
#include <stdint.h>
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_4_costly.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_4_costly.c
index 4aa567e..76d7a28 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/vcond_4_costly.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_4_costly.c
@@ -61,8 +61,8 @@ TEST_CMP (nuge)
TEST_CMP (nugt)
/* 2 each for: eq, ne, ueq, nueq. */
-/* { dg-final { scan-assembler-times {\tfcm(?:eq|ne)\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 8 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcm(?:eq|ne)\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 16 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcm(?:eq|ne)\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 8 } } */
+/* { dg-final { scan-assembler-times {\tfcm(?:eq|ne)\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 16 } } */
/* 2 each for: olt, ult, nult, ogt, ugt, nugt. */
/* { dg-final { scan-assembler-times {\tfcm[lg]t\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 12 } } */
diff --git a/gcc/tree-vectorizer.cc b/gcc/tree-vectorizer.cc
index 875acbb..89cd0b8 100644
--- a/gcc/tree-vectorizer.cc
+++ b/gcc/tree-vectorizer.cc
@@ -1973,9 +1973,11 @@ vector_costs::compare_inside_loop_cost (const vector_costs *other) const
HOST_WIDE_INT estimated_max_niter = likely_max_stmt_executions_int (loop);
if (estimated_max_niter != -1)
{
- if (known_le (estimated_max_niter, this_vf))
+ if (estimated_poly_value (this_vf, POLY_VALUE_MIN)
+ >= estimated_max_niter)
this_vf = estimated_max_niter;
- if (known_le (estimated_max_niter, other_vf))
+ if (estimated_poly_value (other_vf, POLY_VALUE_MIN)
+ >= estimated_max_niter)
other_vf = estimated_max_niter;
}