aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorRichard Sandiford <richard.sandiford@arm.com>2020-12-31 16:51:34 +0000
committerRichard Sandiford <richard.sandiford@arm.com>2020-12-31 16:51:34 +0000
commit58a12b0eadac62e691fcf7325ab2bc2c93d46b61 (patch)
treeb7ac28b1e0140db958c4791273cef3d0efea207f /gcc
parent9fa5b473b5b8e289b6542adfd5cfaddfb3036048 (diff)
downloadgcc-58a12b0eadac62e691fcf7325ab2bc2c93d46b61.zip
gcc-58a12b0eadac62e691fcf7325ab2bc2c93d46b61.tar.gz
gcc-58a12b0eadac62e691fcf7325ab2bc2c93d46b61.tar.bz2
vect: Avoid generating out-of-range shifts [PR98302]
In this testcase we end up with:

    unsigned long long x = ...;
    char y = (char) (x << 37);

The overwidening pattern realised that only the low 8 bits of x << 37 are needed, but then tried to turn that into:

    unsigned long long x = ...;
    char y = (char) x << 37;

which gives an out-of-range shift.  In this case y can simply be replaced by zero, but as the comment in the patch says, it's kind-of awkward to do that in the middle of vectorisation.

Most of the overwidening stuff is about keeping operations as narrow as possible, which is important for vectorisation but could be counter-productive for scalars (especially on RISC targets).  In contrast, optimising y to zero in the above feels like an independent optimisation that would benefit scalar code and that should happen before vectorisation.

gcc/
	PR tree-optimization/98302
	* tree-vect-patterns.c (vect_determine_precisions_from_users): Make
	sure that the precision remains greater than the shift count.

gcc/testsuite/
	PR tree-optimization/98302
	* gcc.dg/vect/pr98302.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/testsuite/gcc.dg/vect/pr98302.c22
-rw-r--r--gcc/tree-vect-patterns.c13
2 files changed, 33 insertions, 2 deletions
diff --git a/gcc/testsuite/gcc.dg/vect/pr98302.c b/gcc/testsuite/gcc.dg/vect/pr98302.c
new file mode 100644
index 0000000..dec6016
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr98302.c
@@ -0,0 +1,22 @@
+#include "tree-vect.h"
+
+int c = 1705;
+char a;
+long f = 50887638;
+unsigned long long *h(unsigned long long *k, unsigned long long *l) {
+ return *k ? k : l;
+}
+void aa() {}
+int main() {
+ check_vect ();
+
+ long d = f;
+ for (char g = 0; g < (char)c - 10; g += 2) {
+ unsigned long long i = d, j = 4;
+ a = *h(&i, &j) << ((d ? 169392992 : 0) - 169392955LL);
+ }
+ if (a)
+ __builtin_abort();
+
+ return 0;
+}
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index ff1358a..081ae80 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -4961,10 +4961,19 @@ vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
unsigned int const_shift = TREE_INT_CST_LOW (shift);
if (code == LSHIFT_EXPR)
{
+ /* Avoid creating an undefined shift.
+
+ ??? We could instead use min_output_precision as-is and
+ optimize out-of-range shifts to zero. However, only
+ degenerate testcases shift away all their useful input data,
+ and it isn't natural to drop input operations in the middle
+ of vectorization. This sort of thing should really be
+ handled before vectorization. */
+ operation_precision = MAX (stmt_info->min_output_precision,
+ const_shift + 1);
/* We need CONST_SHIFT fewer bits of the input. */
- operation_precision = stmt_info->min_output_precision;
min_input_precision = (MAX (operation_precision, const_shift)
- - const_shift);
+ - const_shift);
}
else
{