aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Sandiford <richard.sandiford@arm.com>2024-07-19 19:09:37 +0100
committerRichard Sandiford <richard.sandiford@arm.com>2024-07-19 19:09:37 +0100
commit348d890c287a7ec4c88d3082ae6105537bd39398 (patch)
tree48b755e45b01160ef360b3d74d83ed46d722e185
parentebdad26ed9902c04704409b729d896a646188634 (diff)
downloadgcc-348d890c287a7ec4c88d3082ae6105537bd39398.zip
gcc-348d890c287a7ec4c88d3082ae6105537bd39398.tar.gz
gcc-348d890c287a7ec4c88d3082ae6105537bd39398.tar.bz2
Treat boolean vector elements as 0/-1 [PR115406]
Previously we built vector boolean constants using 1 for true elements and 0 for false elements. This matches the predicates produced by SVE's PTRUE instruction, but leads to a miscompilation on AVX, where all bits of a boolean element should be set. One option for RTL would be to make this target-configurable. But that isn't really possible at the tree level, where vectors should work in a more target-independent way. (There is currently no way to create a "generic" packed boolean vector, but never say never :)) And, if we were going to pick a generic behaviour, it would make sense to use 0/-1 rather than 0/1, for consistency with integer vectors. Both behaviours should work with SVE on read, since SVE ignores the upper bits in each predicate element. And the choice shouldn't make much difference for RTL, since all SVE predicate modes are expressed as vectors of BI, rather than of multi-bit booleans. I suspect there might be some fallout from this change on SVE. But I think we should at least give it a go, and see whether any fallout provides a strong counterargument against the approach. gcc/ PR middle-end/115406 * fold-const.cc (native_encode_vector_part): For vector booleans, check whether an element is nonzero and, if so, set all of the correspending bits in the target image. * simplify-rtx.cc (native_encode_rtx): Likewise. gcc/testsuite/ PR middle-end/115406 * gcc.dg/torture/pr115406.c: New test.
-rw-r--r--gcc/fold-const.cc5
-rw-r--r--gcc/simplify-rtx.cc3
-rw-r--r--gcc/testsuite/gcc.dg/torture/pr115406.c18
3 files changed, 23 insertions, 3 deletions
diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 6179a09..83c32dd 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -8100,16 +8100,17 @@ native_encode_vector_part (const_tree expr, unsigned char *ptr, int len,
unsigned int elts_per_byte = BITS_PER_UNIT / elt_bits;
unsigned int first_elt = off * elts_per_byte;
unsigned int extract_elts = extract_bytes * elts_per_byte;
+ unsigned int elt_mask = (1 << elt_bits) - 1;
for (unsigned int i = 0; i < extract_elts; ++i)
{
tree elt = VECTOR_CST_ELT (expr, first_elt + i);
if (TREE_CODE (elt) != INTEGER_CST)
return 0;
- if (ptr && wi::extract_uhwi (wi::to_wide (elt), 0, 1))
+ if (ptr && integer_nonzerop (elt))
{
unsigned int bit = i * elt_bits;
- ptr[bit / BITS_PER_UNIT] |= 1 << (bit % BITS_PER_UNIT);
+ ptr[bit / BITS_PER_UNIT] |= elt_mask << (bit % BITS_PER_UNIT);
}
}
return extract_bytes;
diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index 35ba54c..a49eefb 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -7232,7 +7232,8 @@ native_encode_rtx (machine_mode mode, rtx x, vec<target_unit> &bytes,
target_unit value = 0;
for (unsigned int j = 0; j < BITS_PER_UNIT; j += elt_bits)
{
- value |= (INTVAL (CONST_VECTOR_ELT (x, elt)) & mask) << j;
+ if (INTVAL (CONST_VECTOR_ELT (x, elt)))
+ value |= mask << j;
elt += 1;
}
bytes.quick_push (value);
diff --git a/gcc/testsuite/gcc.dg/torture/pr115406.c b/gcc/testsuite/gcc.dg/torture/pr115406.c
new file mode 100644
index 0000000..800ef2f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr115406.c
@@ -0,0 +1,18 @@
+// { dg-do run }
+// { dg-additional-options "-mavx512f" { target avx512f_runtime } }
+
+typedef __attribute__((__vector_size__ (1))) signed char V;
+
+signed char
+foo (V v)
+{
+ return ((V) v == v)[0];
+}
+
+int
+main ()
+{
+ signed char x = foo ((V) { });
+ if (x != -1)
+ __builtin_abort ();
+}