aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Richard Sandiford <richard.sandiford@arm.com> 2024-04-12 17:41:32 +0100
committer: Richard Sandiford <richard.sandiford@arm.com> 2024-04-12 17:41:32 +0100
commit: 9b8c6fffcb199b51bb9c4f46f2834f5fd0149d01 (patch)
tree: 6d98c660d3aefb59f6aad5156f13ac1b79a21d8c
parent: a76f236e084cbd02e4e3711cdfc3191dc7eeb460 (diff)
downloadgcc-9b8c6fffcb199b51bb9c4f46f2834f5fd0149d01.zip
gcc-9b8c6fffcb199b51bb9c4f46f2834f5fd0149d01.tar.gz
gcc-9b8c6fffcb199b51bb9c4f46f2834f5fd0149d01.tar.bz2
aarch64: Avoid using mismatched ZERO ZA sizes
The svzero_mask_za intrinsic tried to use the shortest combination of .b, .h, .s and .d tiles, allowing mixtures of sizes where necessary. However, Iain S pointed out that LLVM instead requires the tiles to have the same suffix. GAS supports both versions, so this patch generates the LLVM-friendly form.

gcc/
	* config/aarch64/aarch64.cc (aarch64_output_sme_zero_za): Require all tiles to have the same suffix.

gcc/testsuite/
	* gcc.target/aarch64/sme/acle-asm/zero_mask_za.c (zero_mask_za_ab)
	(zero_mask_za_d7, zero_mask_za_bf): Expect a list of .d tiles instead of a mixture.
-rw-r--r-- gcc/config/aarch64/aarch64.cc | 20
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_mask_za.c | 6
2 files changed, 15 insertions, 11 deletions
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index a2e3d20..1beec94 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -13210,29 +13210,33 @@ aarch64_output_sme_zero_za (rtx mask)
/* The last entry in the list has the form "za7.d }", but that's the
same length as "za7.d, ". */
static char buffer[sizeof("zero\t{ ") + sizeof ("za7.d, ") * 8 + 1];
- unsigned int i = 0;
- i += snprintf (buffer + i, sizeof (buffer) - i, "zero\t");
- const char *prefix = "{ ";
for (auto &tile : tiles)
{
unsigned int tile_mask = tile.mask;
unsigned int tile_index = 0;
+ unsigned int i = snprintf (buffer, sizeof (buffer), "zero\t");
+ const char *prefix = "{ ";
+ auto remaining_mask = mask_val;
while (tile_mask < 0x100)
{
- if ((mask_val & tile_mask) == tile_mask)
+ if ((remaining_mask & tile_mask) == tile_mask)
{
i += snprintf (buffer + i, sizeof (buffer) - i, "%sza%d.%c",
prefix, tile_index, tile.letter);
prefix = ", ";
- mask_val &= ~tile_mask;
+ remaining_mask &= ~tile_mask;
}
tile_mask <<= 1;
tile_index += 1;
}
+ if (remaining_mask == 0)
+ {
+ gcc_assert (i + 3 <= sizeof (buffer));
+ snprintf (buffer + i, sizeof (buffer) - i, " }");
+ return buffer;
+ }
}
- gcc_assert (mask_val == 0 && i + 3 <= sizeof (buffer));
- snprintf (buffer + i, sizeof (buffer) - i, " }");
- return buffer;
+ gcc_unreachable ();
}
/* Return size in bits of an arithmetic operand which is shifted/scaled and
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_mask_za.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_mask_za.c
index 9ce7331..2ba8f8c 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_mask_za.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_mask_za.c
@@ -103,21 +103,21 @@ PROTO (zero_mask_za_aa, void, ()) { svzero_mask_za (0xaa); }
/*
** zero_mask_za_ab:
-** zero { za1\.h, za0\.d }
+** zero { za0\.d, za1\.d, za3\.d, za5\.d, za7\.d }
** ret
*/
PROTO (zero_mask_za_ab, void, ()) { svzero_mask_za (0xab); }
/*
** zero_mask_za_d7:
-** zero { za0\.h, za1\.d, za7\.d }
+** zero { za0\.d, za1\.d, za2\.d, za4\.d, za6\.d, za7\.d }
** ret
*/
PROTO (zero_mask_za_d7, void, ()) { svzero_mask_za (0xd7); }
/*
** zero_mask_za_bf:
-** zero { za1\.h, za0\.s, za2\.d }
+** zero { za0\.d, za1\.d, za2\.d, za3\.d, za4\.d, za5\.d, za7\.d }
** ret
*/
PROTO (zero_mask_za_bf, void, ()) { svzero_mask_za (0xbf); }