aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Pengxuan Zheng <quic_pzheng@quicinc.com> 2024-09-10 17:59:46 -0700
committer: Pengxuan Zheng <quic_pzheng@quicinc.com> 2024-09-16 10:31:10 -0700
commit: a92f54f580c37732a5de01e47aed56882231f196 (patch)
tree: 1fa37e27bfce7b107f111b6efa121c4e851e4535
parent: 58bc39c73ca9aeca3d62d2d963be0121d0efeeac (diff)
download: gcc-a92f54f580c37732a5de01e47aed56882231f196.zip
gcc-a92f54f580c37732a5de01e47aed56882231f196.tar.gz
gcc-a92f54f580c37732a5de01e47aed56882231f196.tar.bz2
aarch64: Improve vector constant generation using SVE INDEX instruction [PR113328]

SVE's INDEX instruction can be used to populate vectors with values starting
from "base" and incremented by "step" for each subsequent value. We can take
advantage of it to generate vector constants if TARGET_SVE is available and the
base and step values are within [-16, 15].

For example, with the following function:

typedef int v4si __attribute__ ((vector_size (16)));
v4si
f_v4si (void)
{
  return (v4si){ 0, 1, 2, 3 };
}

GCC currently generates:

f_v4si:
        adrp    x0, .LC4
        ldr     q0, [x0, #:lo12:.LC4]
        ret

.LC4:
        .word   0
        .word   1
        .word   2
        .word   3

With this patch, we generate an INDEX instruction instead if TARGET_SVE is
available.

f_v4si:
        index   z0.s, #0, #1
        ret

        PR target/113328

gcc/ChangeLog:

        * config/aarch64/aarch64.cc (aarch64_simd_valid_immediate): Improve
        handling of some ADVSIMD vectors by using SVE's INDEX if TARGET_SVE is
        available.
        (aarch64_output_simd_mov_immediate): Likewise.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/sve/acle/general/dupq_1.c: Update test to use
        SVE's INDEX instruction.
        * gcc.target/aarch64/sve/acle/general/dupq_2.c: Likewise.
        * gcc.target/aarch64/sve/acle/general/dupq_3.c: Likewise.
        * gcc.target/aarch64/sve/acle/general/dupq_4.c: Likewise.
        * gcc.target/aarch64/sve/vec_init_3.c: New test.

Signed-off-by: Pengxuan Zheng <quic_pzheng@quicinc.com>
-rw-r--r--  gcc/config/aarch64/aarch64.cc  13
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c  3
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c  3
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c  3
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c  3
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c  99
6 files changed, 115 insertions, 9 deletions
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 6ccf08d..92763d4 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -22987,7 +22987,8 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
if (CONST_VECTOR_P (op)
&& CONST_VECTOR_DUPLICATE_P (op))
n_elts = CONST_VECTOR_NPATTERNS (op);
- else if ((vec_flags & VEC_SVE_DATA)
+ else if (which == AARCH64_CHECK_MOV
+ && TARGET_SVE
&& const_vec_series_p (op, &base, &step))
{
gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
@@ -25245,6 +25246,16 @@ aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width,
if (which == AARCH64_CHECK_MOV)
{
+ if (info.insn == simd_immediate_info::INDEX)
+ {
+ gcc_assert (TARGET_SVE);
+ snprintf (templ, sizeof (templ), "index\t%%Z0.%c, #"
+ HOST_WIDE_INT_PRINT_DEC ", #" HOST_WIDE_INT_PRINT_DEC,
+ element_char, INTVAL (info.u.index.base),
+ INTVAL (info.u.index.step));
+ return templ;
+ }
+
mnemonic = info.insn == simd_immediate_info::MVN ? "mvni" : "movi";
shift_op = (info.u.mov.modifier == simd_immediate_info::MSL
? "msl" : "lsl");
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c
index 216699b..0940bed 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c
@@ -10,7 +10,6 @@ dupq (int x)
return svdupq_s32 (x, 1, 2, 3);
}
-/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */
+/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #0, #1} } } */
/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[0\], w0\n} } } */
/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
-/* { dg-final { scan-assembler {\t\.word\t1\n\t\.word\t2\n\t\.word\t3\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c
index d494943..218a660 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c
@@ -10,7 +10,6 @@ dupq (int x)
return svdupq_s32 (x, 1, 2, 3);
}
-/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */
+/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #3, #-1} } } */
/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[0\], w0\n} } } */
/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
-/* { dg-final { scan-assembler {\t\.word\t3\n\t\.word\t2\n\t\.word\t1\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c
index 4bc8259..245d43b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c
@@ -10,7 +10,6 @@ dupq (int x)
return svdupq_s32 (0, 1, x, 3);
}
-/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */
+/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #0, #1} } } */
/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w0\n} } } */
/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
-/* { dg-final { scan-assembler {\t\.word\t0\n\t\.word\t1\n\t\.word\t[^\n]*\n\t\.word\t3\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c
index 6f9f9f2..cbee6f2 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c
@@ -10,7 +10,6 @@ dupq (int x)
return svdupq_s32 (0, 1, x, 3);
}
-/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */
+/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #3, #-1} } } */
/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w0\n} } } */
/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
-/* { dg-final { scan-assembler {\t\.word\t3\n\t\.word\t[^\n]*\n\t\.word\t1\n\t\.word\t0\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c
new file mode 100644
index 0000000..25910db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c
@@ -0,0 +1,99 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+typedef char v16qi __attribute__ ((vector_size (16)));
+typedef char v8qi __attribute__ ((vector_size (8)));
+typedef short v8hi __attribute__ ((vector_size (16)));
+typedef short v4hi __attribute__ ((vector_size (8)));
+typedef int v4si __attribute__ ((vector_size (16)));
+typedef int v2si __attribute__ ((vector_size (8)));
+typedef long v2di __attribute__ ((vector_size (16)));
+
+/*
+** f_v16qi:
+** index z0\.b, #0, #1
+** ret
+*/
+v16qi
+f_v16qi (void)
+{
+ return (v16qi){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
+}
+
+/*
+** f_v8qi:
+** index z0\.b, #0, #1
+** ret
+*/
+v8qi
+f_v8qi (void)
+{
+ return (v8qi){ 0, 1, 2, 3, 4, 5, 6, 7 };
+}
+
+/*
+** f_v8hi:
+** index z0\.h, #0, #1
+** ret
+*/
+v8hi
+f_v8hi (void)
+{
+ return (v8hi){ 0, 1, 2, 3, 4, 5, 6, 7 };
+}
+
+/*
+** f_v4hi:
+** index z0\.h, #0, #1
+** ret
+*/
+v4hi
+f_v4hi (void)
+{
+ return (v4hi){ 0, 1, 2, 3 };
+}
+
+/*
+** f_v4si:
+** index z0\.s, #0, #1
+** ret
+*/
+v4si
+f_v4si (void)
+{
+ return (v4si){ 0, 1, 2, 3 };
+}
+
+/*
+** f_v2si:
+** index z0\.s, #0, #1
+** ret
+*/
+v2si
+f_v2si (void)
+{
+ return (v2si){ 0, 1 };
+}
+
+/*
+** f_v2di:
+** index z0\.d, #0, #1
+** ret
+*/
+v2di
+f_v2di (void)
+{
+ return (v2di){ 0, 1 };
+}
+
+/*
+** g_v4si:
+** index z0\.s, #3, #-4
+** ret
+*/
+v4si
+g_v4si (void)
+{
+ return (v4si){ 3, -1, -5, -9 };
+}