aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Richard Sandiford <richard.sandiford@arm.com> 2025-08-04 11:45:35 +0100
committer Richard Sandiford <richard.sandiford@arm.com> 2025-08-04 11:45:35 +0100
commit b768e2786f8c85097442bd52010fee1b7ed12ed2 (patch)
tree 22f03f33d222f814a54325bc61d6fb11ba4b2c9d
parent 4ff15c5a998354c58dca19fc825c44dcb6d57bb6 (diff)
download gcc-b768e2786f8c85097442bd52010fee1b7ed12ed2.zip
gcc-b768e2786f8c85097442bd52010fee1b7ed12ed2.tar.gz
gcc-b768e2786f8c85097442bd52010fee1b7ed12ed2.tar.bz2
aarch64: Use VNx16BI for svdup_b*
This patch continues the work of making ACLE intrinsics use VNx16BI
for svbool_t results.  It deals with the predicate forms of svdup.

gcc/
	* config/aarch64/aarch64-protos.h
	(aarch64_emit_sve_pred_vec_duplicate): Declare.
	* config/aarch64/aarch64.cc
	(aarch64_emit_sve_pred_vec_duplicate): New function.
	* config/aarch64/aarch64-sve.md (vec_duplicate<PRED_ALL:mode>): Use it.
	* config/aarch64/aarch64-sve-builtins-base.cc (svdup_impl::expand):
	Handle boolean values specially.  Check for constants and fall back on
	aarch64_emit_sve_pred_vec_duplicate for the variable case, ensuring
	that the result has mode VNx16BI.

gcc/testsuite/
	* gcc.target/aarch64/sve/acle/general/dup_1.c: New test.
-rw-r--r-- gcc/config/aarch64/aarch64-protos.h 1
-rw-r--r-- gcc/config/aarch64/aarch64-sve-builtins-base.cc 18
-rw-r--r-- gcc/config/aarch64/aarch64-sve.md 5
-rw-r--r-- gcc/config/aarch64/aarch64.cc 21
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/acle/general/dup_1.c 47
5 files changed, 87 insertions, 5 deletions
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 38c307c..87b3f45 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1039,6 +1039,7 @@ void aarch64_expand_sve_mem_move (rtx, rtx, machine_mode);
bool aarch64_maybe_expand_sve_subreg_move (rtx, rtx);
rtx aarch64_replace_reg_mode (rtx, machine_mode);
void aarch64_split_sve_subreg_move (rtx, rtx, rtx);
+void aarch64_emit_sve_pred_vec_duplicate (machine_mode, rtx, rtx);
void aarch64_expand_prologue (void);
void aarch64_decompose_vec_struct_index (machine_mode, rtx *, rtx *, bool);
void aarch64_expand_vector_init (rtx, rtx);
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index d58d597..314d53e 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -1050,6 +1050,23 @@ public:
rtx
expand (function_expander &e) const override
{
+ machine_mode mode = e.vector_mode (0);
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
+ {
+ gcc_assert (e.pred == PRED_none);
+
+ rtx src = e.args[0];
+ if (GET_CODE (src) == CONST_INT)
+ return (src == const0_rtx
+ ? CONST0_RTX (VNx16BImode)
+ : aarch64_ptrue_all (e.type_suffix (0).element_bytes));
+
+ rtx dest = e.get_reg_target ();
+ src = force_reg (GET_MODE (src), src);
+ aarch64_emit_sve_pred_vec_duplicate (mode, dest, src);
+ return dest;
+ }
+
if (e.pred == PRED_none || e.pred == PRED_x)
/* There's no benefit to using predicated instructions for _x here. */
return e.use_unpred_insn (e.direct_optab_handler (vec_duplicate_optab));
@@ -1058,7 +1075,6 @@ public:
the duplicate of the function argument and the "false" value
is the value of inactive lanes. */
insn_code icode;
- machine_mode mode = e.vector_mode (0);
if (valid_for_const_vector_p (GET_MODE_INNER (mode), e.args.last ()))
/* Duplicate the constant to fill a vector. The pattern optimizes
various cases involving constant operands, falling back to SEL
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index f01e05e..88d323a 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -2990,10 +2990,7 @@
(vec_duplicate:PRED_ALL (match_operand:QI 1 "register_operand")))]
"TARGET_SVE"
{
- rtx tmp = gen_reg_rtx (DImode);
- rtx op1 = gen_lowpart (DImode, operands[1]);
- emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
- emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
+ aarch64_emit_sve_pred_vec_duplicate (<MODE>mode, operands[0], operands[1]);
DONE;
}
)
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 565c532..f72db5f 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -6752,6 +6752,27 @@ aarch64_split_sve_subreg_move (rtx dest, rtx ptrue, rtx src)
dest, ptrue, src));
}
+/* Set predicate register DEST such that every element has the scalar
+ boolean value in SRC, with any nonzero source counting as "true".
+ MODE is a MODE_VECTOR_BOOL that determines the element size;
+ DEST can have this mode or VNx16BImode. In the latter case,
+ the upper bits of each element are defined to be zero, as for
+ the .H, .S, and .D forms of PTRUE. */
+
+void
+aarch64_emit_sve_pred_vec_duplicate (machine_mode mode, rtx dest, rtx src)
+{
+ rtx tmp = gen_reg_rtx (DImode);
+ emit_insn (gen_ashldi3 (tmp, gen_lowpart (DImode, src),
+ gen_int_mode (63, DImode)));
+ if (GET_MODE (dest) == VNx16BImode)
+ emit_insn (gen_aarch64_sve_while_acle (UNSPEC_WHILELO, DImode, mode,
+ dest, const0_rtx, tmp));
+ else
+ emit_insn (gen_while (UNSPEC_WHILELO, DImode, mode,
+ dest, const0_rtx, tmp));
+}
+
static bool
aarch64_function_ok_for_sibcall (tree, tree exp)
{
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dup_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dup_1.c
new file mode 100644
index 0000000..c3c4e2d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dup_1.c
@@ -0,0 +1,47 @@
+/* { dg-options "-O2" } */
+
+#include <arm_sve.h>
+
+svbool_t
+test1 (int x)
+{
+ return svand_z (svptrue_b16 (), svdup_b16 (x), svptrue_b16 ());
+}
+
+svbool_t
+test2 (int x)
+{
+ return svand_z (svptrue_b8 (), svdup_b32 (x), svptrue_b16 ());
+}
+
+svbool_t
+test3 (int x)
+{
+ return svand_z (svptrue_b32 (), svdup_b32 (x), svptrue_b16 ());
+}
+
+svbool_t
+test4 (int x)
+{
+ return svand_z (svptrue_b32 (), svdup_b32 (x), svptrue_b32 ());
+}
+
+svbool_t
+test5 (int x)
+{
+ return svand_z (svptrue_b8 (), svdup_b64 (x), svptrue_b32 ());
+}
+
+svbool_t
+test6 (int x)
+{
+ return svand_z (svptrue_b16 (), svdup_b64 (x), svptrue_b8 ());
+}
+
+svbool_t
+test7 (int x)
+{
+ return svand_z (svptrue_b16 (), svdup_b64 (x), svptrue_b64 ());
+}
+
+/* { dg-final { scan-assembler-not {\tand\t} } } */