author     Jennifer Schmitz <jschmitz@nvidia.com>    2024-07-16 01:59:50 -0700
committer  Kyrylo Tkachov <ktkachov@nvidia.com>      2024-07-30 13:17:23 +0200
commit     7cde140863edea536c676096cbc3d84a6d1424e4 (patch)
tree       17c94a1ba0b6b04324e1efab09128c3cfbf39d2e
parent     265aa32062167a5b299c2ffb616edce5997b64bf (diff)
SVE intrinsics: Add strength reduction for division by constant.
This patch folds SVE division where all divisor elements are the same power of 2 to svasrd (signed) or svlsr (unsigned). Tests were added to check 1) whether the transform is applied (the existing test harness was amended), and 2) correctness, using runtime tests for all input types of svdiv; for signed and unsigned integers, several corner cases were covered.

The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.
OK for mainline?

Signed-off-by: Jennifer Schmitz <jschmitz@nvidia.com>

gcc/
	* config/aarch64/aarch64-sve-builtins-base.cc (svdiv_impl::fold):
	Implement strength reduction.

gcc/testsuite/
	* gcc.target/aarch64/sve/div_const_run.c: New test.
	* gcc.target/aarch64/sve/acle/asm/div_s32.c: Likewise.
	* gcc.target/aarch64/sve/acle/asm/div_s64.c: Likewise.
	* gcc.target/aarch64/sve/acle/asm/div_u32.c: Likewise.
	* gcc.target/aarch64/sve/acle/asm/div_u64.c: Likewise.
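As an illustration (not part of the patch), a minimal example of the kind of call this fold targets; with the fold applied, the signed case is expected to reduce to an ASRD by log2 of the divisor and the unsigned case to an LSR, as the amended assembly tests below check:

/* Illustration only: division by a uniform power-of-2 constant.  */
#include <arm_sve.h>

svint32_t div_s32_by_16 (svbool_t pg, svint32_t x)
{
  return svdiv_n_s32_x (pg, x, 16);	/* expected to fold to an ASRD by #4 */
}

svuint32_t div_u32_by_16 (svbool_t pg, svuint32_t x)
{
  return svdiv_n_u32_x (pg, x, 16);	/* expected to fold to an LSR by #4 */
}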
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-base.cc           |  49
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c   | 273
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s64.c   | 273
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u32.c   | 201
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u64.c   | 201
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/div_const_run.c      |  91
6 files changed, 1031 insertions, 57 deletions
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index a226835..d55bee0 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -746,6 +746,53 @@ public:
}
};
+class svdiv_impl : public rtx_code_function
+{
+public:
+ CONSTEXPR svdiv_impl ()
+ : rtx_code_function (DIV, UDIV, UNSPEC_COND_FDIV) {}
+
+ gimple *
+ fold (gimple_folder &f) const override
+ {
+ tree divisor = gimple_call_arg (f.call, 2);
+ tree divisor_cst = uniform_integer_cst_p (divisor);
+
+ if (!divisor_cst || !integer_pow2p (divisor_cst))
+ return NULL;
+
+ tree new_divisor;
+ gcall *call;
+
+ if (f.type_suffix (0).unsigned_p && tree_to_uhwi (divisor_cst) != 1)
+ {
+ function_instance instance ("svlsr", functions::svlsr,
+ shapes::binary_uint_opt_n, MODE_n,
+ f.type_suffix_ids, GROUP_none, f.pred);
+ call = f.redirect_call (instance);
+ tree d = INTEGRAL_TYPE_P (TREE_TYPE (divisor)) ? divisor : divisor_cst;
+ new_divisor = wide_int_to_tree (TREE_TYPE (d), tree_log2 (d));
+ }
+ else
+ {
+ if (tree_int_cst_sign_bit (divisor_cst)
+ || tree_to_shwi (divisor_cst) == 1)
+ return NULL;
+
+ function_instance instance ("svasrd", functions::svasrd,
+ shapes::shift_right_imm, MODE_n,
+ f.type_suffix_ids, GROUP_none, f.pred);
+ call = f.redirect_call (instance);
+ new_divisor = wide_int_to_tree (scalar_types[VECTOR_TYPE_svuint64_t],
+ tree_log2 (divisor_cst));
+ }
+
+ gimple_call_set_arg (call, 2, new_divisor);
+ return call;
+ }
+};
+
+
class svdot_impl : public function_base
{
public:
@@ -3043,7 +3090,7 @@ FUNCTION (svcreate3, svcreate_impl, (3))
FUNCTION (svcreate4, svcreate_impl, (4))
FUNCTION (svcvt, svcvt_impl,)
FUNCTION (svcvtnt, CODE_FOR_MODE0 (aarch64_sve_cvtnt),)
-FUNCTION (svdiv, rtx_code_function, (DIV, UDIV, UNSPEC_COND_FDIV))
+FUNCTION (svdiv, svdiv_impl,)
FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV))
FUNCTION (svdot, svdot_impl,)
FUNCTION (svdot_lane, svdotprod_lane_impl, (UNSPEC_SDOT, UNSPEC_UDOT,
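A note on the signed path (illustration, not part of the patch): a plain arithmetic shift right rounds toward minus infinity, whereas signed division and the ASRD instruction round toward zero, which is why the fold uses svasrd rather than a plain shift; it also bails out when the divisor is 1 or has its sign bit set (e.g. INT_MIN), as the tests below confirm. A scalar sketch of the rounding ASRD performs, assuming shift < 31 and GCC's arithmetic right shift of negative values:

/* Scalar sketch of ASRD-style rounding: bias negative inputs by
   2^shift - 1 before shifting so the result rounds toward zero,
   matching C signed division by 1 << shift.  */
static inline int32_t
asrd_model (int32_t x, unsigned shift)
{
  int32_t bias = (x < 0) ? (((int32_t) 1 << shift) - 1) : 0;
  return (x + bias) >> shift;	/* e.g. (-7 + 3) >> 2 == -1 == -7 / 4 */
}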
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c
index c49ca1a..d5a23bf 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c
@@ -2,6 +2,8 @@
#include "test_sve_acle.h"
+#define MAXPOW 1<<30
+
/*
** div_s32_m_tied1:
** sdiv z0\.s, p0/m, z0\.s, z1\.s
@@ -54,9 +56,26 @@ TEST_UNIFORM_ZX (div_w0_s32_m_untied, svint32_t, int32_t,
z0 = svdiv_m (p0, z1, x0))
/*
+** div_1_s32_m_tied1:
+** sel z0\.s, p0, z0\.s, z0\.s
+** ret
+*/
+TEST_UNIFORM_Z (div_1_s32_m_tied1, svint32_t,
+ z0 = svdiv_n_s32_m (p0, z0, 1),
+ z0 = svdiv_m (p0, z0, 1))
+
+/*
+** div_1_s32_m_untied:
+** sel z0\.s, p0, z1\.s, z1\.s
+** ret
+*/
+TEST_UNIFORM_Z (div_1_s32_m_untied, svint32_t,
+ z0 = svdiv_n_s32_m (p0, z1, 1),
+ z0 = svdiv_m (p0, z1, 1))
+
+/*
** div_2_s32_m_tied1:
-** mov (z[0-9]+\.s), #2
-** sdiv z0\.s, p0/m, z0\.s, \1
+** asrd z0\.s, p0/m, z0\.s, #1
** ret
*/
TEST_UNIFORM_Z (div_2_s32_m_tied1, svint32_t,
@@ -65,9 +84,8 @@ TEST_UNIFORM_Z (div_2_s32_m_tied1, svint32_t,
/*
** div_2_s32_m_untied:
-** mov (z[0-9]+\.s), #2
** movprfx z0, z1
-** sdiv z0\.s, p0/m, z0\.s, \1
+** asrd z0\.s, p0/m, z0\.s, #1
** ret
*/
TEST_UNIFORM_Z (div_2_s32_m_untied, svint32_t,
@@ -75,6 +93,67 @@ TEST_UNIFORM_Z (div_2_s32_m_untied, svint32_t,
z0 = svdiv_m (p0, z1, 2))
/*
+** div_3_s32_m_tied1:
+** mov (z[0-9]+\.s), #3
+** sdiv z0\.s, p0/m, z0\.s, \1
+** ret
+*/
+TEST_UNIFORM_Z (div_3_s32_m_tied1, svint32_t,
+ z0 = svdiv_n_s32_m (p0, z0, 3),
+ z0 = svdiv_m (p0, z0, 3))
+
+/*
+** div_3_s32_m_untied:
+** mov (z[0-9]+\.s), #3
+** movprfx z0, z1
+** sdiv z0\.s, p0/m, z0\.s, \1
+** ret
+*/
+TEST_UNIFORM_Z (div_3_s32_m_untied, svint32_t,
+ z0 = svdiv_n_s32_m (p0, z1, 3),
+ z0 = svdiv_m (p0, z1, 3))
+
+/*
+** div_maxpow_s32_m_tied1:
+** asrd z0\.s, p0/m, z0\.s, #30
+** ret
+*/
+TEST_UNIFORM_Z (div_maxpow_s32_m_tied1, svint32_t,
+ z0 = svdiv_n_s32_m (p0, z0, MAXPOW),
+ z0 = svdiv_m (p0, z0, MAXPOW))
+
+/*
+** div_maxpow_s32_m_untied:
+** movprfx z0, z1
+** asrd z0\.s, p0/m, z0\.s, #30
+** ret
+*/
+TEST_UNIFORM_Z (div_maxpow_s32_m_untied, svint32_t,
+ z0 = svdiv_n_s32_m (p0, z1, MAXPOW),
+ z0 = svdiv_m (p0, z1, MAXPOW))
+
+/*
+** div_intmin_s32_m_tied1:
+** mov (z[0-9]+\.s), #-2147483648
+** sdiv z0\.s, p0/m, z0\.s, \1
+** ret
+*/
+TEST_UNIFORM_Z (div_intmin_s32_m_tied1, svint32_t,
+ z0 = svdiv_n_s32_m (p0, z0, INT32_MIN),
+ z0 = svdiv_m (p0, z0, INT32_MIN))
+
+/*
+** div_intmin_s32_m_untied:
+** mov (z[0-9]+\.s), #-2147483648
+** movprfx z0, z1
+** sdiv z0\.s, p0/m, z0\.s, \1
+** ret
+*/
+TEST_UNIFORM_Z (div_intmin_s32_m_untied, svint32_t,
+ z0 = svdiv_n_s32_m (p0, z1, INT32_MIN),
+ z0 = svdiv_m (p0, z1, INT32_MIN))
+
+/*
** div_s32_z_tied1:
** movprfx z0\.s, p0/z, z0\.s
** sdiv z0\.s, p0/m, z0\.s, z1\.s
@@ -137,19 +216,61 @@ TEST_UNIFORM_ZX (div_w0_s32_z_untied, svint32_t, int32_t,
z0 = svdiv_z (p0, z1, x0))
/*
-** div_2_s32_z_tied1:
-** mov (z[0-9]+\.s), #2
+** div_1_s32_z_tied1:
+** mov (z[0-9]+\.s), #1
** movprfx z0\.s, p0/z, z0\.s
** sdiv z0\.s, p0/m, z0\.s, \1
** ret
*/
+TEST_UNIFORM_Z (div_1_s32_z_tied1, svint32_t,
+ z0 = svdiv_n_s32_z (p0, z0, 1),
+ z0 = svdiv_z (p0, z0, 1))
+
+/*
+** div_1_s32_z_untied:
+** mov z0\.s, #1
+** movprfx z0\.s, p0/z, z0\.s
+** sdivr z0\.s, p0/m, z0\.s, z1\.s
+** ret
+*/
+TEST_UNIFORM_Z (div_1_s32_z_untied, svint32_t,
+ z0 = svdiv_n_s32_z (p0, z1, 1),
+ z0 = svdiv_z (p0, z1, 1))
+
+/*
+** div_2_s32_z_tied1:
+** movprfx z0\.s, p0/z, z0\.s
+** asrd z0\.s, p0/m, z0\.s, #1
+** ret
+*/
TEST_UNIFORM_Z (div_2_s32_z_tied1, svint32_t,
z0 = svdiv_n_s32_z (p0, z0, 2),
z0 = svdiv_z (p0, z0, 2))
/*
** div_2_s32_z_untied:
-** mov (z[0-9]+\.s), #2
+** movprfx z0\.s, p0/z, z1\.s
+** asrd z0\.s, p0/m, z0\.s, #1
+** ret
+*/
+TEST_UNIFORM_Z (div_2_s32_z_untied, svint32_t,
+ z0 = svdiv_n_s32_z (p0, z1, 2),
+ z0 = svdiv_z (p0, z1, 2))
+
+/*
+** div_3_s32_z_tied1:
+** mov (z[0-9]+\.s), #3
+** movprfx z0\.s, p0/z, z0\.s
+** sdiv z0\.s, p0/m, z0\.s, \1
+** ret
+*/
+TEST_UNIFORM_Z (div_3_s32_z_tied1, svint32_t,
+ z0 = svdiv_n_s32_z (p0, z0, 3),
+ z0 = svdiv_z (p0, z0, 3))
+
+/*
+** div_3_s32_z_untied:
+** mov (z[0-9]+\.s), #3
** (
** movprfx z0\.s, p0/z, z1\.s
** sdiv z0\.s, p0/m, z0\.s, \1
@@ -159,9 +280,56 @@ TEST_UNIFORM_Z (div_2_s32_z_tied1, svint32_t,
** )
** ret
*/
-TEST_UNIFORM_Z (div_2_s32_z_untied, svint32_t,
- z0 = svdiv_n_s32_z (p0, z1, 2),
- z0 = svdiv_z (p0, z1, 2))
+TEST_UNIFORM_Z (div_3_s32_z_untied, svint32_t,
+ z0 = svdiv_n_s32_z (p0, z1, 3),
+ z0 = svdiv_z (p0, z1, 3))
+
+/*
+** div_maxpow_s32_z_tied1:
+** movprfx z0\.s, p0/z, z0\.s
+** asrd z0\.s, p0/m, z0\.s, #30
+** ret
+*/
+TEST_UNIFORM_Z (div_maxpow_s32_z_tied1, svint32_t,
+ z0 = svdiv_n_s32_z (p0, z0, MAXPOW),
+ z0 = svdiv_z (p0, z0, MAXPOW))
+
+/*
+** div_maxpow_s32_z_untied:
+** movprfx z0\.s, p0/z, z1\.s
+** asrd z0\.s, p0/m, z0\.s, #30
+** ret
+*/
+TEST_UNIFORM_Z (div_maxpow_s32_z_untied, svint32_t,
+ z0 = svdiv_n_s32_z (p0, z1, MAXPOW),
+ z0 = svdiv_z (p0, z1, MAXPOW))
+
+/*
+** div_intmin_s32_z_tied1:
+** mov (z[0-9]+\.s), #-2147483648
+** movprfx z0\.s, p0/z, z0\.s
+** sdiv z0\.s, p0/m, z0\.s, \1
+** ret
+*/
+TEST_UNIFORM_Z (div_intmin_s32_z_tied1, svint32_t,
+ z0 = svdiv_n_s32_z (p0, z0, INT32_MIN),
+ z0 = svdiv_z (p0, z0, INT32_MIN))
+
+/*
+** div_intmin_s32_z_untied:
+** mov (z[0-9]+\.s), #-2147483648
+** (
+** movprfx z0\.s, p0/z, z1\.s
+** sdiv z0\.s, p0/m, z0\.s, \1
+** |
+** movprfx z0\.s, p0/z, \1
+** sdivr z0\.s, p0/m, z0\.s, z1\.s
+** )
+** ret
+*/
+TEST_UNIFORM_Z (div_intmin_s32_z_untied, svint32_t,
+ z0 = svdiv_n_s32_z (p0, z1, INT32_MIN),
+ z0 = svdiv_z (p0, z1, INT32_MIN))
/*
** div_s32_x_tied1:
@@ -217,9 +385,25 @@ TEST_UNIFORM_ZX (div_w0_s32_x_untied, svint32_t, int32_t,
z0 = svdiv_x (p0, z1, x0))
/*
+** div_1_s32_x_tied1:
+** ret
+*/
+TEST_UNIFORM_Z (div_1_s32_x_tied1, svint32_t,
+ z0 = svdiv_n_s32_x (p0, z0, 1),
+ z0 = svdiv_x (p0, z0, 1))
+
+/*
+** div_1_s32_x_untied:
+** mov z0\.d, z1\.d
+** ret
+*/
+TEST_UNIFORM_Z (div_1_s32_x_untied, svint32_t,
+ z0 = svdiv_n_s32_x (p0, z1, 1),
+ z0 = svdiv_x (p0, z1, 1))
+
+/*
** div_2_s32_x_tied1:
-** mov (z[0-9]+\.s), #2
-** sdiv z0\.s, p0/m, z0\.s, \1
+** asrd z0\.s, p0/m, z0\.s, #1
** ret
*/
TEST_UNIFORM_Z (div_2_s32_x_tied1, svint32_t,
@@ -228,10 +412,71 @@ TEST_UNIFORM_Z (div_2_s32_x_tied1, svint32_t,
/*
** div_2_s32_x_untied:
-** mov z0\.s, #2
-** sdivr z0\.s, p0/m, z0\.s, z1\.s
+** movprfx z0, z1
+** asrd z0\.s, p0/m, z0\.s, #1
** ret
*/
TEST_UNIFORM_Z (div_2_s32_x_untied, svint32_t,
z0 = svdiv_n_s32_x (p0, z1, 2),
z0 = svdiv_x (p0, z1, 2))
+
+/*
+** div_3_s32_x_tied1:
+** mov (z[0-9]+\.s), #3
+** sdiv z0\.s, p0/m, z0\.s, \1
+** ret
+*/
+TEST_UNIFORM_Z (div_3_s32_x_tied1, svint32_t,
+ z0 = svdiv_n_s32_x (p0, z0, 3),
+ z0 = svdiv_x (p0, z0, 3))
+
+/*
+** div_3_s32_x_untied:
+** mov z0\.s, #3
+** sdivr z0\.s, p0/m, z0\.s, z1\.s
+** ret
+*/
+TEST_UNIFORM_Z (div_3_s32_x_untied, svint32_t,
+ z0 = svdiv_n_s32_x (p0, z1, 3),
+ z0 = svdiv_x (p0, z1, 3))
+
+/*
+** div_maxpow_s32_x_tied1:
+** asrd z0\.s, p0/m, z0\.s, #30
+** ret
+*/
+TEST_UNIFORM_Z (div_maxpow_s32_x_tied1, svint32_t,
+ z0 = svdiv_n_s32_x (p0, z0, MAXPOW),
+ z0 = svdiv_x (p0, z0, MAXPOW))
+
+/*
+** div_maxpow_s32_x_untied:
+** movprfx z0, z1
+** asrd z0\.s, p0/m, z0\.s, #30
+** ret
+*/
+TEST_UNIFORM_Z (div_maxpow_s32_x_untied, svint32_t,
+ z0 = svdiv_n_s32_x (p0, z1, MAXPOW),
+ z0 = svdiv_x (p0, z1, MAXPOW))
+
+/*
+** div_intmin_s32_x_tied1:
+** mov (z[0-9]+\.s), #-2147483648
+** sdiv z0\.s, p0/m, z0\.s, \1
+** ret
+*/
+TEST_UNIFORM_Z (div_intmin_s32_x_tied1, svint32_t,
+ z0 = svdiv_n_s32_x (p0, z0, INT32_MIN),
+ z0 = svdiv_x (p0, z0, INT32_MIN))
+
+/*
+** div_intmin_s32_x_untied:
+** mov z0\.s, #-2147483648
+** sdivr z0\.s, p0/m, z0\.s, z1\.s
+** ret
+*/
+TEST_UNIFORM_Z (div_intmin_s32_x_untied, svint32_t,
+ z0 = svdiv_n_s32_x (p0, z1, INT32_MIN),
+ z0 = svdiv_x (p0, z1, INT32_MIN))
+
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s64.c
index 464dca2..cfed6f9 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s64.c
@@ -2,6 +2,8 @@
#include "test_sve_acle.h"
+#define MAXPOW 1ULL<<62
+
/*
** div_s64_m_tied1:
** sdiv z0\.d, p0/m, z0\.d, z1\.d
@@ -54,9 +56,26 @@ TEST_UNIFORM_ZX (div_x0_s64_m_untied, svint64_t, int64_t,
z0 = svdiv_m (p0, z1, x0))
/*
+** div_1_s64_m_tied1:
+** sel z0\.d, p0, z0\.d, z0\.d
+** ret
+*/
+TEST_UNIFORM_Z (div_1_s64_m_tied1, svint64_t,
+ z0 = svdiv_n_s64_m (p0, z0, 1),
+ z0 = svdiv_m (p0, z0, 1))
+
+/*
+** div_1_s64_m_untied:
+** sel z0\.d, p0, z1\.d, z1\.d
+** ret
+*/
+TEST_UNIFORM_Z (div_1_s64_m_untied, svint64_t,
+ z0 = svdiv_n_s64_m (p0, z1, 1),
+ z0 = svdiv_m (p0, z1, 1))
+
+/*
** div_2_s64_m_tied1:
-** mov (z[0-9]+\.d), #2
-** sdiv z0\.d, p0/m, z0\.d, \1
+** asrd z0\.d, p0/m, z0\.d, #1
** ret
*/
TEST_UNIFORM_Z (div_2_s64_m_tied1, svint64_t,
@@ -65,9 +84,8 @@ TEST_UNIFORM_Z (div_2_s64_m_tied1, svint64_t,
/*
** div_2_s64_m_untied:
-** mov (z[0-9]+\.d), #2
** movprfx z0, z1
-** sdiv z0\.d, p0/m, z0\.d, \1
+** asrd z0\.d, p0/m, z0\.d, #1
** ret
*/
TEST_UNIFORM_Z (div_2_s64_m_untied, svint64_t,
@@ -75,6 +93,67 @@ TEST_UNIFORM_Z (div_2_s64_m_untied, svint64_t,
z0 = svdiv_m (p0, z1, 2))
/*
+** div_3_s64_m_tied1:
+** mov (z[0-9]+\.d), #3
+** sdiv z0\.d, p0/m, z0\.d, \1
+** ret
+*/
+TEST_UNIFORM_Z (div_3_s64_m_tied1, svint64_t,
+ z0 = svdiv_n_s64_m (p0, z0, 3),
+ z0 = svdiv_m (p0, z0, 3))
+
+/*
+** div_3_s64_m_untied:
+** mov (z[0-9]+\.d), #3
+** movprfx z0, z1
+** sdiv z0\.d, p0/m, z0\.d, \1
+** ret
+*/
+TEST_UNIFORM_Z (div_3_s64_m_untied, svint64_t,
+ z0 = svdiv_n_s64_m (p0, z1, 3),
+ z0 = svdiv_m (p0, z1, 3))
+
+/*
+** div_maxpow_s64_m_tied1:
+** asrd z0\.d, p0/m, z0\.d, #62
+** ret
+*/
+TEST_UNIFORM_Z (div_maxpow_s64_m_tied1, svint64_t,
+ z0 = svdiv_n_s64_m (p0, z0, MAXPOW),
+ z0 = svdiv_m (p0, z0, MAXPOW))
+
+/*
+** div_maxpow_s64_m_untied:
+** movprfx z0, z1
+** asrd z0\.d, p0/m, z0\.d, #62
+** ret
+*/
+TEST_UNIFORM_Z (div_maxpow_s64_m_untied, svint64_t,
+ z0 = svdiv_n_s64_m (p0, z1, MAXPOW),
+ z0 = svdiv_m (p0, z1, MAXPOW))
+
+/*
+** div_intmin_s64_m_tied1:
+** mov (z[0-9]+\.d), #-9223372036854775808
+** sdiv z0\.d, p0/m, z0\.d, \1
+** ret
+*/
+TEST_UNIFORM_Z (div_intmin_s64_m_tied1, svint64_t,
+ z0 = svdiv_n_s64_m (p0, z0, INT64_MIN),
+ z0 = svdiv_m (p0, z0, INT64_MIN))
+
+/*
+** div_intmin_s64_m_untied:
+** mov (z[0-9]+\.d), #-9223372036854775808
+** movprfx z0, z1
+** sdiv z0\.d, p0/m, z0\.d, \1
+** ret
+*/
+TEST_UNIFORM_Z (div_intmin_s64_m_untied, svint64_t,
+ z0 = svdiv_n_s64_m (p0, z1, INT64_MIN),
+ z0 = svdiv_m (p0, z1, INT64_MIN))
+
+/*
** div_s64_z_tied1:
** movprfx z0\.d, p0/z, z0\.d
** sdiv z0\.d, p0/m, z0\.d, z1\.d
@@ -137,19 +216,61 @@ TEST_UNIFORM_ZX (div_x0_s64_z_untied, svint64_t, int64_t,
z0 = svdiv_z (p0, z1, x0))
/*
-** div_2_s64_z_tied1:
-** mov (z[0-9]+\.d), #2
+** div_1_s64_z_tied1:
+** mov (z[0-9]+\.d), #1
** movprfx z0\.d, p0/z, z0\.d
** sdiv z0\.d, p0/m, z0\.d, \1
** ret
*/
+TEST_UNIFORM_Z (div_1_s64_z_tied1, svint64_t,
+ z0 = svdiv_n_s64_z (p0, z0, 1),
+ z0 = svdiv_z (p0, z0, 1))
+
+/*
+** div_1_s64_z_untied:
+** mov z0\.d, #1
+** movprfx z0\.d, p0/z, z0\.d
+** sdivr z0\.d, p0/m, z0\.d, z1\.d
+** ret
+*/
+TEST_UNIFORM_Z (div_1_s64_z_untied, svint64_t,
+ z0 = svdiv_n_s64_z (p0, z1, 1),
+ z0 = svdiv_z (p0, z1, 1))
+
+/*
+** div_2_s64_z_tied1:
+** movprfx z0\.d, p0/z, z0\.d
+** asrd z0\.d, p0/m, z0\.d, #1
+** ret
+*/
TEST_UNIFORM_Z (div_2_s64_z_tied1, svint64_t,
z0 = svdiv_n_s64_z (p0, z0, 2),
z0 = svdiv_z (p0, z0, 2))
/*
** div_2_s64_z_untied:
-** mov (z[0-9]+\.d), #2
+** movprfx z0\.d, p0/z, z1\.d
+** asrd z0\.d, p0/m, z0\.d, #1
+** ret
+*/
+TEST_UNIFORM_Z (div_2_s64_z_untied, svint64_t,
+ z0 = svdiv_n_s64_z (p0, z1, 2),
+ z0 = svdiv_z (p0, z1, 2))
+
+/*
+** div_3_s64_z_tied1:
+** mov (z[0-9]+\.d), #3
+** movprfx z0\.d, p0/z, z0\.d
+** sdiv z0\.d, p0/m, z0\.d, \1
+** ret
+*/
+TEST_UNIFORM_Z (div_3_s64_z_tied1, svint64_t,
+ z0 = svdiv_n_s64_z (p0, z0, 3),
+ z0 = svdiv_z (p0, z0, 3))
+
+/*
+** div_3_s64_z_untied:
+** mov (z[0-9]+\.d), #3
** (
** movprfx z0\.d, p0/z, z1\.d
** sdiv z0\.d, p0/m, z0\.d, \1
@@ -159,9 +280,56 @@ TEST_UNIFORM_Z (div_2_s64_z_tied1, svint64_t,
** )
** ret
*/
-TEST_UNIFORM_Z (div_2_s64_z_untied, svint64_t,
- z0 = svdiv_n_s64_z (p0, z1, 2),
- z0 = svdiv_z (p0, z1, 2))
+TEST_UNIFORM_Z (div_3_s64_z_untied, svint64_t,
+ z0 = svdiv_n_s64_z (p0, z1, 3),
+ z0 = svdiv_z (p0, z1, 3))
+
+/*
+** div_maxpow_s64_z_tied1:
+** movprfx z0\.d, p0/z, z0\.d
+** asrd z0\.d, p0/m, z0\.d, #62
+** ret
+*/
+TEST_UNIFORM_Z (div_maxpow_s64_z_tied1, svint64_t,
+ z0 = svdiv_n_s64_z (p0, z0, MAXPOW),
+ z0 = svdiv_z (p0, z0, MAXPOW))
+
+/*
+** div_maxpow_s64_z_untied:
+** movprfx z0\.d, p0/z, z1\.d
+** asrd z0\.d, p0/m, z0\.d, #62
+** ret
+*/
+TEST_UNIFORM_Z (div_maxpow_s64_z_untied, svint64_t,
+ z0 = svdiv_n_s64_z (p0, z1, MAXPOW),
+ z0 = svdiv_z (p0, z1, MAXPOW))
+
+/*
+** div_intmin_s64_z_tied1:
+** mov (z[0-9]+\.d), #-9223372036854775808
+** movprfx z0\.d, p0/z, z0\.d
+** sdiv z0\.d, p0/m, z0\.d, \1
+** ret
+*/
+TEST_UNIFORM_Z (div_intmin_s64_z_tied1, svint64_t,
+ z0 = svdiv_n_s64_z (p0, z0, INT64_MIN),
+ z0 = svdiv_z (p0, z0, INT64_MIN))
+
+/*
+** div_intmin_s64_z_untied:
+** mov (z[0-9]+\.d), #-9223372036854775808
+** (
+** movprfx z0\.d, p0/z, z1\.d
+** sdiv z0\.d, p0/m, z0\.d, \1
+** |
+** movprfx z0\.d, p0/z, \1
+** sdivr z0\.d, p0/m, z0\.d, z1\.d
+** )
+** ret
+*/
+TEST_UNIFORM_Z (div_intmin_s64_z_untied, svint64_t,
+ z0 = svdiv_n_s64_z (p0, z1, INT64_MIN),
+ z0 = svdiv_z (p0, z1, INT64_MIN))
/*
** div_s64_x_tied1:
@@ -217,9 +385,25 @@ TEST_UNIFORM_ZX (div_x0_s64_x_untied, svint64_t, int64_t,
z0 = svdiv_x (p0, z1, x0))
/*
+** div_1_s64_x_tied1:
+** ret
+*/
+TEST_UNIFORM_Z (div_1_s64_x_tied1, svint64_t,
+ z0 = svdiv_n_s64_x (p0, z0, 1),
+ z0 = svdiv_x (p0, z0, 1))
+
+/*
+** div_1_s64_x_untied:
+** mov z0\.d, z1\.d
+** ret
+*/
+TEST_UNIFORM_Z (div_1_s64_x_untied, svint64_t,
+ z0 = svdiv_n_s64_x (p0, z1, 1),
+ z0 = svdiv_x (p0, z1, 1))
+
+/*
** div_2_s64_x_tied1:
-** mov (z[0-9]+\.d), #2
-** sdiv z0\.d, p0/m, z0\.d, \1
+** asrd z0\.d, p0/m, z0\.d, #1
** ret
*/
TEST_UNIFORM_Z (div_2_s64_x_tied1, svint64_t,
@@ -228,10 +412,71 @@ TEST_UNIFORM_Z (div_2_s64_x_tied1, svint64_t,
/*
** div_2_s64_x_untied:
-** mov z0\.d, #2
-** sdivr z0\.d, p0/m, z0\.d, z1\.d
+** movprfx z0, z1
+** asrd z0\.d, p0/m, z0\.d, #1
** ret
*/
TEST_UNIFORM_Z (div_2_s64_x_untied, svint64_t,
z0 = svdiv_n_s64_x (p0, z1, 2),
z0 = svdiv_x (p0, z1, 2))
+
+/*
+** div_3_s64_x_tied1:
+** mov (z[0-9]+\.d), #3
+** sdiv z0\.d, p0/m, z0\.d, \1
+** ret
+*/
+TEST_UNIFORM_Z (div_3_s64_x_tied1, svint64_t,
+ z0 = svdiv_n_s64_x (p0, z0, 3),
+ z0 = svdiv_x (p0, z0, 3))
+
+/*
+** div_3_s64_x_untied:
+** mov z0\.d, #3
+** sdivr z0\.d, p0/m, z0\.d, z1\.d
+** ret
+*/
+TEST_UNIFORM_Z (div_3_s64_x_untied, svint64_t,
+ z0 = svdiv_n_s64_x (p0, z1, 3),
+ z0 = svdiv_x (p0, z1, 3))
+
+/*
+** div_maxpow_s64_x_tied1:
+** asrd z0\.d, p0/m, z0\.d, #62
+** ret
+*/
+TEST_UNIFORM_Z (div_maxpow_s64_x_tied1, svint64_t,
+ z0 = svdiv_n_s64_x (p0, z0, MAXPOW),
+ z0 = svdiv_x (p0, z0, MAXPOW))
+
+/*
+** div_maxpow_s64_x_untied:
+** movprfx z0, z1
+** asrd z0\.d, p0/m, z0\.d, #62
+** ret
+*/
+TEST_UNIFORM_Z (div_maxpow_s64_x_untied, svint64_t,
+ z0 = svdiv_n_s64_x (p0, z1, MAXPOW),
+ z0 = svdiv_x (p0, z1, MAXPOW))
+
+/*
+** div_intmin_s64_x_tied1:
+** mov (z[0-9]+\.d), #-9223372036854775808
+** sdiv z0\.d, p0/m, z0\.d, \1
+** ret
+*/
+TEST_UNIFORM_Z (div_intmin_s64_x_tied1, svint64_t,
+ z0 = svdiv_n_s64_x (p0, z0, INT64_MIN),
+ z0 = svdiv_x (p0, z0, INT64_MIN))
+
+/*
+** div_intmin_s64_x_untied:
+** mov z0\.d, #-9223372036854775808
+** sdivr z0\.d, p0/m, z0\.d, z1\.d
+** ret
+*/
+TEST_UNIFORM_Z (div_intmin_s64_x_untied, svint64_t,
+ z0 = svdiv_n_s64_x (p0, z1, INT64_MIN),
+ z0 = svdiv_x (p0, z1, INT64_MIN))
+
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u32.c
index 232ccac..9707664 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u32.c
@@ -2,6 +2,8 @@
#include "test_sve_acle.h"
+#define MAXPOW 1<<31
+
/*
** div_u32_m_tied1:
** udiv z0\.s, p0/m, z0\.s, z1\.s
@@ -54,9 +56,26 @@ TEST_UNIFORM_ZX (div_w0_u32_m_untied, svuint32_t, uint32_t,
z0 = svdiv_m (p0, z1, x0))
/*
+** div_1_u32_m_tied1:
+** sel z0\.s, p0, z0\.s, z0\.s
+** ret
+*/
+TEST_UNIFORM_Z (div_1_u32_m_tied1, svuint32_t,
+ z0 = svdiv_n_u32_m (p0, z0, 1),
+ z0 = svdiv_m (p0, z0, 1))
+
+/*
+** div_1_u32_m_untied:
+** sel z0\.s, p0, z1\.s, z1\.s
+** ret
+*/
+TEST_UNIFORM_Z (div_1_u32_m_untied, svuint32_t,
+ z0 = svdiv_n_u32_m (p0, z1, 1),
+ z0 = svdiv_m (p0, z1, 1))
+
+/*
** div_2_u32_m_tied1:
-** mov (z[0-9]+\.s), #2
-** udiv z0\.s, p0/m, z0\.s, \1
+** lsr z0\.s, p0/m, z0\.s, #1
** ret
*/
TEST_UNIFORM_Z (div_2_u32_m_tied1, svuint32_t,
@@ -65,9 +84,8 @@ TEST_UNIFORM_Z (div_2_u32_m_tied1, svuint32_t,
/*
** div_2_u32_m_untied:
-** mov (z[0-9]+\.s), #2
** movprfx z0, z1
-** udiv z0\.s, p0/m, z0\.s, \1
+** lsr z0\.s, p0/m, z0\.s, #1
** ret
*/
TEST_UNIFORM_Z (div_2_u32_m_untied, svuint32_t,
@@ -75,6 +93,46 @@ TEST_UNIFORM_Z (div_2_u32_m_untied, svuint32_t,
z0 = svdiv_m (p0, z1, 2))
/*
+** div_3_u32_m_tied1:
+** mov (z[0-9]+\.s), #3
+** udiv z0\.s, p0/m, z0\.s, \1
+** ret
+*/
+TEST_UNIFORM_Z (div_3_u32_m_tied1, svuint32_t,
+ z0 = svdiv_n_u32_m (p0, z0, 3),
+ z0 = svdiv_m (p0, z0, 3))
+
+/*
+** div_3_u32_m_untied:
+** mov (z[0-9]+\.s), #3
+** movprfx z0, z1
+** udiv z0\.s, p0/m, z0\.s, \1
+** ret
+*/
+TEST_UNIFORM_Z (div_3_u32_m_untied, svuint32_t,
+ z0 = svdiv_n_u32_m (p0, z1, 3),
+ z0 = svdiv_m (p0, z1, 3))
+
+/*
+** div_maxpow_u32_m_tied1:
+** lsr z0\.s, p0/m, z0\.s, #31
+** ret
+*/
+TEST_UNIFORM_Z (div_maxpow_u32_m_tied1, svuint32_t,
+ z0 = svdiv_n_u32_m (p0, z0, MAXPOW),
+ z0 = svdiv_m (p0, z0, MAXPOW))
+
+/*
+** div_maxpow_u32_m_untied:
+** movprfx z0, z1
+** lsr z0\.s, p0/m, z0\.s, #31
+** ret
+*/
+TEST_UNIFORM_Z (div_maxpow_u32_m_untied, svuint32_t,
+ z0 = svdiv_n_u32_m (p0, z1, MAXPOW),
+ z0 = svdiv_m (p0, z1, MAXPOW))
+
+/*
** div_u32_z_tied1:
** movprfx z0\.s, p0/z, z0\.s
** udiv z0\.s, p0/m, z0\.s, z1\.s
@@ -137,19 +195,61 @@ TEST_UNIFORM_ZX (div_w0_u32_z_untied, svuint32_t, uint32_t,
z0 = svdiv_z (p0, z1, x0))
/*
-** div_2_u32_z_tied1:
-** mov (z[0-9]+\.s), #2
+** div_1_u32_z_tied1:
+** mov (z[0-9]+\.s), #1
** movprfx z0\.s, p0/z, z0\.s
** udiv z0\.s, p0/m, z0\.s, \1
** ret
*/
+TEST_UNIFORM_Z (div_1_u32_z_tied1, svuint32_t,
+ z0 = svdiv_n_u32_z (p0, z0, 1),
+ z0 = svdiv_z (p0, z0, 1))
+
+/*
+** div_1_u32_z_untied:
+** mov z0\.s, #1
+** movprfx z0\.s, p0/z, z0\.s
+** udivr z0\.s, p0/m, z0\.s, z1\.s
+** ret
+*/
+TEST_UNIFORM_Z (div_1_u32_z_untied, svuint32_t,
+ z0 = svdiv_n_u32_z (p0, z1, 1),
+ z0 = svdiv_z (p0, z1, 1))
+
+/*
+** div_2_u32_z_tied1:
+** movprfx z0\.s, p0/z, z0\.s
+** lsr z0\.s, p0/m, z0\.s, #1
+** ret
+*/
TEST_UNIFORM_Z (div_2_u32_z_tied1, svuint32_t,
z0 = svdiv_n_u32_z (p0, z0, 2),
z0 = svdiv_z (p0, z0, 2))
/*
** div_2_u32_z_untied:
-** mov (z[0-9]+\.s), #2
+** movprfx z0\.s, p0/z, z1\.s
+** lsr z0\.s, p0/m, z0\.s, #1
+** ret
+*/
+TEST_UNIFORM_Z (div_2_u32_z_untied, svuint32_t,
+ z0 = svdiv_n_u32_z (p0, z1, 2),
+ z0 = svdiv_z (p0, z1, 2))
+
+/*
+** div_3_u32_z_tied1:
+** mov (z[0-9]+\.s), #3
+** movprfx z0\.s, p0/z, z0\.s
+** udiv z0\.s, p0/m, z0\.s, \1
+** ret
+*/
+TEST_UNIFORM_Z (div_3_u32_z_tied1, svuint32_t,
+ z0 = svdiv_n_u32_z (p0, z0, 3),
+ z0 = svdiv_z (p0, z0, 3))
+
+/*
+** div_3_u32_z_untied:
+** mov (z[0-9]+\.s), #3
** (
** movprfx z0\.s, p0/z, z1\.s
** udiv z0\.s, p0/m, z0\.s, \1
@@ -159,9 +259,29 @@ TEST_UNIFORM_Z (div_2_u32_z_tied1, svuint32_t,
** )
** ret
*/
-TEST_UNIFORM_Z (div_2_u32_z_untied, svuint32_t,
- z0 = svdiv_n_u32_z (p0, z1, 2),
- z0 = svdiv_z (p0, z1, 2))
+TEST_UNIFORM_Z (div_3_u32_z_untied, svuint32_t,
+ z0 = svdiv_n_u32_z (p0, z1, 3),
+ z0 = svdiv_z (p0, z1, 3))
+
+/*
+** div_maxpow_u32_z_tied1:
+** movprfx z0\.s, p0/z, z0\.s
+** lsr z0\.s, p0/m, z0\.s, #31
+** ret
+*/
+TEST_UNIFORM_Z (div_maxpow_u32_z_tied1, svuint32_t,
+ z0 = svdiv_n_u32_z (p0, z0, MAXPOW),
+ z0 = svdiv_z (p0, z0, MAXPOW))
+
+/*
+** div_maxpow_u32_z_untied:
+** movprfx z0\.s, p0/z, z1\.s
+** lsr z0\.s, p0/m, z0\.s, #31
+** ret
+*/
+TEST_UNIFORM_Z (div_maxpow_u32_z_untied, svuint32_t,
+ z0 = svdiv_n_u32_z (p0, z1, MAXPOW),
+ z0 = svdiv_z (p0, z1, MAXPOW))
/*
** div_u32_x_tied1:
@@ -217,9 +337,25 @@ TEST_UNIFORM_ZX (div_w0_u32_x_untied, svuint32_t, uint32_t,
z0 = svdiv_x (p0, z1, x0))
/*
+** div_1_u32_x_tied1:
+** ret
+*/
+TEST_UNIFORM_Z (div_1_u32_x_tied1, svuint32_t,
+ z0 = svdiv_n_u32_x (p0, z0, 1),
+ z0 = svdiv_x (p0, z0, 1))
+
+/*
+** div_1_u32_x_untied:
+** mov z0\.d, z1\.d
+** ret
+*/
+TEST_UNIFORM_Z (div_1_u32_x_untied, svuint32_t,
+ z0 = svdiv_n_u32_x (p0, z1, 1),
+ z0 = svdiv_x (p0, z1, 1))
+
+/*
** div_2_u32_x_tied1:
-** mov (z[0-9]+\.s), #2
-** udiv z0\.s, p0/m, z0\.s, \1
+** lsr z0\.s, z0\.s, #1
** ret
*/
TEST_UNIFORM_Z (div_2_u32_x_tied1, svuint32_t,
@@ -228,10 +364,47 @@ TEST_UNIFORM_Z (div_2_u32_x_tied1, svuint32_t,
/*
** div_2_u32_x_untied:
-** mov z0\.s, #2
-** udivr z0\.s, p0/m, z0\.s, z1\.s
+** lsr z0\.s, z1\.s, #1
** ret
*/
TEST_UNIFORM_Z (div_2_u32_x_untied, svuint32_t,
z0 = svdiv_n_u32_x (p0, z1, 2),
z0 = svdiv_x (p0, z1, 2))
+
+/*
+** div_3_u32_x_tied1:
+** mov (z[0-9]+\.s), #3
+** udiv z0\.s, p0/m, z0\.s, \1
+** ret
+*/
+TEST_UNIFORM_Z (div_3_u32_x_tied1, svuint32_t,
+ z0 = svdiv_n_u32_x (p0, z0, 3),
+ z0 = svdiv_x (p0, z0, 3))
+
+/*
+** div_3_u32_x_untied:
+** mov z0\.s, #3
+** udivr z0\.s, p0/m, z0\.s, z1\.s
+** ret
+*/
+TEST_UNIFORM_Z (div_3_u32_x_untied, svuint32_t,
+ z0 = svdiv_n_u32_x (p0, z1, 3),
+ z0 = svdiv_x (p0, z1, 3))
+
+/*
+** div_maxpow_u32_x_tied1:
+** lsr z0\.s, z0\.s, #31
+** ret
+*/
+TEST_UNIFORM_Z (div_maxpow_u32_x_tied1, svuint32_t,
+ z0 = svdiv_n_u32_x (p0, z0, MAXPOW),
+ z0 = svdiv_x (p0, z0, MAXPOW))
+
+/*
+** div_maxpow_u32_x_untied:
+** lsr z0\.s, z1\.s, #31
+** ret
+*/
+TEST_UNIFORM_Z (div_maxpow_u32_x_untied, svuint32_t,
+ z0 = svdiv_n_u32_x (p0, z1, MAXPOW),
+ z0 = svdiv_x (p0, z1, MAXPOW))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u64.c
index ac7c026..5247ebd 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u64.c
@@ -2,6 +2,8 @@
#include "test_sve_acle.h"
+#define MAXPOW 1ULL<<63
+
/*
** div_u64_m_tied1:
** udiv z0\.d, p0/m, z0\.d, z1\.d
@@ -54,9 +56,26 @@ TEST_UNIFORM_ZX (div_x0_u64_m_untied, svuint64_t, uint64_t,
z0 = svdiv_m (p0, z1, x0))
/*
+** div_1_u64_m_tied1:
+** sel z0\.d, p0, z0\.d, z0\.d
+** ret
+*/
+TEST_UNIFORM_Z (div_1_u64_m_tied1, svuint64_t,
+ z0 = svdiv_n_u64_m (p0, z0, 1),
+ z0 = svdiv_m (p0, z0, 1))
+
+/*
+** div_1_u64_m_untied:
+** sel z0\.d, p0, z1\.d, z1\.d
+** ret
+*/
+TEST_UNIFORM_Z (div_1_u64_m_untied, svuint64_t,
+ z0 = svdiv_n_u64_m (p0, z1, 1),
+ z0 = svdiv_m (p0, z1, 1))
+
+/*
** div_2_u64_m_tied1:
-** mov (z[0-9]+\.d), #2
-** udiv z0\.d, p0/m, z0\.d, \1
+** lsr z0\.d, p0/m, z0\.d, #1
** ret
*/
TEST_UNIFORM_Z (div_2_u64_m_tied1, svuint64_t,
@@ -65,9 +84,8 @@ TEST_UNIFORM_Z (div_2_u64_m_tied1, svuint64_t,
/*
** div_2_u64_m_untied:
-** mov (z[0-9]+\.d), #2
** movprfx z0, z1
-** udiv z0\.d, p0/m, z0\.d, \1
+** lsr z0\.d, p0/m, z0\.d, #1
** ret
*/
TEST_UNIFORM_Z (div_2_u64_m_untied, svuint64_t,
@@ -75,6 +93,46 @@ TEST_UNIFORM_Z (div_2_u64_m_untied, svuint64_t,
z0 = svdiv_m (p0, z1, 2))
/*
+** div_3_u64_m_tied1:
+** mov (z[0-9]+\.d), #3
+** udiv z0\.d, p0/m, z0\.d, \1
+** ret
+*/
+TEST_UNIFORM_Z (div_3_u64_m_tied1, svuint64_t,
+ z0 = svdiv_n_u64_m (p0, z0, 3),
+ z0 = svdiv_m (p0, z0, 3))
+
+/*
+** div_3_u64_m_untied:
+** mov (z[0-9]+\.d), #3
+** movprfx z0, z1
+** udiv z0\.d, p0/m, z0\.d, \1
+** ret
+*/
+TEST_UNIFORM_Z (div_3_u64_m_untied, svuint64_t,
+ z0 = svdiv_n_u64_m (p0, z1, 3),
+ z0 = svdiv_m (p0, z1, 3))
+
+/*
+** div_maxpow_u64_m_tied1:
+** lsr z0\.d, p0/m, z0\.d, #63
+** ret
+*/
+TEST_UNIFORM_Z (div_maxpow_u64_m_tied1, svuint64_t,
+ z0 = svdiv_n_u64_m (p0, z0, MAXPOW),
+ z0 = svdiv_m (p0, z0, MAXPOW))
+
+/*
+** div_maxpow_u64_m_untied:
+** movprfx z0, z1
+** lsr z0\.d, p0/m, z0\.d, #63
+** ret
+*/
+TEST_UNIFORM_Z (div_maxpow_u64_m_untied, svuint64_t,
+ z0 = svdiv_n_u64_m (p0, z1, MAXPOW),
+ z0 = svdiv_m (p0, z1, MAXPOW))
+
+/*
** div_u64_z_tied1:
** movprfx z0\.d, p0/z, z0\.d
** udiv z0\.d, p0/m, z0\.d, z1\.d
@@ -137,19 +195,61 @@ TEST_UNIFORM_ZX (div_x0_u64_z_untied, svuint64_t, uint64_t,
z0 = svdiv_z (p0, z1, x0))
/*
-** div_2_u64_z_tied1:
-** mov (z[0-9]+\.d), #2
+** div_1_u64_z_tied1:
+** mov (z[0-9]+\.d), #1
** movprfx z0\.d, p0/z, z0\.d
** udiv z0\.d, p0/m, z0\.d, \1
** ret
*/
+TEST_UNIFORM_Z (div_1_u64_z_tied1, svuint64_t,
+ z0 = svdiv_n_u64_z (p0, z0, 1),
+ z0 = svdiv_z (p0, z0, 1))
+
+/*
+** div_1_u64_z_untied:
+** mov z0\.d, #1
+** movprfx z0\.d, p0/z, z0\.d
+** udivr z0\.d, p0/m, z0\.d, z1\.d
+** ret
+*/
+TEST_UNIFORM_Z (div_1_u64_z_untied, svuint64_t,
+ z0 = svdiv_n_u64_z (p0, z1, 1),
+ z0 = svdiv_z (p0, z1, 1))
+
+/*
+** div_2_u64_z_tied1:
+** movprfx z0\.d, p0/z, z0\.d
+** lsr z0\.d, p0/m, z0\.d, #1
+** ret
+*/
TEST_UNIFORM_Z (div_2_u64_z_tied1, svuint64_t,
z0 = svdiv_n_u64_z (p0, z0, 2),
z0 = svdiv_z (p0, z0, 2))
/*
** div_2_u64_z_untied:
-** mov (z[0-9]+\.d), #2
+** movprfx z0\.d, p0/z, z1\.d
+** lsr z0\.d, p0/m, z0\.d, #1
+** ret
+*/
+TEST_UNIFORM_Z (div_2_u64_z_untied, svuint64_t,
+ z0 = svdiv_n_u64_z (p0, z1, 2),
+ z0 = svdiv_z (p0, z1, 2))
+
+/*
+** div_3_u64_z_tied1:
+** mov (z[0-9]+\.d), #3
+** movprfx z0\.d, p0/z, z0\.d
+** udiv z0\.d, p0/m, z0\.d, \1
+** ret
+*/
+TEST_UNIFORM_Z (div_3_u64_z_tied1, svuint64_t,
+ z0 = svdiv_n_u64_z (p0, z0, 3),
+ z0 = svdiv_z (p0, z0, 3))
+
+/*
+** div_3_u64_z_untied:
+** mov (z[0-9]+\.d), #3
** (
** movprfx z0\.d, p0/z, z1\.d
** udiv z0\.d, p0/m, z0\.d, \1
@@ -159,9 +259,29 @@ TEST_UNIFORM_Z (div_2_u64_z_tied1, svuint64_t,
** )
** ret
*/
-TEST_UNIFORM_Z (div_2_u64_z_untied, svuint64_t,
- z0 = svdiv_n_u64_z (p0, z1, 2),
- z0 = svdiv_z (p0, z1, 2))
+TEST_UNIFORM_Z (div_3_u64_z_untied, svuint64_t,
+ z0 = svdiv_n_u64_z (p0, z1, 3),
+ z0 = svdiv_z (p0, z1, 3))
+
+/*
+** div_maxpow_u64_z_tied1:
+** movprfx z0\.d, p0/z, z0\.d
+** lsr z0\.d, p0/m, z0\.d, #63
+** ret
+*/
+TEST_UNIFORM_Z (div_maxpow_u64_z_tied1, svuint64_t,
+ z0 = svdiv_n_u64_z (p0, z0, MAXPOW),
+ z0 = svdiv_z (p0, z0, MAXPOW))
+
+/*
+** div_maxpow_u64_z_untied:
+** movprfx z0\.d, p0/z, z1\.d
+** lsr z0\.d, p0/m, z0\.d, #63
+** ret
+*/
+TEST_UNIFORM_Z (div_maxpow_u64_z_untied, svuint64_t,
+ z0 = svdiv_n_u64_z (p0, z1, MAXPOW),
+ z0 = svdiv_z (p0, z1, MAXPOW))
/*
** div_u64_x_tied1:
@@ -217,9 +337,25 @@ TEST_UNIFORM_ZX (div_x0_u64_x_untied, svuint64_t, uint64_t,
z0 = svdiv_x (p0, z1, x0))
/*
+** div_1_u64_x_tied1:
+** ret
+*/
+TEST_UNIFORM_Z (div_1_u64_x_tied1, svuint64_t,
+ z0 = svdiv_n_u64_x (p0, z0, 1),
+ z0 = svdiv_x (p0, z0, 1))
+
+/*
+** div_1_u64_x_untied:
+** mov z0\.d, z1\.d
+** ret
+*/
+TEST_UNIFORM_Z (div_1_u64_x_untied, svuint64_t,
+ z0 = svdiv_n_u64_x (p0, z1, 1),
+ z0 = svdiv_x (p0, z1, 1))
+
+/*
** div_2_u64_x_tied1:
-** mov (z[0-9]+\.d), #2
-** udiv z0\.d, p0/m, z0\.d, \1
+** lsr z0\.d, z0\.d, #1
** ret
*/
TEST_UNIFORM_Z (div_2_u64_x_tied1, svuint64_t,
@@ -228,10 +364,47 @@ TEST_UNIFORM_Z (div_2_u64_x_tied1, svuint64_t,
/*
** div_2_u64_x_untied:
-** mov z0\.d, #2
-** udivr z0\.d, p0/m, z0\.d, z1\.d
+** lsr z0\.d, z1\.d, #1
** ret
*/
TEST_UNIFORM_Z (div_2_u64_x_untied, svuint64_t,
z0 = svdiv_n_u64_x (p0, z1, 2),
z0 = svdiv_x (p0, z1, 2))
+
+/*
+** div_3_u64_x_tied1:
+** mov (z[0-9]+\.d), #3
+** udiv z0\.d, p0/m, z0\.d, \1
+** ret
+*/
+TEST_UNIFORM_Z (div_3_u64_x_tied1, svuint64_t,
+ z0 = svdiv_n_u64_x (p0, z0, 3),
+ z0 = svdiv_x (p0, z0, 3))
+
+/*
+** div_3_u64_x_untied:
+** mov z0\.d, #3
+** udivr z0\.d, p0/m, z0\.d, z1\.d
+** ret
+*/
+TEST_UNIFORM_Z (div_3_u64_x_untied, svuint64_t,
+ z0 = svdiv_n_u64_x (p0, z1, 3),
+ z0 = svdiv_x (p0, z1, 3))
+
+/*
+** div_maxpow_u64_x_tied1:
+** lsr z0\.d, z0\.d, #63
+** ret
+*/
+TEST_UNIFORM_Z (div_maxpow_u64_x_tied1, svuint64_t,
+ z0 = svdiv_n_u64_x (p0, z0, MAXPOW),
+ z0 = svdiv_x (p0, z0, MAXPOW))
+
+/*
+** div_maxpow_u64_x_untied:
+** lsr z0\.d, z1\.d, #63
+** ret
+*/
+TEST_UNIFORM_Z (div_maxpow_u64_x_untied, svuint64_t,
+ z0 = svdiv_n_u64_x (p0, z1, MAXPOW),
+ z0 = svdiv_x (p0, z1, MAXPOW))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/div_const_run.c b/gcc/testsuite/gcc.target/aarch64/sve/div_const_run.c
new file mode 100644
index 0000000..c96bb27
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/div_const_run.c
@@ -0,0 +1,91 @@
+/* { dg-do run { target aarch64_sve128_hw } } */
+/* { dg-options "-O2 -msve-vector-bits=128" } */
+
+#include <arm_sve.h>
+#include <stdint.h>
+
+typedef svbool_t pred __attribute__((arm_sve_vector_bits(128)));
+typedef svfloat16_t svfloat16_ __attribute__((arm_sve_vector_bits(128)));
+typedef svfloat32_t svfloat32_ __attribute__((arm_sve_vector_bits(128)));
+typedef svfloat64_t svfloat64_ __attribute__((arm_sve_vector_bits(128)));
+typedef svint32_t svint32_ __attribute__((arm_sve_vector_bits(128)));
+typedef svint64_t svint64_ __attribute__((arm_sve_vector_bits(128)));
+typedef svuint32_t svuint32_ __attribute__((arm_sve_vector_bits(128)));
+typedef svuint64_t svuint64_ __attribute__((arm_sve_vector_bits(128)));
+
+#define F(T, TS, P, OP1, OP2) \
+{ \
+ T##_t op1 = (T##_t) OP1; \
+ T##_t op2 = (T##_t) OP2; \
+ sv##T##_ res = svdiv_##P (pg, svdup_##TS (op1), svdup_##TS (op2)); \
+ sv##T##_ exp = svdup_##TS (op1 / op2); \
+ if (svptest_any (pg, svcmpne (pg, exp, res))) \
+ __builtin_abort (); \
+ \
+ sv##T##_ res_n = svdiv_##P (pg, svdup_##TS (op1), op2); \
+ if (svptest_any (pg, svcmpne (pg, exp, res_n))) \
+ __builtin_abort (); \
+}
+
+#define TEST_TYPES_1(T, TS) \
+ F (T, TS, m, 79, 16) \
+ F (T, TS, z, 79, 16) \
+ F (T, TS, x, 79, 16)
+
+#define TEST_TYPES \
+ TEST_TYPES_1 (float16, f16) \
+ TEST_TYPES_1 (float32, f32) \
+ TEST_TYPES_1 (float64, f64) \
+ TEST_TYPES_1 (int32, s32) \
+ TEST_TYPES_1 (int64, s64) \
+ TEST_TYPES_1 (uint32, u32) \
+ TEST_TYPES_1 (uint64, u64)
+
+#define TEST_VALUES_S_1(B, OP1, OP2) \
+ F (int##B, s##B, x, OP1, OP2)
+
+#define TEST_VALUES_S \
+ TEST_VALUES_S_1 (32, INT32_MIN, INT32_MIN) \
+ TEST_VALUES_S_1 (64, INT64_MIN, INT64_MIN) \
+ TEST_VALUES_S_1 (32, -7, 4) \
+ TEST_VALUES_S_1 (64, -7, 4) \
+ TEST_VALUES_S_1 (32, INT32_MAX, (1 << 30)) \
+ TEST_VALUES_S_1 (64, INT64_MAX, (1ULL << 62)) \
+ TEST_VALUES_S_1 (32, INT32_MIN, (1 << 30)) \
+ TEST_VALUES_S_1 (64, INT64_MIN, (1ULL << 62)) \
+ TEST_VALUES_S_1 (32, INT32_MAX, 1) \
+ TEST_VALUES_S_1 (64, INT64_MAX, 1) \
+ TEST_VALUES_S_1 (32, INT32_MIN, 16) \
+ TEST_VALUES_S_1 (64, INT64_MIN, 16) \
+ TEST_VALUES_S_1 (32, INT32_MAX, -5) \
+ TEST_VALUES_S_1 (64, INT64_MAX, -5) \
+ TEST_VALUES_S_1 (32, INT32_MIN, -4) \
+ TEST_VALUES_S_1 (64, INT64_MIN, -4)
+
+#define TEST_VALUES_U_1(B, OP1, OP2) \
+ F (uint##B, u##B, x, OP1, OP2)
+
+#define TEST_VALUES_U \
+ TEST_VALUES_U_1 (32, UINT32_MAX, UINT32_MAX) \
+ TEST_VALUES_U_1 (64, UINT64_MAX, UINT64_MAX) \
+ TEST_VALUES_U_1 (32, UINT32_MAX, (1 << 31)) \
+ TEST_VALUES_U_1 (64, UINT64_MAX, (1ULL << 63)) \
+ TEST_VALUES_U_1 (32, 7, 4) \
+ TEST_VALUES_U_1 (64, 7, 4) \
+ TEST_VALUES_U_1 (32, 7, 3) \
+ TEST_VALUES_U_1 (64, 7, 3) \
+ TEST_VALUES_U_1 (32, 11, 1) \
+ TEST_VALUES_U_1 (64, 11, 1)
+
+#define TEST_VALUES \
+ TEST_VALUES_S \
+ TEST_VALUES_U
+
+int
+main (void)
+{
+ const pred pg = svptrue_b8 ();
+ TEST_TYPES
+ TEST_VALUES
+ return 0;
+}