aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorChristophe Lyon <christophe.lyon@arm.com>2024-06-25 15:47:23 +0200
committerChristophe Lyon <christophe.lyon@arm.com>2024-10-16 22:02:54 +0200
commit74caf97572d84c7c4503d10773e0f8e8544c50d9 (patch)
tree4d5fe2235dd29f82f47c02c06b0e80f821dc866e /gcc
parent79dae32843854dacfff22f059a71b5a657d7c96f (diff)
downloadgcc-74caf97572d84c7c4503d10773e0f8e8544c50d9.zip
gcc-74caf97572d84c7c4503d10773e0f8e8544c50d9.tar.gz
gcc-74caf97572d84c7c4503d10773e0f8e8544c50d9.tar.bz2
arm: [MVE intrinsics] Improve vdupq_n implementation
This patch makes the non-predicated vdupq_n MVE intrinsics use vec_duplicate rather than an unspec. This enables the compiler to generate better code sequences (for instance using vmov when possible). The patch renames the existing mve_vdup<mode> pattern into @mve_vdupq_n<mode>, and removes the now useless @mve_<mve_insn>q_n_f<mode> and @mve_<mve_insn>q_n_<supf><mode> ones. As a side-effect, it needs to update the mve_unpredicated_insn attributes in @mve_<mve_insn>q_m_n_<supf><mode> and @mve_<mve_insn>q_m_n_f<mode>. Using vec_duplicate means the compiler is now able to use vmov in the tests with an immediate argument in vdupq_n_[su]{8,16,32}.c: vmov.i8 q0,#0x1 However, this is only possible when the immediate has a suitable value (MVE encoding constraints, see imm_for_neon_mov_operand predicate). Provided we adjust the cost computations in arm_rtx_costs_internal(), when the immediate does not meet the vmov constraints, we now generate: mov r0, #imm vdup.xx q0,r0 or ldr r0, .L4 vdup.32 q0,r0 in the f32 case (with 1.1 as immediate). Without the cost adjustment, we would generate: vldr.64 d0, .L4 vldr.64 d1, .L4+8 and an associated literal pool entry. Regarding the testsuite updates: -------------------------------- * The signed versions of vdupq_* tests lack a version with an immediate argument. This patch adds them, similar to what we already have for vdupq_n_u*.c tests. * Code generation for different immediate values is checked with the new tests this patch introduces. Note there's no need for s8/u8 tests because 8-bit immediates always comply with imm_for_neon_mov_operand. * We can remove xfail from vcmp*f tests since we now generate: movw r3, #15462 vcmp.f16 eq, q0, r3 instead of the previous: vldr.64 d6, .L5 vldr.64 d7, .L5+8 vcmp.f16 eq, q0, q3 Tested on arm-linux-gnueabihf and arm-none-eabi with no regression. 2024-07-02 Jolen Li <jolen.li@arm.com> Christophe Lyon <christophe.lyon@arm.com> gcc/ * config/arm/arm-mve-builtins-base.cc (vdupq_impl): New class. 
(vdupq): Use new implementation. * config/arm/arm.cc (arm_rtx_costs_internal): Handle HFmode for CONST_DOUBLE. Update costing for CONST_VECTOR. * config/arm/arm_mve_builtins.def: Merge vdupq_n_f, vdupq_n_s and vdupq_n_u into vdupq_n. * config/arm/mve.md (mve_vdup<mode>): Rename into ... (@mve_vdupq_n<mode>): ... this. (@mve_<mve_insn>q_n_f<mode>): Delete. (@mve_<mve_insn>q_n_<supf><mode>): Delete. (@mve_<mve_insn>q_m_n_<supf><mode>): Update mve_unpredicated_insn attribute. (@mve_<mve_insn>q_m_n_f<mode>): Likewise. gcc/testsuite/ * gcc.target/arm/mve/intrinsics/vdupq_n_u8.c (foo1): Update expected code. * gcc.target/arm/mve/intrinsics/vdupq_n_u16.c (foo1): Likewise. * gcc.target/arm/mve/intrinsics/vdupq_n_u32.c (foo1): Likewise. * gcc.target/arm/mve/intrinsics/vdupq_n_s8.c: Add test with immediate argument. * gcc.target/arm/mve/intrinsics/vdupq_n_s16.c: Likewise. * gcc.target/arm/mve/intrinsics/vdupq_n_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vdupq_n_f16.c (foo1): Update expected code. * gcc.target/arm/mve/intrinsics/vdupq_n_f32.c (foo1): Likewise. * gcc.target/arm/mve/intrinsics/vdupq_m_n_s16.c: Add test with immediate argument. * gcc.target/arm/mve/intrinsics/vdupq_m_n_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vdupq_m_n_s8.c: Likewise. * gcc.target/arm/mve/intrinsics/vdupq_x_n_s16.c: Likewise. * gcc.target/arm/mve/intrinsics/vdupq_x_n_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vdupq_x_n_s8.c: Likewise. * gcc.target/arm/mve/intrinsics/vdupq_n_f32-2.c: New test. * gcc.target/arm/mve/intrinsics/vdupq_n_s16-2.c: New test. * gcc.target/arm/mve/intrinsics/vdupq_n_s32-2.c: New test. * gcc.target/arm/mve/intrinsics/vdupq_n_u16-2.c: New test. * gcc.target/arm/mve/intrinsics/vdupq_n_u32-2.c: New test. * gcc.target/arm/mve/intrinsics/vcmpeqq_n_f16.c: Remove xfail. * gcc.target/arm/mve/intrinsics/vcmpeqq_n_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmpgeq_n_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmpgeq_n_f32.c: Likewise. 
* gcc.target/arm/mve/intrinsics/vcmpgtq_n_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmpgtq_n_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmpleq_n_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmpleq_n_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmpltq_n_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmpltq_n_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmpneq_n_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmpneq_n_f32.c: Likewise.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/arm/arm-mve-builtins-base.cc55
-rw-r--r--gcc/config/arm/arm.cc10
-rw-r--r--gcc/config/arm/arm_mve_builtins.def4
-rw-r--r--gcc/config/arm/mve.md41
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpeqq_n_f16.c4
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpeqq_n_f32.c4
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgeq_n_f16.c4
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgeq_n_f32.c4
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgtq_n_f16.c2
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgtq_n_f32.c4
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpleq_n_f16.c4
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpleq_n_f32.c4
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpltq_n_f16.c4
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpltq_n_f32.c4
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpneq_n_f16.c4
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpneq_n_f32.c4
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_m_n_s16.c18
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_m_n_s32.c18
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_m_n_s8.c18
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_f16.c3
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_f32-2.c29
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_f32.c5
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_s16-2.c30
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_s16.c14
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_s32-2.c30
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_s32.c14
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_s8.c14
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_u16-2.c30
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_u16.c4
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_u32-2.c30
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_u32.c4
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_u8.c4
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_x_n_s16.c18
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_x_n_s32.c18
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_x_n_s8.c18
35 files changed, 394 insertions, 81 deletions
diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index e0ae593..be0f9c2 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -39,6 +39,59 @@ using namespace arm_mve;
namespace {
+/* Implements vdupq_* intrinsics. */
+class vdupq_impl : public quiet<function_base>
+{
+public:
+ CONSTEXPR vdupq_impl (int unspec_for_m_n_sint,
+ int unspec_for_m_n_uint,
+ int unspec_for_m_n_fp)
+ : m_unspec_for_m_n_sint (unspec_for_m_n_sint),
+ m_unspec_for_m_n_uint (unspec_for_m_n_uint),
+ m_unspec_for_m_n_fp (unspec_for_m_n_fp)
+ {}
+ int m_unspec_for_m_n_sint;
+ int m_unspec_for_m_n_uint;
+ int m_unspec_for_m_n_fp;
+
+ rtx expand (function_expander &e) const override
+ {
+ gcc_assert (e.mode_suffix_id == MODE_n);
+
+ insn_code code;
+ machine_mode mode = e.vector_mode (0);
+
+ switch (e.pred)
+ {
+ case PRED_none:
+ /* No predicate, _n suffix. */
+ code = code_for_mve_vdupq_n (mode);
+ return e.use_exact_insn (code);
+
+ case PRED_m:
+ case PRED_x:
+ /* "m" or "x" predicate, _n suffix. */
+ if (e.type_suffix (0).integer_p)
+ if (e.type_suffix (0).unsigned_p)
+ code = code_for_mve_q_m_n (m_unspec_for_m_n_uint,
+ m_unspec_for_m_n_uint, mode);
+ else
+ code = code_for_mve_q_m_n (m_unspec_for_m_n_sint,
+ m_unspec_for_m_n_sint, mode);
+ else
+ code = code_for_mve_q_m_n_f (m_unspec_for_m_n_fp, mode);
+
+ if (e.pred == PRED_m)
+ return e.use_cond_insn (code, 0);
+ else
+ return e.use_pred_x_insn (code);
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+};
+
/* Implements vreinterpretq_* intrinsics. */
class vreinterpretq_impl : public quiet<function_base>
{
@@ -339,7 +392,7 @@ FUNCTION (vcmpltq, unspec_based_mve_function_exact_insn_vcmp, (LT, UNKNOWN, LT,
FUNCTION (vcmpcsq, unspec_based_mve_function_exact_insn_vcmp, (UNKNOWN, GEU, UNKNOWN, UNKNOWN, VCMPCSQ_M_U, UNKNOWN, UNKNOWN, VCMPCSQ_M_N_U, UNKNOWN))
FUNCTION (vcmphiq, unspec_based_mve_function_exact_insn_vcmp, (UNKNOWN, GTU, UNKNOWN, UNKNOWN, VCMPHIQ_M_U, UNKNOWN, UNKNOWN, VCMPHIQ_M_N_U, UNKNOWN))
FUNCTION_WITHOUT_M_N (vcreateq, VCREATEQ)
-FUNCTION_ONLY_N (vdupq, VDUPQ)
+FUNCTION (vdupq, vdupq_impl, (VDUPQ_M_N_S, VDUPQ_M_N_U, VDUPQ_M_N_F))
FUNCTION_WITH_RTX_M (veorq, XOR, VEORQ)
FUNCTION (vfmaq, unspec_mve_function_exact_insn, (-1, -1, VFMAQ_F, -1, -1, VFMAQ_N_F, -1, -1, VFMAQ_M_F, -1, -1, VFMAQ_M_N_F))
FUNCTION (vfmasq, unspec_mve_function_exact_insn, (-1, -1, -1, -1, -1, VFMASQ_N_F, -1, -1, -1, -1, -1, VFMASQ_M_N_F))
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 50dd005..0d32b70 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -11911,7 +11911,7 @@ arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
case CONST_DOUBLE:
if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
- && (mode == SFmode || !TARGET_VFP_SINGLE))
+ && (mode == SFmode || mode == HFmode || !TARGET_VFP_SINGLE))
{
if (vfp3_const_double_rtx (x))
{
@@ -11936,12 +11936,18 @@ arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
return true;
case CONST_VECTOR:
- /* Fixme. */
if (((TARGET_NEON && TARGET_HARD_FLOAT
&& (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
|| TARGET_HAVE_MVE)
&& simd_immediate_valid_for_move (x, mode, NULL, NULL))
*cost = COSTS_N_INSNS (1);
+ else if (TARGET_HAVE_MVE)
+ {
+ /* 128-bit vector requires two vldr.64 on MVE. */
+ *cost = COSTS_N_INSNS (2);
+ if (speed_p)
+ *cost += extra_cost->ldst.loadd * 2;
+ }
else
*cost = COSTS_N_INSNS (4);
return true;
diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def
index f141aab..dd99a90 100644
--- a/gcc/config/arm/arm_mve_builtins.def
+++ b/gcc/config/arm/arm_mve_builtins.def
@@ -27,7 +27,7 @@ VAR2 (UNOP_NONE_NONE, vrndmq_f, v8hf, v4sf)
VAR2 (UNOP_NONE_NONE, vrndaq_f, v8hf, v4sf)
VAR2 (UNOP_NONE_NONE, vrev64q_f, v8hf, v4sf)
VAR2 (UNOP_NONE_NONE, vnegq_f, v8hf, v4sf)
-VAR2 (UNOP_NONE_NONE, vdupq_n_f, v8hf, v4sf)
+VAR5 (UNOP_NONE_NONE, vdupq_n, v8hf, v4sf, v16qi, v8hi, v4si)
VAR2 (UNOP_NONE_NONE, vabsq_f, v8hf, v4sf)
VAR1 (UNOP_NONE_NONE, vrev32q_f, v8hf)
VAR1 (UNOP_NONE_NONE, vcvttq_f32_f16, v4sf)
@@ -39,7 +39,6 @@ VAR3 (UNOP_SNONE_SNONE, vqnegq_s, v16qi, v8hi, v4si)
VAR3 (UNOP_SNONE_SNONE, vqabsq_s, v16qi, v8hi, v4si)
VAR3 (UNOP_SNONE_SNONE, vnegq_s, v16qi, v8hi, v4si)
VAR3 (UNOP_SNONE_SNONE, vmvnq_s, v16qi, v8hi, v4si)
-VAR3 (UNOP_SNONE_SNONE, vdupq_n_s, v16qi, v8hi, v4si)
VAR3 (UNOP_SNONE_SNONE, vclzq_s, v16qi, v8hi, v4si)
VAR3 (UNOP_SNONE_SNONE, vclsq_s, v16qi, v8hi, v4si)
VAR3 (UNOP_SNONE_SNONE, vaddvq_s, v16qi, v8hi, v4si)
@@ -57,7 +56,6 @@ VAR1 (UNOP_SNONE_SNONE, vrev16q_s, v16qi)
VAR1 (UNOP_SNONE_SNONE, vaddlvq_s, v4si)
VAR3 (UNOP_UNONE_UNONE, vrev64q_u, v16qi, v8hi, v4si)
VAR3 (UNOP_UNONE_UNONE, vmvnq_u, v16qi, v8hi, v4si)
-VAR3 (UNOP_UNONE_UNONE, vdupq_n_u, v16qi, v8hi, v4si)
VAR3 (UNOP_UNONE_UNONE, vclzq_u, v16qi, v8hi, v4si)
VAR3 (UNOP_UNONE_UNONE, vaddvq_u, v16qi, v8hi, v4si)
VAR2 (UNOP_UNONE_UNONE, vrev32q_u, v16qi, v8hi)
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 3f01bc1..7ace8b1 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -94,13 +94,16 @@
(set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,*")
(set_attr "neg_pool_range" "*,*,*,*,996,*,*,*")])
-(define_insn "mve_vdup<mode>"
+;;
+;; [vdupq_n_u, vdupq_n_s, vdupq_n_f]
+;;
+(define_insn "@mve_vdupq_n<mode>"
[(set (match_operand:MVE_VLD_ST 0 "s_register_operand" "=w")
(vec_duplicate:MVE_VLD_ST
(match_operand:<V_elem> 1 "s_register_operand" "r")))]
"TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT"
"vdup.<V_sz_elem>\t%q0, %1"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vdup<mode>"))
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vdupq_n<mode>"))
(set_attr "length" "4")
(set_attr "type" "mve_move")])
@@ -189,21 +192,6 @@
])
;;
-;; [vdupq_n_f])
-;;
-(define_insn "@mve_<mve_insn>q_n_f<mode>"
- [
- (set (match_operand:MVE_0 0 "s_register_operand" "=w")
- (unspec:MVE_0 [(match_operand:<V_elem> 1 "s_register_operand" "r")]
- MVE_FP_N_VDUPQ_ONLY))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "<mve_insn>.%#<V_sz_elem>\t%q0, %1"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_f<mode>"))
- (set_attr "type" "mve_move")
-])
-
-;;
;; [vrev32q_f])
;;
(define_insn "@mve_<mve_insn>q_f<mode>"
@@ -329,21 +317,6 @@
)
;;
-;; [vdupq_n_u, vdupq_n_s])
-;;
-(define_insn "@mve_<mve_insn>q_n_<supf><mode>"
- [
- (set (match_operand:MVE_2 0 "s_register_operand" "=w")
- (unspec:MVE_2 [(match_operand:<V_elem> 1 "s_register_operand" "r")]
- VDUPQ_N))
- ]
- "TARGET_HAVE_MVE"
- "<mve_insn>.%#<V_sz_elem>\t%q0, %1"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
- (set_attr "type" "mve_move")
-])
-
-;;
;; [vclzq_u, vclzq_s])
;;
(define_insn "@mve_vclzq_s<mode>"
@@ -1903,7 +1876,7 @@
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.%#<V_sz_elem>\t%q0, %2"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n<mode>"))
(set_attr "type" "mve_move")
(set_attr "length""8")])
@@ -2317,7 +2290,7 @@
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;<mve_insn>t.%#<V_sz_elem>\t%q0, %2"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_f<mode>"))
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n<mode>"))
(set_attr "type" "mve_move")
(set_attr "length""8")])
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpeqq_n_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpeqq_n_f16.c
index 2f84d75..335e511 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpeqq_n_f16.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpeqq_n_f16.c
@@ -39,7 +39,7 @@ foo1 (float16x8_t a, float16_t b)
}
/*
-**foo2: { xfail *-*-* }
+**foo2:
** ...
** vcmp.f16 eq, q[0-9]+, (?:ip|fp|r[0-9]+)(?: @.*|)
** ...
@@ -56,4 +56,4 @@ foo2 (float16x8_t a)
}
#endif
-/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpeqq_n_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpeqq_n_f32.c
index 6cfe733..e5c16be 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpeqq_n_f32.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpeqq_n_f32.c
@@ -39,7 +39,7 @@ foo1 (float32x4_t a, float32_t b)
}
/*
-**foo2: { xfail *-*-* }
+**foo2:
** ...
** vcmp.f32 eq, q[0-9]+, (?:ip|fp|r[0-9]+)(?: @.*|)
** ...
@@ -56,4 +56,4 @@ foo2 (float32x4_t a)
}
#endif
-/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgeq_n_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgeq_n_f16.c
index 978bd7d..47d5486 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgeq_n_f16.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgeq_n_f16.c
@@ -39,7 +39,7 @@ foo1 (float16x8_t a, float16_t b)
}
/*
-**foo2: { xfail *-*-* }
+**foo2:
** ...
** vcmp.f16 ge, q[0-9]+, (?:ip|fp|r[0-9]+)(?: @.*|)
** ...
@@ -56,4 +56,4 @@ foo2 (float16x8_t a)
}
#endif
-/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgeq_n_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgeq_n_f32.c
index 66b6d8b..1b775ea 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgeq_n_f32.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgeq_n_f32.c
@@ -39,7 +39,7 @@ foo1 (float32x4_t a, float32_t b)
}
/*
-**foo2: { xfail *-*-* }
+**foo2:
** ...
** vcmp.f32 ge, q[0-9]+, (?:ip|fp|r[0-9]+)(?: @.*|)
** ...
@@ -56,4 +56,4 @@ foo2 (float32x4_t a)
}
#endif
-/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgtq_n_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgtq_n_f16.c
index 9c5f1f2..89d8e2b 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgtq_n_f16.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgtq_n_f16.c
@@ -39,7 +39,7 @@ foo1 (float16x8_t a, float16_t b)
}
/*
-**foo2: { xfail *-*-* }
+**foo2:
** ...
** vcmp.f16 gt, q[0-9]+, (?:ip|fp|r[0-9]+)(?: @.*|)
** ...
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgtq_n_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgtq_n_f32.c
index 2723aa7..a5510e8 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgtq_n_f32.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgtq_n_f32.c
@@ -39,7 +39,7 @@ foo1 (float32x4_t a, float32_t b)
}
/*
-**foo2: { xfail *-*-* }
+**foo2:
** ...
** vcmp.f32 gt, q[0-9]+, (?:ip|fp|r[0-9]+)(?: @.*|)
** ...
@@ -56,4 +56,4 @@ foo2 (float32x4_t a)
}
#endif
-/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpleq_n_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpleq_n_f16.c
index 1d1f4bf..c94b3119 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpleq_n_f16.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpleq_n_f16.c
@@ -39,7 +39,7 @@ foo1 (float16x8_t a, float16_t b)
}
/*
-**foo2: { xfail *-*-* }
+**foo2:
** ...
** vcmp.f16 le, q[0-9]+, (?:ip|fp|r[0-9]+)(?: @.*|)
** ...
@@ -56,4 +56,4 @@ foo2 (float16x8_t a)
}
#endif
-/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpleq_n_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpleq_n_f32.c
index bf77a80..80e2cfa 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpleq_n_f32.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpleq_n_f32.c
@@ -39,7 +39,7 @@ foo1 (float32x4_t a, float32_t b)
}
/*
-**foo2: { xfail *-*-* }
+**foo2:
** ...
** vcmp.f32 le, q[0-9]+, (?:ip|fp|r[0-9]+)(?: @.*|)
** ...
@@ -56,4 +56,4 @@ foo2 (float32x4_t a)
}
#endif
-/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpltq_n_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpltq_n_f16.c
index f9f091c..c3a1064 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpltq_n_f16.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpltq_n_f16.c
@@ -39,7 +39,7 @@ foo1 (float16x8_t a, float16_t b)
}
/*
-**foo2: { xfail *-*-* }
+**foo2:
** ...
** vcmp.f16 lt, q[0-9]+, (?:ip|fp|r[0-9]+)(?: @.*|)
** ...
@@ -56,4 +56,4 @@ foo2 (float16x8_t a)
}
#endif
-/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpltq_n_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpltq_n_f32.c
index d22ea1a..b485f75 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpltq_n_f32.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpltq_n_f32.c
@@ -39,7 +39,7 @@ foo1 (float32x4_t a, float32_t b)
}
/*
-**foo2: { xfail *-*-* }
+**foo2:
** ...
** vcmp.f32 lt, q[0-9]+, (?:ip|fp|r[0-9]+)(?: @.*|)
** ...
@@ -56,4 +56,4 @@ foo2 (float32x4_t a)
}
#endif
-/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpneq_n_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpneq_n_f16.c
index 83beca9..1156caa 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpneq_n_f16.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpneq_n_f16.c
@@ -39,7 +39,7 @@ foo1 (float16x8_t a, float16_t b)
}
/*
-**foo2: { xfail *-*-* }
+**foo2:
** ...
** vcmp.f16 ne, q[0-9]+, (?:ip|fp|r[0-9]+)(?: @.*|)
** ...
@@ -56,4 +56,4 @@ foo2 (float16x8_t a)
}
#endif
-/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpneq_n_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpneq_n_f32.c
index abe1abf..c3ffbd1 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpneq_n_f32.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpneq_n_f32.c
@@ -39,7 +39,7 @@ foo1 (float32x4_t a, float32_t b)
}
/*
-**foo2: { xfail *-*-* }
+**foo2:
** ...
** vcmp.f32 ne, q[0-9]+, (?:ip|fp|r[0-9]+)(?: @.*|)
** ...
@@ -56,4 +56,4 @@ foo2 (float32x4_t a)
}
#endif
-/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_m_n_s16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_m_n_s16.c
index bf05c73..dbbf854 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_m_n_s16.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_m_n_s16.c
@@ -42,8 +42,24 @@ foo1 (int16x8_t inactive, int16_t a, mve_pred16_t p)
return vdupq_m (inactive, a, p);
}
+/*
+**foo2:
+** ...
+** vmsr p0, (?:ip|fp|r[0-9]+)(?: @.*|)
+** ...
+** vpst(?: @.*|)
+** ...
+** vdupt.16 q[0-9]+, (?:ip|fp|r[0-9]+)(?: @.*|)
+** ...
+*/
+int16x8_t
+foo2 (int16x8_t inactive, mve_pred16_t p)
+{
+ return vdupq_m (inactive, 1, p);
+}
+
#ifdef __cplusplus
}
#endif
-/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_m_n_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_m_n_s32.c
index 71789bb..613b5d3 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_m_n_s32.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_m_n_s32.c
@@ -42,8 +42,24 @@ foo1 (int32x4_t inactive, int32_t a, mve_pred16_t p)
return vdupq_m (inactive, a, p);
}
+/*
+**foo2:
+** ...
+** vmsr p0, (?:ip|fp|r[0-9]+)(?: @.*|)
+** ...
+** vpst(?: @.*|)
+** ...
+** vdupt.32 q[0-9]+, (?:ip|fp|r[0-9]+)(?: @.*|)
+** ...
+*/
+int32x4_t
+foo2 (int32x4_t inactive, mve_pred16_t p)
+{
+ return vdupq_m (inactive, 1, p);
+}
+
#ifdef __cplusplus
}
#endif
-/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_m_n_s8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_m_n_s8.c
index 48c4fbd1..a1ff48e 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_m_n_s8.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_m_n_s8.c
@@ -42,8 +42,24 @@ foo1 (int8x16_t inactive, int8_t a, mve_pred16_t p)
return vdupq_m (inactive, a, p);
}
+/*
+**foo2:
+** ...
+** vmsr p0, (?:ip|fp|r[0-9]+)(?: @.*|)
+** ...
+** vpst(?: @.*|)
+** ...
+** vdupt.8 q[0-9]+, (?:ip|fp|r[0-9]+)(?: @.*|)
+** ...
+*/
+int8x16_t
+foo2 (int8x16_t inactive, mve_pred16_t p)
+{
+ return vdupq_m (inactive, 1, p);
+}
+
#ifdef __cplusplus
}
#endif
-/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_f16.c
index 4411219..f9aae2f 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_f16.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_f16.c
@@ -24,6 +24,7 @@ foo (float16_t a)
/*
**foo1:
** ...
+** movw r[0-9]+, #15462
** vdup.16 q[0-9]+, (?:ip|fp|r[0-9]+)(?: @.*|)
** ...
*/
@@ -37,4 +38,4 @@ foo1 ()
}
#endif
-/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_f32-2.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_f32-2.c
new file mode 100644
index 0000000..a4b0022
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_f32-2.c
@@ -0,0 +1,29 @@
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_mve.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+ /* Test with a constant that fits in vmov. */
+/*
+**foo1:
+** ...
+** vmov.f32 q[0-9]+, #0.0 .*
+** ...
+*/
+float32x4_t
+foo1 ()
+{
+ return vdupq_n_f32 (0);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_f32.c
index 059e3e4..81c2e29 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_f32.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_f32.c
@@ -24,7 +24,8 @@ foo (float32_t a)
/*
**foo1:
** ...
-** vdup.32 q[0-9]+, (?:ip|fp|r[0-9]+)(?: @.*|)
+** ldr r[0-9]+, .L.*
+** vdup.32 q[0-9]+, r[0-9]+
** ...
*/
float32x4_t
@@ -37,4 +38,4 @@ foo1 ()
}
#endif
-/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_s16-2.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_s16-2.c
new file mode 100644
index 0000000..3fedbb1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_s16-2.c
@@ -0,0 +1,30 @@
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_mve.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+ /* Test with a constant that does not fit in vmov. */
+/*
+**foo1:
+** ...
+** mov r[0-9]+, #1000(?: @.*|)
+** vdup.16 q[0-9]+, r[0-9]+
+** ...
+*/
+int16x8_t
+foo1 ()
+{
+ return vdupq_n_s16 (1000);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_s16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_s16.c
index d8ba299..f274607 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_s16.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_s16.c
@@ -21,8 +21,20 @@ foo (int16_t a)
return vdupq_n_s16 (a);
}
+/*
+**foo1:
+** ...
+** vmov.i16 q[0-9]+, (#0x1) (?:@.*|)
+** ...
+*/
+int16x8_t
+foo1 ()
+{
+ return vdupq_n_s16 (1);
+}
+
#ifdef __cplusplus
}
#endif
-/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_s32-2.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_s32-2.c
new file mode 100644
index 0000000..3a4d32e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_s32-2.c
@@ -0,0 +1,30 @@
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_mve.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+ /* Test with a constant that does not fit in vmov. */
+/*
+**foo1:
+** ...
+** mov r[0-9]+, #1000
+** vdup.32 q[0-9]+, r[0-9]+
+** ...
+*/
+int32x4_t
+foo1 ()
+{
+ return vdupq_n_s32 (1000);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_s32.c
index a81c6d1..7f75eca2 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_s32.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_s32.c
@@ -21,8 +21,20 @@ foo (int32_t a)
return vdupq_n_s32 (a);
}
+/*
+**foo1:
+** ...
+** vmov.i32 q[0-9]+, (#0x1) (?:@.*|)
+** ...
+*/
+int32x4_t
+foo1 ()
+{
+ return vdupq_n_s32 (1);
+}
+
#ifdef __cplusplus
}
#endif
-/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_s8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_s8.c
index b0bac4f..454ff5a 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_s8.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_s8.c
@@ -21,8 +21,20 @@ foo (int8_t a)
return vdupq_n_s8 (a);
}
+/*
+**foo1:
+** ...
+** vmov.i8 q[0-9]+, (#0x1) (?:@.*|)
+** ...
+*/
+int8x16_t
+foo1 ()
+{
+ return vdupq_n_s8 (1);
+}
+
#ifdef __cplusplus
}
#endif
-/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_u16-2.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_u16-2.c
new file mode 100644
index 0000000..accd7fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_u16-2.c
@@ -0,0 +1,30 @@
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_mve.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+ /* Test with a constant that does not fit in vmov. */
+/*
+**foo1:
+** ...
+** mov r[0-9]+, #1000(?: @.*|)
+** vdup.16 q[0-9]+, r[0-9]+
+** ...
+*/
+uint16x8_t
+foo1 ()
+{
+ return vdupq_n_u16 (1000);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_u16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_u16.c
index 55e0a60..4accb64 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_u16.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_u16.c
@@ -24,7 +24,7 @@ foo (uint16_t a)
/*
**foo1:
** ...
-** vdup.16 q[0-9]+, (?:ip|fp|r[0-9]+)(?: @.*|)
+** vmov.i16 q[0-9]+, (#0x1) (?:@.*|)
** ...
*/
uint16x8_t
@@ -37,4 +37,4 @@ foo1 ()
}
#endif
-/* { dg-final { scan-assembler-not "__ARM_undef" } } */
\ No newline at end of file
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_u32-2.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_u32-2.c
new file mode 100644
index 0000000..03ea23c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_u32-2.c
@@ -0,0 +1,30 @@
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_mve.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+ /* Test with a constant that does not fit in vmov. */
+/*
+**foo1:
+** ...
+** mov r[0-9]+, #1000
+** vdup.32 q[0-9]+, r[0-9]+
+** ...
+*/
+uint32x4_t
+foo1 ()
+{
+ return vdupq_n_u32 (1000);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_u32.c
index bf73bc1..d08a94c 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_u32.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_u32.c
@@ -24,7 +24,7 @@ foo (uint32_t a)
/*
**foo1:
** ...
-** vdup.32 q[0-9]+, (?:ip|fp|r[0-9]+)(?: @.*|)
+** vmov.i32 q[0-9]+, (#0x1) (?:@.*|)
** ...
*/
uint32x4_t
@@ -37,4 +37,4 @@ foo1 ()
}
#endif
-/* { dg-final { scan-assembler-not "__ARM_undef" } } */
\ No newline at end of file
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_u8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_u8.c
index 48cbdb2..f1fcd4a 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_u8.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_n_u8.c
@@ -24,7 +24,7 @@ foo (uint8_t a)
/*
**foo1:
** ...
-** vdup.8 q[0-9]+, (?:ip|fp|r[0-9]+)(?: @.*|)
+** vmov.i8 q[0-9]+, (#0x1) (?:@.*|)
** ...
*/
uint8x16_t
@@ -37,4 +37,4 @@ foo1 ()
}
#endif
-/* { dg-final { scan-assembler-not "__ARM_undef" } } */
\ No newline at end of file
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_x_n_s16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_x_n_s16.c
index 6756502..9dcfe4e 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_x_n_s16.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_x_n_s16.c
@@ -25,8 +25,24 @@ foo (int16_t a, mve_pred16_t p)
return vdupq_x_n_s16 (a, p);
}
+/*
+**foo1:
+** ...
+** vmsr p0, (?:ip|fp|r[0-9]+)(?: @.*|)
+** ...
+** vpst(?: @.*|)
+** ...
+** vdupt.16 q[0-9]+, (?:ip|fp|r[0-9]+)(?: @.*|)
+** ...
+*/
+int16x8_t
+foo1 (mve_pred16_t p)
+{
+ return vdupq_x_n_s16 (1, p);
+}
+
#ifdef __cplusplus
}
#endif
-/* { dg-final { scan-assembler-not "__ARM_undef" } } */
\ No newline at end of file
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_x_n_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_x_n_s32.c
index b04afb3..eacdb2e 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_x_n_s32.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_x_n_s32.c
@@ -25,8 +25,24 @@ foo (int32_t a, mve_pred16_t p)
return vdupq_x_n_s32 (a, p);
}
+/*
+**foo1:
+** ...
+** vmsr p0, (?:ip|fp|r[0-9]+)(?: @.*|)
+** ...
+** vpst(?: @.*|)
+** ...
+** vdupt.32 q[0-9]+, (?:ip|fp|r[0-9]+)(?: @.*|)
+** ...
+*/
+int32x4_t
+foo1 (mve_pred16_t p)
+{
+ return vdupq_x_n_s32 (1, p);
+}
+
#ifdef __cplusplus
}
#endif
-/* { dg-final { scan-assembler-not "__ARM_undef" } } */
\ No newline at end of file
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_x_n_s8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_x_n_s8.c
index b23facd..8951f74 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_x_n_s8.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vdupq_x_n_s8.c
@@ -25,8 +25,24 @@ foo (int8_t a, mve_pred16_t p)
return vdupq_x_n_s8 (a, p);
}
+/*
+**foo1:
+** ...
+** vmsr p0, (?:ip|fp|r[0-9]+)(?: @.*|)
+** ...
+** vpst(?: @.*|)
+** ...
+** vdupt.8 q[0-9]+, (?:ip|fp|r[0-9]+)(?: @.*|)
+** ...
+*/
+int8x16_t
+foo1 (mve_pred16_t p)
+{
+ return vdupq_x_n_s8 (1, p);
+}
+
#ifdef __cplusplus
}
#endif
-/* { dg-final { scan-assembler-not "__ARM_undef" } } */
\ No newline at end of file
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */