aarch64: Support SVE comparisons for unpacked integers

This patch adds support for comparing unpacked SVE integer vectors, such as byte elements stored in the bottom bytes of halfword containers. It also adds support for selects between unpacked SVE vectors (both integer and floating-point), since selects and compares are closely tied via the vcond optab interface. gcc/ * config/aarch64/aarch64-sve.md (@vcond_mask_<mode><vpred>): Extend from SVE_FULL to SVE_ALL. (*vcond_mask_<mode><vpred>): Likewise. (@aarch64_sel_dup<mode>): Likewise. (vcond<SVE_FULL:mode><v_int_equiv>): Extend to... (vcond<SVE_ALL:mode><SVE_I:mode>): ...this, but requiring the sizes of the container modes to match. (vcondu<SVE_FULL:mode><v_int_equiv>): Extend to... (vcondu<SVE_ALL:mode><SVE_I:mode>): ...this. (vec_cmp<SVE_FULL_I:mode><vpred>): Extend to... (vec_cmp<SVE_I:mode><vpred>): ...this. (vec_cmpu<SVE_FULL_I:mode><vpred>): Extend to... (vec_cmpu<SVE_I:mode><vpred>): ...this. (@aarch64_pred_cmp<cmp_op><SVE_FULL_I:mode>): Extend to... (@aarch64_pred_cmp<cmp_op><SVE_I:mode>): ...this. (*cmp<cmp_op><SVE_FULL_I:mode>_cc): Extend to... (*cmp<cmp_op><SVE_I:mode>_cc): ...this. (*cmp<cmp_op><SVE_FULL_I:mode>_ptest): Extend to... (*cmp<cmp_op><SVE_I:mode>_ptest): ...this. (*cmp<cmp_op><SVE_FULL_I:mode>_and): Extend to... (*cmp<cmp_op><SVE_I:mode>_and): ...this. gcc/testsuite/ * gcc.target/aarch64/sve/cmp_1.c: New test. * gcc.target/aarch64/sve/cmp_2.c: Likewise. * gcc.target/aarch64/sve/cond_arith_1.c: Add --param aarch64-sve-compare-costs=0 * gcc.target/aarch64/sve/cond_arith_1_run.c: Likewise. * gcc.target/aarch64/sve/cond_arith_3.c: Likewise. * gcc.target/aarch64/sve/cond_arith_3_run.c: Likewise. * gcc.target/aarch64/sve/mask_gather_load_7.c: Likewise. * gcc.target/aarch64/sve/mask_load_slp_1.c: Likewise. * gcc.target/aarch64/sve/vcond_11.c: Likewise. * gcc.target/aarch64/sve/vcond_11_run.c: Likewise.
author: Richard Sandiford <richard.sandiford@arm.com> 2020-11-11 11:42:46 +0000
committer: Richard Sandiford <richard.sandiford@arm.com> 2020-11-11 11:42:46 +0000
commit: 46c705e70e078f6a1920d92e49042125d5e18495 (patch)
tree: 9b22d8160985c14baf881d68e981f7ee32c9497f /gcc
parent: e29dd0eb733f4b9ae03e44322c7fbe8b51eff0a4 (diff)
download: gcc-46c705e70e078f6a1920d92e49042125d5e18495.zip
gcc-46c705e70e078f6a1920d92e49042125d5e18495.tar.gz
gcc-46c705e70e078f6a1920d92e49042125d5e18495.tar.bz2
11 files changed, 216 insertions, 50 deletions
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 4b0a1eb..455b025 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -7379,11 +7379,11 @@
 ;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
 ;; SEL operand order:        mask, true, false
 (define_expand "@vcond_mask_<mode><vpred>"
-  [(set (match_operand:SVE_FULL 0 "register_operand")
-	(unspec:SVE_FULL
+  [(set (match_operand:SVE_ALL 0 "register_operand")
+	(unspec:SVE_ALL
 	  [(match_operand:<VPRED> 3 "register_operand")
-	   (match_operand:SVE_FULL 1 "aarch64_sve_reg_or_dup_imm")
-	   (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero")]
+	   (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm")
+	   (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")]
 	  UNSPEC_SEL))]
   "TARGET_SVE"
   {
@@ -7396,12 +7396,25 @@
 ;; - two registers
 ;; - a duplicated immediate and a register
 ;; - a duplicated immediate and zero
+;;
+;; For unpacked vectors, it doesn't really matter whether SEL uses the
+;; container size or the element size.  If SEL used the container size,
+;; it would ignore undefined bits of the predicate but would copy the
+;; upper (undefined) bits of each container along with the defined bits.
+;; If SEL used the element size, it would use undefined bits of the predicate
+;; to select between undefined elements in each input vector.  Thus the only
+;; difference is whether the undefined bits in a container always come from
+;; the same input as the defined bits, or whether the choice can vary
+;; independently of the defined bits.
+;;
+;; For the other instructions, using the element size is more natural,
+;; so we do that for SEL as well.
 (define_insn "*vcond_mask_<mode><vpred>"
-  [(set (match_operand:SVE_FULL 0 "register_operand" "=w, w, w, w, ?w, ?&w, ?&w")
-	(unspec:SVE_FULL
+  [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w, w, ?w, ?&w, ?&w")
+	(unspec:SVE_ALL
 	  [(match_operand:<VPRED> 3 "register_operand" "Upa, Upa, Upa, Upa, Upl, Upl, Upl")
-	   (match_operand:SVE_FULL 1 "aarch64_sve_reg_or_dup_imm" "w, vss, vss, Ufc, Ufc, vss, Ufc")
-	   (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero" "w, 0, Dz, 0, Dz, w, w")]
+	   (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm" "w, vss, vss, Ufc, Ufc, vss, Ufc")
+	   (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero" "w, 0, Dz, 0, Dz, w, w")]
 	  UNSPEC_SEL))]
   "TARGET_SVE
    && (!register_operand (operands[1], <MODE>mode)
@@ -7422,12 +7435,12 @@
 ;; of GPRs as being more expensive than duplicates of FPRs, since they
 ;; involve a cross-file move.
 (define_insn "@aarch64_sel_dup<mode>"
-  [(set (match_operand:SVE_FULL 0 "register_operand" "=?w, w, ??w, ?&w, ??&w, ?&w")
-	(unspec:SVE_FULL
+  [(set (match_operand:SVE_ALL 0 "register_operand" "=?w, w, ??w, ?&w, ??&w, ?&w")
+	(unspec:SVE_ALL
 	  [(match_operand:<VPRED> 3 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
-	   (vec_duplicate:SVE_FULL
+	   (vec_duplicate:SVE_ALL
 	     (match_operand:<VEL> 1 "register_operand" "r, w, r, w, r, w"))
-	   (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero" "0, 0, Dz, Dz, w, w")]
+	   (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero" "0, 0, Dz, Dz, w, w")]
 	  UNSPEC_SEL))]
   "TARGET_SVE"
   "@
@@ -7448,34 +7461,34 @@
 
 ;; Integer (signed) vcond.  Don't enforce an immediate range here, since it
 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
-(define_expand "vcond<mode><v_int_equiv>"
-  [(set (match_operand:SVE_FULL 0 "register_operand")
-	(if_then_else:SVE_FULL
+(define_expand "vcond<SVE_ALL:mode><SVE_I:mode>"
+  [(set (match_operand:SVE_ALL 0 "register_operand")
+	(if_then_else:SVE_ALL
 	  (match_operator 3 "comparison_operator"
-	    [(match_operand:<V_INT_EQUIV> 4 "register_operand")
-	     (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
-	  (match_operand:SVE_FULL 1 "nonmemory_operand")
-	  (match_operand:SVE_FULL 2 "nonmemory_operand")))]
-  "TARGET_SVE"
+	    [(match_operand:SVE_I 4 "register_operand")
+	     (match_operand:SVE_I 5 "nonmemory_operand")])
+	  (match_operand:SVE_ALL 1 "nonmemory_operand")
+	  (match_operand:SVE_ALL 2 "nonmemory_operand")))]
+  "TARGET_SVE && <SVE_ALL:container_bits> == <SVE_I:container_bits>"
   {
-    aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
+    aarch64_expand_sve_vcond (<SVE_ALL:MODE>mode, <SVE_I:MODE>mode, operands);
     DONE;
   }
 )
 
 ;; Integer vcondu.  Don't enforce an immediate range here, since it
 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
-(define_expand "vcondu<mode><v_int_equiv>"
-  [(set (match_operand:SVE_FULL 0 "register_operand")
-	(if_then_else:SVE_FULL
+(define_expand "vcondu<SVE_ALL:mode><SVE_I:mode>"
+  [(set (match_operand:SVE_ALL 0 "register_operand")
+	(if_then_else:SVE_ALL
 	  (match_operator 3 "comparison_operator"
-	    [(match_operand:<V_INT_EQUIV> 4 "register_operand")
-	     (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
-	  (match_operand:SVE_FULL 1 "nonmemory_operand")
-	  (match_operand:SVE_FULL 2 "nonmemory_operand")))]
-  "TARGET_SVE"
+	    [(match_operand:SVE_I 4 "register_operand")
+	     (match_operand:SVE_I 5 "nonmemory_operand")])
+	  (match_operand:SVE_ALL 1 "nonmemory_operand")
+	  (match_operand:SVE_ALL 2 "nonmemory_operand")))]
+  "TARGET_SVE && <SVE_ALL:container_bits> == <SVE_I:container_bits>"
   {
-    aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
+    aarch64_expand_sve_vcond (<SVE_ALL:MODE>mode, <SVE_I:MODE>mode, operands);
     DONE;
   }
 )
@@ -7520,8 +7533,8 @@
   [(parallel
     [(set (match_operand:<VPRED> 0 "register_operand")
 	  (match_operator:<VPRED> 1 "comparison_operator"
-	    [(match_operand:SVE_FULL_I 2 "register_operand")
-	     (match_operand:SVE_FULL_I 3 "nonmemory_operand")]))
+	    [(match_operand:SVE_I 2 "register_operand")
+	     (match_operand:SVE_I 3 "nonmemory_operand")]))
      (clobber (reg:CC_NZC CC_REGNUM))])]
   "TARGET_SVE"
   {
@@ -7538,8 +7551,8 @@
   [(parallel
     [(set (match_operand:<VPRED> 0 "register_operand")
 	  (match_operator:<VPRED> 1 "comparison_operator"
-	    [(match_operand:SVE_FULL_I 2 "register_operand")
-	     (match_operand:SVE_FULL_I 3 "nonmemory_operand")]))
+	    [(match_operand:SVE_I 2 "register_operand")
+	     (match_operand:SVE_I 3 "nonmemory_operand")]))
      (clobber (reg:CC_NZC CC_REGNUM))])]
   "TARGET_SVE"
   {
@@ -7550,14 +7563,38 @@
 )
 
 ;; Predicated integer comparisons.
+;;
+;; For unpacked vectors, only the lowpart element in each input container
+;; has a defined value, and only the predicate bits associated with
+;; those elements are defined.  For example, when comparing two VNx2SIs:
+;;
+;; - The VNx2SIs can be seen as VNx2DIs in which the low halves of each
+;;   DI container store an SI element.  The upper bits of each DI container
+;;   are undefined.
+;;
+;; - Alternatively, the VNx2SIs can be seen as VNx4SIs in which the
+;;   even elements are defined and the odd elements are undefined.
+;;
+;; - The associated predicate mode is VNx2BI.  This means that only the
+;;   low bit in each predicate byte is defined (on input and on output).
+;;
+;; - We use a .s comparison to compare VNx2SIs, under the control of a
+;;   VNx2BI governing predicate, to produce a VNx2BI result.  If we view
+;;   the .s operation as operating on VNx4SIs then for odd lanes:
+;;
+;;   - the input governing predicate bit is undefined
+;;   - the SI elements being compared are undefined
+;;   - the predicate result bit is therefore undefined, but
+;;   - the predicate result bit is in the undefined part of a VNx2BI,
+;;     so its value doesn't matter anyway.
 (define_insn "@aarch64_pred_cmp<cmp_op><mode>"
   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
 	(unspec:<VPRED>
 	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
 	   (match_operand:SI 2 "aarch64_sve_ptrue_flag")
 	   (SVE_INT_CMP:<VPRED>
-	     (match_operand:SVE_FULL_I 3 "register_operand" "w, w")
-	     (match_operand:SVE_FULL_I 4 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
+	     (match_operand:SVE_I 3 "register_operand" "w, w")
+	     (match_operand:SVE_I 4 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
 	  UNSPEC_PRED_Z))
    (clobber (reg:CC_NZC CC_REGNUM))]
   "TARGET_SVE"
@@ -7578,8 +7615,8 @@
 	     [(match_operand 6)
 	      (match_operand:SI 7 "aarch64_sve_ptrue_flag")
 	      (SVE_INT_CMP:<VPRED>
-		(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
-		(match_operand:SVE_FULL_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
+		(match_operand:SVE_I 2 "register_operand" "w, w")
+		(match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
 	     UNSPEC_PRED_Z)]
 	  UNSPEC_PTEST))
    (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
@@ -7614,8 +7651,8 @@
 	     [(match_operand 6)
 	      (match_operand:SI 7 "aarch64_sve_ptrue_flag")
 	      (SVE_INT_CMP:<VPRED>
-		(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
-		(match_operand:SVE_FULL_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
+		(match_operand:SVE_I 2 "register_operand" "w, w")
+		(match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
 	     UNSPEC_PRED_Z)]
 	  UNSPEC_PTEST))
    (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
@@ -7642,8 +7679,8 @@
 	    [(match_operand 4)
 	     (const_int SVE_KNOWN_PTRUE)
 	     (SVE_INT_CMP:<VPRED>
-	       (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
-	       (match_operand:SVE_FULL_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
+	       (match_operand:SVE_I 2 "register_operand" "w, w")
+	       (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
 	    UNSPEC_PRED_Z)
 	  (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
    (clobber (reg:CC_NZC CC_REGNUM))]
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cmp_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cmp_1.c
new file mode 100644
index 0000000..7cf66c5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cmp_1.c
@@ -0,0 +1,57 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define TEST_PAIR(TYPE1, TYPE2)				\
+  void							\
+  f_##TYPE1##_##TYPE2 (TYPE1 *restrict x,		\
+		       TYPE2 *restrict g, int n)	\
+  {							\
+    for (int i = 0; i < n; ++i)				\
+      if (g[i] < 4)					\
+	x[i] += 1;					\
+  }
+
+#define TEST_SINGLE(TYPE)			\
+  TEST_PAIR (TYPE, int8_t)			\
+  TEST_PAIR (TYPE, uint8_t)			\
+  TEST_PAIR (TYPE, int16_t)			\
+  TEST_PAIR (TYPE, uint16_t)			\
+  TEST_PAIR (TYPE, int32_t)			\
+  TEST_PAIR (TYPE, uint32_t)			\
+  TEST_PAIR (TYPE, int64_t)			\
+  TEST_PAIR (TYPE, uint64_t)
+
+TEST_SINGLE (int8_t)
+TEST_SINGLE (uint8_t)
+TEST_SINGLE (int16_t)
+TEST_SINGLE (uint16_t)
+TEST_SINGLE (int32_t)
+TEST_SINGLE (uint32_t)
+TEST_SINGLE (int64_t)
+TEST_SINGLE (uint64_t)
+
+/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.b,} 8 } } */
+/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.h,} 8 } } */
+/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.s,} 8 } } */
+/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.d,} 8 } } */
+
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h,} 16 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s,} 8 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d,} 8 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s,} 24 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d,} 8 } } */
+
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 32 } } */
+
+/* { dg-final { scan-assembler-times {\tcmpl[et]\tp[0-9]+\.b,} 8 } } */
+/* { dg-final { scan-assembler-times {\tcmpl[so]\tp[0-9]+\.b,} 8 } } */
+/* { dg-final { scan-assembler-times {\tcmpl[et]\tp[0-9]+\.h,} 8 } } */
+/* { dg-final { scan-assembler-times {\tcmpl[so]\tp[0-9]+\.h,} 8 } } */
+/* { dg-final { scan-assembler-times {\tcmpl[et]\tp[0-9]+\.s,} 8 } } */
+/* { dg-final { scan-assembler-times {\tcmpl[so]\tp[0-9]+\.s,} 8 } } */
+/* { dg-final { scan-assembler-times {\tcmpl[et]\tp[0-9]+\.d,} 8 } } */
+/* { dg-final { scan-assembler-times {\tcmpl[so]\tp[0-9]+\.d,} 8 } } */
+
+/* { dg-final { scan-assembler-not {\tpunpk} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cmp_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cmp_2.c
new file mode 100644
index 0000000..b221206
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cmp_2.c
@@ -0,0 +1,72 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define TEST_PAIR(TYPE1, TYPE2)					\
+  void								\
+  f_##TYPE1##_##TYPE2 (TYPE1 *restrict x, TYPE1 y, TYPE1 z,	\
+		       TYPE2 *restrict g, TYPE2 h, int n)	\
+  {								\
+    for (int i = 0; i < n; ++i)					\
+      x[i] = g[i] < h ? y : z;					\
+  }
+
+#define TEST_SINGLE(TYPE)			\
+  TEST_PAIR (TYPE, int8_t)			\
+  TEST_PAIR (TYPE, uint8_t)			\
+  TEST_PAIR (TYPE, int16_t)			\
+  TEST_PAIR (TYPE, uint16_t)			\
+  TEST_PAIR (TYPE, int32_t)			\
+  TEST_PAIR (TYPE, uint32_t)			\
+  TEST_PAIR (TYPE, int64_t)			\
+  TEST_PAIR (TYPE, uint64_t)
+
+TEST_SINGLE (int8_t)
+TEST_SINGLE (uint8_t)
+TEST_SINGLE (int16_t)
+TEST_SINGLE (uint16_t)
+TEST_SINGLE (int32_t)
+TEST_SINGLE (uint32_t)
+TEST_SINGLE (float)
+TEST_SINGLE (int64_t)
+TEST_SINGLE (uint64_t)
+TEST_SINGLE (double)
+
+/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.b,} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.h,} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.s,} 6 } } */
+/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.d,} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h,} 8 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s,} 6 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d,} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s,} 14 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d,} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 20 } } */
+
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 4 } } */
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.h,} 4 } } */
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s,} 4 } } */
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.d,} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 8 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s,} 4 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.d,} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 18 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.d,} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 24 } } */
+
+/* { dg-final { scan-assembler-times {\tcmp(?:h[is]|l[os])\tp[0-9]+\.b,} 10 } } */
+/* { dg-final { scan-assembler-times {\tcmp[lg][et]\tp[0-9]+\.b,} 10 } } */
+/* { dg-final { scan-assembler-times {\tcmp(?:h[is]|l[os])\tp[0-9]+\.h,} 10 } } */
+/* { dg-final { scan-assembler-times {\tcmp[lg][et]\tp[0-9]+\.h,} 10 } } */
+/* { dg-final { scan-assembler-times {\tcmp(?:h[is]|l[os])\tp[0-9]+\.s,} 10 } } */
+/* { dg-final { scan-assembler-times {\tcmp[lg][et]\tp[0-9]+\.s,} 10 } } */
+/* { dg-final { scan-assembler-times {\tcmp(?:h[is]|l[os])\tp[0-9]+\.d,} 10 } } */
+/* { dg-final { scan-assembler-times {\tcmp[lg][et]\tp[0-9]+\.d,} 10 } } */
+
+/* { dg-final { scan-assembler-not {\tpunpk} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_1.c
index 52138d2..d831e9c 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-options "-O2 -ftree-vectorize --param aarch64-sve-compare-costs=0" } */
 
 #include <stdint.h>
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_1_run.c
index 876f98f..5808e0a 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_1_run.c
@@ -1,5 +1,5 @@
 /* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-options "-O2 -ftree-vectorize --param aarch64-sve-compare-costs=0" } */
 
 #include "cond_arith_1.c"
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_3.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_3.c
index 94eb255..068e0b6 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_3.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-options "-O2 -ftree-vectorize --param aarch64-sve-compare-costs=0" } */
 
 #include <stdint.h>
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_3_run.c
index 31457da..d258004 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_3_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_3_run.c
@@ -1,5 +1,5 @@
 /* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-options "-O2 -ftree-vectorize --param aarch64-sve-compare-costs=0" } */
 
 #include "cond_arith_3.c"
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_7.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_7.c
index cd2661e..687716e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_7.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_7.c
@@ -1,5 +1,5 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --save-temps --param aarch64-sve-compare-costs=0" } */
 
 #include <stdint.h>
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_load_slp_1.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_load_slp_1.c
index 78c70b2..a38b92d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/mask_load_slp_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/mask_load_slp_1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-options "-O2 -ftree-vectorize --param aarch64-sve-compare-costs=0" } */
 
 #include <stdint.h>
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_11.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_11.c
index 3c9e340..4efcf3a 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/vcond_11.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_11.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --param aarch64-sve-compare-costs=0" } */
 
 #include <stdint.h>
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_11_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_11_run.c
index 9a4edb8..4cbe4a6 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/vcond_11_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_11_run.c
@@ -1,5 +1,5 @@
 /* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --param aarch64-sve-compare-costs=0" } */
 
 #include "vcond_11.c"
author	Richard Sandiford <richard.sandiford@arm.com>	2020-11-11 11:42:46 +0000
committer	Richard Sandiford <richard.sandiford@arm.com>	2020-11-11 11:42:46 +0000
commit	46c705e70e078f6a1920d92e49042125d5e18495 (patch)
tree	9b22d8160985c14baf881d68e981f7ee32c9497f /gcc
parent	e29dd0eb733f4b9ae03e44322c7fbe8b51eff0a4 (diff)
download	gcc-46c705e70e078f6a1920d92e49042125d5e18495.zip gcc-46c705e70e078f6a1920d92e49042125d5e18495.tar.gz gcc-46c705e70e078f6a1920d92e49042125d5e18495.tar.bz2