arm: Fix vcond_mask expander for MVE (PR target/100757)

The problem in this PR is that we call VPSEL with a mask of vector type instead of HImode. This happens because operand 3 in vcond_mask is the pre-computed vector comparison and has vector type. This patch fixes it by implementing TARGET_VECTORIZE_GET_MASK_MODE, returning the appropriate VxBI mode when targeting MVE. In turn, this implies implementing vec_cmp<mode><MVE_vpred>, vec_cmpu<mode><MVE_vpred> and vcond_mask_<mode><MVE_vpred>, and we can move vec_cmp<mode><v_cmp_result>, vec_cmpu<mode><mode> and vcond_mask_<mode><v_cmp_result> back to neon.md since they are not used by MVE anymore. The new *<MVE_vpred> patterns listed above are implemented in mve.md since they are only valid for MVE. However this may make maintenance/comparison more painful than having all of them in vec-common.md. In the process, we can get rid of the recently added vcond_mve parameter of arm_expand_vector_compare. Compared to neon.md's vcond_mask_<mode><v_cmp_result> before my "arm: Auto-vectorization for MVE: vcmp" patch (r12-834), it keeps the VDQWH iterator added in r12-835 (to have V4HF/V8HF support), as well as the (!<Is_float_mode> || flag_unsafe_math_optimizations) condition which was not present before r12-834 although SF modes were enabled by VDQW (I think this was a bug). Using TARGET_VECTORIZE_GET_MASK_MODE has the advantage that we no longer need to generate vpsel with vectors of 0 and 1: the masks are now merged via scalar 'ands' instructions operating on 16-bit masks after converting the boolean vectors. In addition, this patch fixes a problem in arm_expand_vcond() where the result would be a vector of 0 or 1 instead of operand 1 or 2. Since we want to skip gcc.dg/signbit-2.c for MVE, we also add a new arm_mve effective target. Reducing the number of iterations in pr100757-3.c from 32 to 8, we generate the code below: float a[32]; float fn1(int d) { float c = 4.0f; for (int b = 0; b < 8; b++) if (a[b] != 2.0f) c = 5.0f; return c; } fn1: ldr r3, .L3+48 vldr.64 d4, .L3 // q2=(2.0,2.0,2.0,2.0) vldr.64 d5, .L3+8 vldrw.32 q0, [r3] // q0=a(0..3) adds r3, r3, #16 vcmp.f32 eq, q0, q2 // cmp a(0..3) == (2.0,2.0,2.0,2.0) vldrw.32 q1, [r3] // q1=a(4..7) vmrs r3, P0 vcmp.f32 eq, q1, q2 // cmp a(4..7) == (2.0,2.0,2.0,2.0) vmrs r2, P0 @ movhi ands r3, r3, r2 // r3=select(a(0..3]) & select(a(4..7)) vldr.64 d4, .L3+16 // q2=(5.0,5.0,5.0,5.0) vldr.64 d5, .L3+24 vmsr P0, r3 vldr.64 d6, .L3+32 // q3=(4.0,4.0,4.0,4.0) vldr.64 d7, .L3+40 vpsel q3, q3, q2 // q3=vcond_mask(4.0,5.0) vmov.32 r2, q3[1] // keep the scalar max vmov.32 r0, q3[3] vmov.32 r3, q3[2] vmov.f32 s11, s12 vmov s15, r2 vmov s14, r3 vmaxnm.f32 s15, s11, s15 vmaxnm.f32 s15, s15, s14 vmov s14, r0 vmaxnm.f32 s15, s15, s14 vmov r0, s15 bx lr .L4: .align 3 .L3: .word 1073741824 // 2.0f .word 1073741824 .word 1073741824 .word 1073741824 .word 1084227584 // 5.0f .word 1084227584 .word 1084227584 .word 1084227584 .word 1082130432 // 4.0f .word 1082130432 .word 1082130432 .word 1082130432 This patch adds tests that trigger an ICE without this fix. The pr100757*.c testcases are derived from gcc.c-torture/compile/20160205-1.c, forcing the use of MVE, and using various types and return values different from 0 and 1 to avoid commonalization with boolean masks. In addition, since we should not need these masks, the tests make sure they are not present. Most of the work of this patch series was carried out while I was working at STMicroelectronics as a Linaro assignee. 2022-02-22 Christophe Lyon <christophe.lyon@arm.com> PR target/100757 gcc/ * config/arm/arm-protos.h (arm_get_mask_mode): New prototype. (arm_expand_vector_compare): Update prototype. * config/arm/arm.cc (TARGET_VECTORIZE_GET_MASK_MODE): New. (arm_vector_mode_supported_p): Add support for VxBI modes. (arm_expand_vector_compare): Remove useless generation of vpsel. (arm_expand_vcond): Fix select operands. (arm_get_mask_mode): New. * config/arm/mve.md (vec_cmp<mode><MVE_vpred>): New. (vec_cmpu<mode><MVE_vpred>): New. (vcond_mask_<mode><MVE_vpred>): New. * config/arm/vec-common.md (vec_cmp<mode><v_cmp_result>) (vec_cmpu<mode><mode, vcond_mask_<mode><v_cmp_result>): Move to ... * config/arm/neon.md (vec_cmp<mode><v_cmp_result>) (vec_cmpu<mode><mode, vcond_mask_<mode><v_cmp_result>): ... here and disable for MVE. * doc/sourcebuild.texi (arm_mve): Document new effective-target. gcc/testsuite/ PR target/100757 * gcc.target/arm/simd/pr100757-2.c: New. * gcc.target/arm/simd/pr100757-3.c: New. * gcc.target/arm/simd/pr100757-4.c: New. * gcc.target/arm/simd/pr100757.c: New. * gcc.dg/signbit-2.c: Skip when targeting ARM/MVE. * lib/target-supports.exp (check_effective_target_arm_mve): New.
author: Christophe Lyon <christophe.lyon@arm.com> 2021-10-20 15:30:16 +0000
committer: Christophe Lyon <christophe.lyon@foss.st.com> 2022-02-22 15:55:07 +0000
commit: df0e57c2c032cea0f77f2e68231c035f282b26d6 (patch)
tree: 21d8c9a9003ba83035baaea4f058760da0e64036 /gcc
parent: 91224cf625dc90304bb515a0cc602beed48fe3da (diff)
download: gcc-df0e57c2c032cea0f77f2e68231c035f282b26d6.zip
gcc-df0e57c2c032cea0f77f2e68231c035f282b26d6.tar.gz
gcc-df0e57c2c032cea0f77f2e68231c035f282b26d6.tar.bz2
12 files changed, 227 insertions, 130 deletions
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index f2f7ca6..9d14209 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -204,6 +204,7 @@ extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
 extern bool arm_pad_reg_upward (machine_mode, tree, int);
 #endif
 extern int arm_apply_result_size (void);
+extern opt_machine_mode arm_get_mask_mode (machine_mode mode);
 
 #endif /* RTX_CODE */
 
@@ -380,7 +381,7 @@ extern void arm_emit_coreregs_64bit_shift (enum rtx_code, rtx, rtx, rtx, rtx,
 extern bool arm_fusion_enabled_p (tune_params::fuse_ops);
 extern bool arm_valid_symbolic_address_p (rtx);
 extern bool arm_validize_comparison (rtx *, rtx *, rtx *);
-extern bool arm_expand_vector_compare (rtx, rtx_code, rtx, rtx, bool, bool);
+extern bool arm_expand_vector_compare (rtx, rtx_code, rtx, rtx, bool);
 #endif /* RTX_CODE */
 
 extern bool arm_gen_setmem (rtx *);
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index df43c67..c1103d9 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -832,6 +832,9 @@ static const struct attribute_spec arm_attribute_table[] =
 
 #undef TARGET_STACK_PROTECT_GUARD
 #define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard
+
+#undef TARGET_VECTORIZE_GET_MASK_MODE
+#define TARGET_VECTORIZE_GET_MASK_MODE arm_get_mask_mode
 
 /* Obstack for minipool constant handling.  */
 static struct obstack minipool_obstack;
@@ -29286,7 +29289,8 @@ arm_vector_mode_supported_p (machine_mode mode)
 
   if (TARGET_HAVE_MVE
       && (mode == V2DImode || mode == V4SImode || mode == V8HImode
-	  || mode == V16QImode))
+	  || mode == V16QImode
+	  || mode == V16BImode || mode == V8BImode || mode == V4BImode))
       return true;
 
   if (TARGET_HAVE_MVE_FLOAT
@@ -31085,7 +31089,7 @@ arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
 }
 
 /* Return the mode for the MVE vector of predicates corresponding to MODE.  */
-machine_mode
+opt_machine_mode
 arm_mode_to_pred_mode (machine_mode mode)
 {
   switch (GET_MODE_NUNITS (mode))
@@ -31094,7 +31098,7 @@ arm_mode_to_pred_mode (machine_mode mode)
     case 8: return V8BImode;
     case 4: return V4BImode;
     }
-  gcc_unreachable ();
+  return opt_machine_mode ();
 }
 
 /* Expand code to compare vectors OP0 and OP1 using condition CODE.
@@ -31102,16 +31106,12 @@ arm_mode_to_pred_mode (machine_mode mode)
    and return true if TARGET contains the inverse.  If !CAN_INVERT,
    always store the result in TARGET, never its inverse.
 
-   If VCOND_MVE, do not emit the vpsel instruction here, let arm_expand_vcond do
-   it with the right destination type to avoid emiting two vpsel, one here and
-   one in arm_expand_vcond.
-
    Note that the handling of floating-point comparisons is not
    IEEE compliant.  */
 
 bool
 arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
-			   bool can_invert, bool vcond_mve)
+			   bool can_invert)
 {
   machine_mode cmp_result_mode = GET_MODE (target);
   machine_mode cmp_mode = GET_MODE (op0);
@@ -31140,7 +31140,7 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
 	       and then store its inverse in TARGET.  This avoids reusing
 	       TARGET (which for integer NE could be one of the inputs).  */
 	    rtx tmp = gen_reg_rtx (cmp_result_mode);
-	    if (arm_expand_vector_compare (tmp, code, op0, op1, true, vcond_mve))
+	    if (arm_expand_vector_compare (tmp, code, op0, op1, true))
 	      gcc_unreachable ();
 	    emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
 	    return false;
@@ -31176,36 +31176,22 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
     case NE:
       if (TARGET_HAVE_MVE)
 	{
-	  rtx vpr_p0;
-	  if (vcond_mve)
-	    vpr_p0 = target;
-	  else
-	    vpr_p0 = gen_reg_rtx (arm_mode_to_pred_mode (cmp_mode));
-
 	  switch (GET_MODE_CLASS (cmp_mode))
 	    {
 	    case MODE_VECTOR_INT:
-	      emit_insn (gen_mve_vcmpq (code, cmp_mode, vpr_p0, op0, force_reg (cmp_mode, op1)));
+	      emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
+					op0, force_reg (cmp_mode, op1)));
 	      break;
 	    case MODE_VECTOR_FLOAT:
 	      if (TARGET_HAVE_MVE_FLOAT)
-		emit_insn (gen_mve_vcmpq_f (code, cmp_mode, vpr_p0, op0, force_reg (cmp_mode, op1)));
+		emit_insn (gen_mve_vcmpq_f (code, cmp_mode, target,
+					    op0, force_reg (cmp_mode, op1)));
 	      else
 		gcc_unreachable ();
 	      break;
 	    default:
 	      gcc_unreachable ();
 	    }
-
-	  /* If we are not expanding a vcond, build the result here.  */
-	  if (!vcond_mve)
-	    {
-	      rtx zero = gen_reg_rtx (cmp_result_mode);
-	      rtx one = gen_reg_rtx (cmp_result_mode);
-	      emit_move_insn (zero, CONST0_RTX (cmp_result_mode));
-	      emit_move_insn (one, CONST1_RTX (cmp_result_mode));
-	      emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, target, one, zero, vpr_p0));
-	    }
 	}
       else
 	emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
@@ -31217,23 +31203,8 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
     case GEU:
     case GTU:
       if (TARGET_HAVE_MVE)
-	{
-	  rtx vpr_p0;
-	  if (vcond_mve)
-	    vpr_p0 = target;
-	  else
-	    vpr_p0 = gen_reg_rtx (arm_mode_to_pred_mode (cmp_mode));
-
-	  emit_insn (gen_mve_vcmpq (code, cmp_mode, vpr_p0, op0, force_reg (cmp_mode, op1)));
-	  if (!vcond_mve)
-	    {
-	      rtx zero = gen_reg_rtx (cmp_result_mode);
-	      rtx one = gen_reg_rtx (cmp_result_mode);
-	      emit_move_insn (zero, CONST0_RTX (cmp_result_mode));
-	      emit_move_insn (one, CONST1_RTX (cmp_result_mode));
-	      emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, target, one, zero, vpr_p0));
-	    }
-	}
+	emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
+				  op0, force_reg (cmp_mode, op1)));
       else
 	emit_insn (gen_neon_vc (code, cmp_mode, target,
 				op0, force_reg (cmp_mode, op1)));
@@ -31244,23 +31215,8 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
     case LEU:
     case LTU:
       if (TARGET_HAVE_MVE)
-	{
-	  rtx vpr_p0;
-	  if (vcond_mve)
-	    vpr_p0 = target;
-	  else
-	    vpr_p0 = gen_reg_rtx (arm_mode_to_pred_mode (cmp_mode));
-
-	  emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, vpr_p0, force_reg (cmp_mode, op1), op0));
-	  if (!vcond_mve)
-	    {
-	      rtx zero = gen_reg_rtx (cmp_result_mode);
-	      rtx one = gen_reg_rtx (cmp_result_mode);
-	      emit_move_insn (zero, CONST0_RTX (cmp_result_mode));
-	      emit_move_insn (one, CONST1_RTX (cmp_result_mode));
-	      emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, target, one, zero, vpr_p0));
-	    }
-	}
+	emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, target,
+				  force_reg (cmp_mode, op1), op0));
       else
 	emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
 				target, force_reg (cmp_mode, op1), op0));
@@ -31275,8 +31231,8 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
 	rtx gt_res = gen_reg_rtx (cmp_result_mode);
 	rtx alt_res = gen_reg_rtx (cmp_result_mode);
 	rtx_code alt_code = (code == LTGT ? LT : LE);
-	if (arm_expand_vector_compare (gt_res, GT, op0, op1, true, vcond_mve)
-	    || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true, vcond_mve))
+	if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
+	    || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
 	  gcc_unreachable ();
 	emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
 						     gt_res, alt_res)));
@@ -31296,19 +31252,15 @@ arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
 {
   /* When expanding for MVE, we do not want to emit a (useless) vpsel in
      arm_expand_vector_compare, and another one here.  */
-  bool vcond_mve=false;
   rtx mask;
 
   if (TARGET_HAVE_MVE)
-    {
-      vcond_mve=true;
-      mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode));
-    }
+    mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode).require ());
   else
     mask = gen_reg_rtx (cmp_result_mode);
 
   bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
-					     operands[4], operands[5], true, vcond_mve);
+					     operands[4], operands[5], true);
   if (inverted)
     std::swap (operands[1], operands[2]);
   if (TARGET_NEON)
@@ -31316,20 +31268,20 @@ arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
 			    mask, operands[1], operands[2]));
   else
     {
-      machine_mode cmp_mode = GET_MODE (operands[4]);
-      rtx vpr_p0 = mask;
-      rtx zero = gen_reg_rtx (cmp_mode);
-      rtx one = gen_reg_rtx (cmp_mode);
-      emit_move_insn (zero, CONST0_RTX (cmp_mode));
-      emit_move_insn (one, CONST1_RTX (cmp_mode));
+      machine_mode cmp_mode = GET_MODE (operands[0]);
+
       switch (GET_MODE_CLASS (cmp_mode))
 	{
 	case MODE_VECTOR_INT:
-	  emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, operands[0], one, zero, vpr_p0));
+	  emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_mode, operands[0],
+				     operands[1], operands[2], mask));
 	  break;
 	case MODE_VECTOR_FLOAT:
 	  if (TARGET_HAVE_MVE_FLOAT)
-	    emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0], one, zero, vpr_p0));
+	    emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0],
+					 operands[1], operands[2], mask));
+	  else
+	    gcc_unreachable ();
 	  break;
 	default:
 	  gcc_unreachable ();
@@ -34251,4 +34203,15 @@ arm_mode_base_reg_class (machine_mode mode)
 
 struct gcc_target targetm = TARGET_INITIALIZER;
 
+/* Implement TARGET_VECTORIZE_GET_MASK_MODE.  */
+
+opt_machine_mode
+arm_get_mask_mode (machine_mode mode)
+{
+  if (TARGET_HAVE_MVE)
+    return arm_mode_to_pred_mode (mode);
+
+  return default_get_mask_mode (mode);
+}
+
 #include "gt-arm.h"
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 983aa10..d0c3100 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -10527,3 +10527,54 @@
       operands[1] = force_reg (<MODE>mode, operands[1]);
   }
 )
+
+;; Expanders for vec_cmp and vcond
+
+(define_expand "vec_cmp<mode><MVE_vpred>"
+  [(set (match_operand:<MVE_VPRED> 0 "s_register_operand")
+	(match_operator:<MVE_VPRED> 1 "comparison_operator"
+	  [(match_operand:MVE_VLD_ST 2 "s_register_operand")
+	   (match_operand:MVE_VLD_ST 3 "reg_or_zero_operand")]))]
+  "TARGET_HAVE_MVE
+   && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+{
+  arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
+			     operands[2], operands[3], false);
+  DONE;
+})
+
+(define_expand "vec_cmpu<mode><MVE_vpred>"
+  [(set (match_operand:<MVE_VPRED> 0 "s_register_operand")
+	(match_operator:<MVE_VPRED> 1 "comparison_operator"
+	  [(match_operand:MVE_2 2 "s_register_operand")
+	   (match_operand:MVE_2 3 "reg_or_zero_operand")]))]
+  "TARGET_HAVE_MVE"
+{
+  arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
+			     operands[2], operands[3], false);
+  DONE;
+})
+
+(define_expand "vcond_mask_<mode><MVE_vpred>"
+  [(set (match_operand:MVE_VLD_ST 0 "s_register_operand")
+	(if_then_else:MVE_VLD_ST
+	  (match_operand:<MVE_VPRED> 3 "s_register_operand")
+	  (match_operand:MVE_VLD_ST 1 "s_register_operand")
+	  (match_operand:MVE_VLD_ST 2 "s_register_operand")))]
+  "TARGET_HAVE_MVE"
+{
+  switch (GET_MODE_CLASS (<MODE>mode))
+    {
+      case MODE_VECTOR_INT:
+	emit_insn (gen_mve_vpselq (VPSELQ_S, <MODE>mode, operands[0],
+				   operands[1], operands[2], operands[3]));
+	break;
+      case MODE_VECTOR_FLOAT:
+	emit_insn (gen_mve_vpselq_f (<MODE>mode, operands[0],
+				     operands[1], operands[2], operands[3]));
+	break;
+      default:
+	gcc_unreachable ();
+    }
+  DONE;
+})
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 2b9a3de..f270ded 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -1394,6 +1394,45 @@
   [(set_attr "type" "neon_qsub<q>")]
 )
 
+(define_expand "vec_cmp<mode><v_cmp_result>"
+  [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
+	(match_operator:<V_cmp_result> 1 "comparison_operator"
+	  [(match_operand:VDQWH 2 "s_register_operand")
+	   (match_operand:VDQWH 3 "reg_or_zero_operand")]))]
+  "TARGET_NEON
+   && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+{
+  arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
+			     operands[2], operands[3], false);
+  DONE;
+})
+
+(define_expand "vec_cmpu<mode><mode>"
+  [(set (match_operand:VDQIW 0 "s_register_operand")
+	(match_operator:VDQIW 1 "comparison_operator"
+	  [(match_operand:VDQIW 2 "s_register_operand")
+	   (match_operand:VDQIW 3 "reg_or_zero_operand")]))]
+  "TARGET_NEON"
+{
+  arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
+			     operands[2], operands[3], false);
+  DONE;
+})
+
+(define_expand "vcond_mask_<mode><v_cmp_result>"
+  [(set (match_operand:VDQWH 0 "s_register_operand")
+	(if_then_else:VDQWH
+	  (match_operand:<V_cmp_result> 3 "s_register_operand")
+	  (match_operand:VDQWH 1 "s_register_operand")
+	  (match_operand:VDQWH 2 "s_register_operand")))]
+  "TARGET_NEON
+   && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+{
+  emit_insn (gen_neon_vbsl<mode> (operands[0], operands[3], operands[1],
+				  operands[2]));
+  DONE;
+})
+
 ;; Patterns for builtins.
 
 ; good for plain vadd, vaddq.
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
index 2718d82..f130090 100644
--- a/gcc/config/arm/vec-common.md
+++ b/gcc/config/arm/vec-common.md
@@ -363,33 +363,6 @@
     }
 })
 
-(define_expand "vec_cmp<mode><v_cmp_result>"
-  [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
-	(match_operator:<V_cmp_result> 1 "comparison_operator"
-	  [(match_operand:VDQWH 2 "s_register_operand")
-	   (match_operand:VDQWH 3 "reg_or_zero_operand")]))]
-  "ARM_HAVE_<MODE>_ARITH
-   && !TARGET_REALLY_IWMMXT
-   && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
-{
-  arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
-			     operands[2], operands[3], false, false);
-  DONE;
-})
-
-(define_expand "vec_cmpu<mode><mode>"
-  [(set (match_operand:VDQIW 0 "s_register_operand")
-	(match_operator:VDQIW 1 "comparison_operator"
-	  [(match_operand:VDQIW 2 "s_register_operand")
-	   (match_operand:VDQIW 3 "reg_or_zero_operand")]))]
-  "ARM_HAVE_<MODE>_ARITH
-   && !TARGET_REALLY_IWMMXT"
-{
-  arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
-			     operands[2], operands[3], false, false);
-  DONE;
-})
-
 ;; Conditional instructions.  These are comparisons with conditional moves for
 ;; vectors.  They perform the assignment:
 ;;
@@ -461,31 +434,6 @@
   DONE;
 })
 
-(define_expand "vcond_mask_<mode><v_cmp_result>"
-  [(set (match_operand:VDQWH 0 "s_register_operand")
-        (if_then_else:VDQWH
-          (match_operand:<V_cmp_result> 3 "s_register_operand")
-          (match_operand:VDQWH 1 "s_register_operand")
-          (match_operand:VDQWH 2 "s_register_operand")))]
-  "ARM_HAVE_<MODE>_ARITH
-   && !TARGET_REALLY_IWMMXT
-   && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
-{
-  if (TARGET_NEON)
-    {
-      emit_insn (gen_neon_vbsl (<MODE>mode, operands[0], operands[3],
-                                operands[1], operands[2]));
-    }
-  else if (TARGET_HAVE_MVE)
-    {
-      emit_insn (gen_mve_vpselq (VPSELQ_S, <MODE>mode, operands[0],
-                                 operands[1], operands[2], operands[3]));
-    }
-  else
-    gcc_unreachable ();
-  DONE;
-})
-
 (define_expand "vec_load_lanesoi<mode>"
   [(set (match_operand:OI 0 "s_register_operand")
         (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index 2d128f4..613ac29 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -2236,6 +2236,10 @@ ARM target supports the @code{-mfloat-abi=softfp} option.
 @anchor{arm_hard_ok}
 ARM target supports the @code{-mfloat-abi=hard} option.
 
+@item arm_mve
+@anchor{arm_mve}
+ARM target supports generating MVE instructions.
+
 @item arm_v8_1_lob_ok
 @anchor{arm_v8_1_lob_ok}
 ARM Target supports executing the Armv8.1-M Mainline Low Overhead Loop
diff --git a/gcc/testsuite/gcc.dg/signbit-2.c b/gcc/testsuite/gcc.dg/signbit-2.c
index b609f67..2f2dc44 100644
--- a/gcc/testsuite/gcc.dg/signbit-2.c
+++ b/gcc/testsuite/gcc.dg/signbit-2.c
@@ -4,6 +4,7 @@
 /* This test does not work when the truth type does not match vector type.  */
 /* { dg-additional-options "-mno-avx512f" { target { i?86-*-* x86_64-*-* } } } */
 /* { dg-additional-options "-march=armv8-a" { target aarch64_sve } } */
+/* { dg-skip-if "no fallback for MVE" { arm_mve } } */
 
 #include <stdint.h>
 
diff --git a/gcc/testsuite/gcc.target/arm/simd/pr100757-2.c b/gcc/testsuite/gcc.target/arm/simd/pr100757-2.c
new file mode 100644
index 0000000..c2262b4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/pr100757-2.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O3 -funsafe-math-optimizations" } */
+/* Derived from gcc.c-torture/compile/20160205-1.c.  */
+
+float a[32];
+int fn1(int d) {
+  int c = 4;
+  for (int b = 0; b < 32; b++)
+    if (a[b] != 2.0f)
+      c = 5;
+  return c;
+}
+
+/* { dg-final { scan-assembler-times {\t.word\t1073741824\n} 4 } } */ /* Constant 2.0f.  */
+/* { dg-final { scan-assembler-times {\t.word\t4\n} 4 } } */ /* Initial value for c.  */
+/* { dg-final { scan-assembler-times {\t.word\t5\n} 4 } } */ /* Possible value for c.  */
+/* { dg-final { scan-assembler-not {\t.word\t1\n} } } */ /* 'true' mask.  */
+/* { dg-final { scan-assembler-not {\t.word\t0\n} } } */ /* 'false' mask.  */
diff --git a/gcc/testsuite/gcc.target/arm/simd/pr100757-3.c b/gcc/testsuite/gcc.target/arm/simd/pr100757-3.c
new file mode 100644
index 0000000..e604555
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/pr100757-3.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O3 -funsafe-math-optimizations" } */
+/* Copied from gcc.c-torture/compile/20160205-1.c.  */
+
+float a[32];
+float fn1(int d) {
+  float c = 4.0f;
+  for (int b = 0; b < 32; b++)
+    if (a[b] != 2.0f)
+      c = 5.0f;
+  return c;
+}
+
+/* { dg-final { scan-assembler-times {\t.word\t1073741824\n} 4 } } */ /* Constant 2.0f.  */
+/* { dg-final { scan-assembler-times {\t.word\t1084227584\n} 4 } } */ /* Initial value for c (4.0).  */
+/* { dg-final { scan-assembler-times {\t.word\t1082130432\n} 4 } } */ /* Possible value for c (5.0).  */
+/* { dg-final { scan-assembler-not {\t.word\t1\n} } } */ /* 'true' mask.  */
+/* { dg-final { scan-assembler-not {\t.word\t0\n} } } */ /* 'false' mask.  */
diff --git a/gcc/testsuite/gcc.target/arm/simd/pr100757-4.c b/gcc/testsuite/gcc.target/arm/simd/pr100757-4.c
new file mode 100644
index 0000000..c12040c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/pr100757-4.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O3" } */
+/* Derived from gcc.c-torture/compile/20160205-1.c.  */
+
+unsigned int a[32];
+int fn1(int d) {
+  int c = 2;
+  for (int b = 0; b < 32; b++)
+    if (a[b])
+      c = 3;
+  return c;
+}
+
+/* { dg-final { scan-assembler-times {\t.word\t0\n} 4 } } */ /* 'false' mask.  */
+/* { dg-final { scan-assembler-not {\t.word\t1\n} } } */ /* 'true' mask.  */
+/* { dg-final { scan-assembler-times {\t.word\t2\n} 4 } } */ /* Initial value for c.  */
+/* { dg-final { scan-assembler-times {\t.word\t3\n} 4 } } */ /* Possible value for c.  */
diff --git a/gcc/testsuite/gcc.target/arm/simd/pr100757.c b/gcc/testsuite/gcc.target/arm/simd/pr100757.c
new file mode 100644
index 0000000..41d6e4e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/pr100757.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O3" } */
+/* Derived from gcc.c-torture/compile/20160205-1.c.  */
+
+int a[32];
+int fn1(int d) {
+  int c = 2;
+  for (int b = 0; b < 32; b++)
+    if (a[b])
+      c = 3;
+  return c;
+}
+
+/* { dg-final { scan-assembler-times {\t.word\t0\n} 4 } } */ /* 'false' mask.  */
+/* { dg-final { scan-assembler-not {\t.word\t1\n} } } */ /* 'true' mask.  */
+/* { dg-final { scan-assembler-times {\t.word\t2\n} 4 } } */ /* Initial value for c.  */
+/* { dg-final { scan-assembler-times {\t.word\t3\n} 4 } } */ /* Possible value for c.  */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 972400f..737e1a8 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -5235,6 +5235,18 @@ proc check_effective_target_arm_hard_ok { } {
 	} "-mfloat-abi=hard"]
 }
 
+# Return 1 if this is an ARM target supporting MVE.
+proc check_effective_target_arm_mve { } {
+    if { ![istarget arm*-*-*] } {
+	return 0
+    }
+    return [check_no_compiler_messages arm_mve assembly {
+	#if !defined (__ARM_FEATURE_MVE)
+	#error FOO
+	#endif
+    }]
+}
+
 # Return 1 if the target supports ARMv8.1-M MVE with floating point
 # instructions, 0 otherwise.  The test is valid for ARM.
 # Record the command line options needed.
author	Christophe Lyon <christophe.lyon@arm.com>	2021-10-20 15:30:16 +0000
committer	Christophe Lyon <christophe.lyon@foss.st.com>	2022-02-22 15:55:07 +0000
commit	df0e57c2c032cea0f77f2e68231c035f282b26d6 (patch)
tree	21d8c9a9003ba83035baaea4f058760da0e64036 /gcc
parent	91224cf625dc90304bb515a0cc602beed48fe3da (diff)
download	gcc-df0e57c2c032cea0f77f2e68231c035f282b26d6.zip gcc-df0e57c2c032cea0f77f2e68231c035f282b26d6.tar.gz gcc-df0e57c2c032cea0f77f2e68231c035f282b26d6.tar.bz2