RISC-V: Refine some codes of riscv-v.cc[NFC]

This patch is NFC patch to refine unreasonable codes I notice. Tested on zvl128b/zvl256b/zvl512b/zvl1024b no regression. Committed. gcc/ChangeLog: * config/riscv/riscv-v.cc (emit_vlmax_gather_insn): Refine codes. (emit_vlmax_masked_gather_mu_insn): Ditto. (modulo_sel_indices): Ditto. (expand_vec_perm): Ditto. (shuffle_generic_patterns): Ditto.
author: Juzhe-Zhong <juzhe.zhong@rivai.ai> 2023-11-23 18:55:03 +0800
committer: Pan Li <pan2.li@intel.com> 2023-11-23 19:25:25 +0800
commit: 97ddebb6b4f6b132b0a8072b26d030077b418963 (patch)
tree: 861c87af9bed0a3147d4f494942be1da3e0f566c
parent: 256d64b3460807f33ff2f70e676801bdcf908c1c (diff)
download: gcc-97ddebb6b4f6b132b0a8072b26d030077b418963.zip
gcc-97ddebb6b4f6b132b0a8072b26d030077b418963.tar.gz
gcc-97ddebb6b4f6b132b0a8072b26d030077b418963.tar.bz2
1 files changed, 18 insertions, 36 deletions
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 7d3e803..24b09c0 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -294,8 +294,6 @@ public:
 	       "vsetvl zero, rs1/imm".  */
 	    poly_uint64 nunits = GET_MODE_NUNITS (vtype_mode);
 	    len = gen_int_mode (nunits, Pmode);
-	    if (!satisfies_constraint_K (len))
-	      len = force_reg (Pmode, len);
 	    vls_p = true;
 	  }
 	else if (can_create_pseudo_p ())
@@ -846,24 +844,6 @@ emit_vlmax_gather_insn (rtx target, rtx op, rtx sel)
     }
   else if (maybe_ne (GET_MODE_SIZE (data_mode), GET_MODE_SIZE (sel_mode)))
     icode = code_for_pred_gatherei16 (data_mode);
-  else if (CONST_VECTOR_P (sel)
-           && GET_MODE_BITSIZE (GET_MODE_INNER (sel_mode)) > 16
-           && riscv_get_v_regno_alignment (data_mode) > 1)
-    {
-      /* If the inner mode of data is not QI or HI and data_lmul > 1,
-         emitting vrgatherei16.vv instruction will lower register
-         pressure.
-         data_mode  sel_mode  ei16
-         RVVM1QI    RVVM1QI   RVVM2HI  not needed
-         RVVM2QI    RVVM2QI   RVVM4HI  not needed
-         RVVM2HI    RVVM2HI   RVVM2HI  not needed
-         RVVM2SI    RVVM2SI   RVVM1HI  need
-         RVVM4SI    RVVM4SI   RVVM2HI  need
-         RVVM8DI    RVVM8DI   RVVM2HI  need */
-      PUT_MODE (sel, get_vector_mode (HImode,
-                GET_MODE_NUNITS (data_mode)).require ());
-      icode = code_for_pred_gatherei16 (data_mode);
-    }
   else
     icode = code_for_pred_gather (data_mode);
   rtx ops[] = {target, op, sel};
@@ -877,13 +857,13 @@ emit_vlmax_masked_gather_mu_insn (rtx target, rtx op, rtx sel, rtx mask)
   insn_code icode;
   machine_mode data_mode = GET_MODE (target);
   machine_mode sel_mode = GET_MODE (sel);
-  if (maybe_ne (GET_MODE_SIZE (data_mode), GET_MODE_SIZE (sel_mode)))
-    icode = code_for_pred_gatherei16 (data_mode);
-  else if (const_vec_duplicate_p (sel, &elt))
+  if (const_vec_duplicate_p (sel, &elt))
     {
       icode = code_for_pred_gather_scalar (data_mode);
       sel = elt;
     }
+  else if (maybe_ne (GET_MODE_SIZE (data_mode), GET_MODE_SIZE (sel_mode)))
+    icode = code_for_pred_gatherei16 (data_mode);
   else
     icode = code_for_pred_gather (data_mode);
   rtx ops[] = {target, mask, target, op, sel};
@@ -2703,15 +2683,21 @@ expand_vec_cmp_float (rtx target, rtx_code code, rtx op0, rtx op1,
   return false;
 }
 
-/* Modulo all SEL indices to ensure they are all in range if [0, MAX_SEL].  */
+/* Modulo all SEL indices to ensure they are all in range if [0, MAX_SEL].
+   MAX_SEL is nunits - 1 if rtx_equal_p (op0, op1). Otherwise, it is
+   2 * nunits - 1.  */
 static rtx
-modulo_sel_indices (rtx sel, poly_uint64 max_sel)
+modulo_sel_indices (rtx op0, rtx op1, rtx sel)
 {
   rtx sel_mod;
   machine_mode sel_mode = GET_MODE (sel);
   poly_uint64 nunits = GET_MODE_NUNITS (sel_mode);
-  /* If SEL is variable-length CONST_VECTOR, we don't need to modulo it.  */
-  if (!nunits.is_constant () && CONST_VECTOR_P (sel))
+  poly_uint64 max_sel = rtx_equal_p (op0, op1) ? nunits - 1 : 2 * nunits - 1;
+  /* If SEL is variable-length CONST_VECTOR, we don't need to modulo it.
+     Or if SEL is constant-length within [0, MAX_SEL], no need to modulo the
+     indice.  */
+  if (CONST_VECTOR_P (sel)
+      && (!nunits.is_constant () || const_vec_all_in_range_p (sel, 0, max_sel)))
     sel_mod = sel;
   else
     {
@@ -2761,9 +2747,7 @@ expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
      out-of-range indices, so we need to modulo all the vec_perm indices
      to ensure they are all in range of [0, nunits - 1] when op0 == op1
      or all in range of [0, 2 * nunits - 1] when op0 != op1.  */
-  rtx sel_mod
-    = modulo_sel_indices (sel,
-			  rtx_equal_p (op0, op1) ? nunits - 1 : 2 * nunits - 1);
+  rtx sel_mod = modulo_sel_indices (op0, op1, sel);
 
   /* Check if the two values vectors are the same.  */
   if (rtx_equal_p (op0, op1))
@@ -2772,15 +2756,13 @@ expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
       return;
     }
 
-  rtx max_sel = gen_const_vector_dup (sel_mode, 2 * nunits - 1);
-
   /* This following sequence is handling the case that:
      __builtin_shufflevector (vec1, vec2, index...), the index can be any
      value in range of [0, 2 * nunits - 1].  */
   machine_mode mask_mode;
   mask_mode = get_mask_mode (data_mode);
   rtx mask = gen_reg_rtx (mask_mode);
-  max_sel = gen_const_vector_dup (sel_mode, nunits);
+  rtx max_sel = gen_const_vector_dup (sel_mode, nunits);
 
   /* Step 1: generate a mask that should select everything >= nunits into the
    * mask.  */
@@ -3282,15 +3264,15 @@ shuffle_generic_patterns (struct expand_vec_perm_d *d)
 	    return false;
 	}
     }
+  else if (riscv_get_v_regno_alignment (sel_mode) > 1
+	   && GET_MODE_INNER (sel_mode) != HImode)
+    sel_mode = get_vector_mode (HImode, nunits).require ();
 
   /* Success! */
   if (d->testing_p)
     return true;
 
   rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm);
-  /* 'mov<mode>' generte interleave vector.  */
-  if (!nunits.is_constant ())
-    sel = force_reg (sel_mode, sel);
   /* Some FIXED-VLMAX/VLS vector permutation situations call targethook
      instead of expand vec_perm<mode>, we handle it directly.  */
   expand_vec_perm (d->target, d->op0, d->op1, sel);
author	Juzhe-Zhong <juzhe.zhong@rivai.ai>	2023-11-23 18:55:03 +0800
committer	Pan Li <pan2.li@intel.com>	2023-11-23 19:25:25 +0800
commit	97ddebb6b4f6b132b0a8072b26d030077b418963 (patch)
tree	861c87af9bed0a3147d4f494942be1da3e0f566c
parent	256d64b3460807f33ff2f70e676801bdcf908c1c (diff)
download	gcc-97ddebb6b4f6b132b0a8072b26d030077b418963.zip gcc-97ddebb6b4f6b132b0a8072b26d030077b418963.tar.gz gcc-97ddebb6b4f6b132b0a8072b26d030077b418963.tar.bz2