Diffstat (limited to 'gcc/config')
32 files changed, 659 insertions, 365 deletions
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index a20a20c..70303d6 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -759,7 +759,7 @@ bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT);
 bool aarch64_const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT,
                                             HOST_WIDE_INT);
 bool aarch64_const_vec_rnd_cst_p (rtx, rtx);
-bool aarch64_const_vec_rsra_rnd_imm_p (rtx);
+bool aarch64_rnd_imm_p (rtx);
 bool aarch64_constant_address_p (rtx);
 bool aarch64_emit_approx_div (rtx, rtx, rtx);
 bool aarch64_emit_approx_sqrt (rtx, rtx, bool);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 90118c6..4052ca9 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1323,7 +1323,7 @@
           (plus:<V2XWIDE>
             (<SHIFTEXTEND>:<V2XWIDE>
               (match_operand:VSDQ_I_DI 2 "register_operand" "w"))
-            (match_operand:<V2XWIDE> 4 "aarch64_simd_rsra_rnd_imm_vec"))
+            (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
           (match_operand:VSDQ_I_DI 3 "aarch64_simd_shift_imm_<vec_or_offset>_<Vel>")))
        (match_operand:VSDQ_I_DI 1 "register_operand" "0")))]
  "TARGET_SIMD
@@ -6437,7 +6437,7 @@
           (plus:<V2XWIDE>
             (<SHIFTEXTEND>:<V2XWIDE>
               (match_operand:VSDQ_I_DI 1 "register_operand" "w"))
-            (match_operand:<V2XWIDE> 3 "aarch64_simd_rsra_rnd_imm_vec"))
+            (match_operand:<V2XWIDE> 3 "aarch64_int_rnd_operand"))
           (match_operand:VSDQ_I_DI 2 "aarch64_simd_shift_imm_<vec_or_offset>_<Vel>"))))]
  "TARGET_SIMD
   && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
@@ -6557,7 +6557,7 @@
           (plus:<V2XWIDE>
             (<TRUNCEXTEND>:<V2XWIDE>
               (match_operand:VQN 1 "register_operand" "w"))
-            (match_operand:<V2XWIDE> 3 "aarch64_simd_rsra_rnd_imm_vec"))
+            (match_operand:<V2XWIDE> 3 "aarch64_int_rnd_operand"))
           (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
  "TARGET_SIMD
   && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
@@ -6572,7 +6572,7 @@
           (plus:<DWI>
             (<TRUNCEXTEND>:<DWI>
               (match_operand:SD_HSDI 1 "register_operand" "w"))
-            (match_operand:<DWI> 3 "aarch64_simd_rsra_rnd_imm_vec"))
+            (match_operand:<DWI> 3 "aarch64_int_rnd_operand"))
           (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
  "TARGET_SIMD
   && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
@@ -6702,7 +6702,7 @@
             (plus:<V2XWIDE>
               (sign_extend:<V2XWIDE>
                 (match_operand:VQN 1 "register_operand" "w"))
-              (match_operand:<V2XWIDE> 3 "aarch64_simd_rsra_rnd_imm_vec"))
+              (match_operand:<V2XWIDE> 3 "aarch64_int_rnd_operand"))
             (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))
           (match_operand:<V2XWIDE> 4 "aarch64_simd_imm_zero"))
         (match_operand:<V2XWIDE> 5 "aarch64_simd_umax_quarter_mode"))))]
@@ -6713,14 +6713,14 @@
 )
 
 (define_insn "aarch64_sqrshrun_n<mode>_insn"
-  [(set (match_operand:<V2XWIDE> 0 "register_operand" "=w")
-       (smin:<V2XWIDE>
-         (smax:<V2XWIDE>
-           (ashiftrt:<V2XWIDE>
-             (plus:<V2XWIDE>
-               (sign_extend:<V2XWIDE>
+  [(set (match_operand:<DWI> 0 "register_operand" "=w")
+       (smin:<DWI>
+         (smax:<DWI>
+           (ashiftrt:<DWI>
+             (plus:<DWI>
+               (sign_extend:<DWI>
                  (match_operand:SD_HSDI 1 "register_operand" "w"))
-               (match_operand:<V2XWIDE> 3 "aarch64_simd_rsra_rnd_imm_vec"))
+               (match_operand:<DWI> 3 "aarch64_int_rnd_operand"))
             (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
           (const_int 0))
         (const_int <half_mask>)))]
@@ -6736,10 +6736,10 @@
   (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")]
  "TARGET_SIMD"
  {
-   int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
+   int prec = GET_MODE_UNIT_PRECISION (<DWI>mode);
    wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
-   rtx rnd = immed_wide_int_const (rnd_wi, <V2XWIDE>mode);
-   rtx dst = gen_reg_rtx (<V2XWIDE>mode);
+   rtx rnd = immed_wide_int_const (rnd_wi, <DWI>mode);
+   rtx dst = gen_reg_rtx (<DWI>mode);
    emit_insn (gen_aarch64_sqrshrun_n<mode>_insn (dst, operands[1], operands[2], rnd));
    emit_move_insn (operands[0], gen_lowpart (<VNARROWQ>mode, dst));
    DONE;
@@ -6831,7 +6831,7 @@
           (plus:<V2XWIDE>
             (<TRUNCEXTEND>:<V2XWIDE>
               (match_operand:VQN 2 "register_operand" "w"))
-            (match_operand:<V2XWIDE> 4 "aarch64_simd_rsra_rnd_imm_vec"))
+            (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
           (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN
   && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
@@ -6847,7 +6847,7 @@
           (plus:<V2XWIDE>
             (<TRUNCEXTEND>:<V2XWIDE>
               (match_operand:VQN 2 "register_operand" "w"))
-            (match_operand:<V2XWIDE> 4 "aarch64_simd_rsra_rnd_imm_vec"))
+            (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
           (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
        (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
@@ -6965,7 +6965,7 @@
              (plus:<V2XWIDE>
                (sign_extend:<V2XWIDE>
                  (match_operand:VQN 2 "register_operand" "w"))
-                (match_operand:<V2XWIDE> 4 "aarch64_simd_rsra_rnd_imm_vec"))
+                (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
              (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
            (match_operand:<V2XWIDE> 5 "aarch64_simd_imm_zero"))
          (match_operand:<V2XWIDE> 6 "aarch64_simd_umax_quarter_mode")))))]
@@ -6985,7 +6985,7 @@
              (plus:<V2XWIDE>
                (sign_extend:<V2XWIDE>
                  (match_operand:VQN 2 "register_operand" "w"))
-                (match_operand:<V2XWIDE> 4 "aarch64_simd_rsra_rnd_imm_vec"))
+                (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
              (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
            (match_operand:<V2XWIDE> 5 "aarch64_simd_imm_zero"))
          (match_operand:<V2XWIDE> 6 "aarch64_simd_umax_quarter_mode")))
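[Every aarch64-simd.md hunk above swaps the old RSRA-specific rounding predicate for the generic aarch64_int_rnd_operand; the RTL shape it guards is the usual rounding-shift idiom. A scalar sketch of that idiom, for illustration only (rshr32 is not a GCC function):]

    #include <cstdint>

    // Rounding shift right: add 1 << (n - 1) in a wider type, then shift.
    // This mirrors the (ashiftrt (plus (extend x) rnd) n) shape matched above,
    // where rnd must satisfy aarch64_int_rnd_operand / aarch64_rnd_imm_p.
    static inline int32_t rshr32 (int32_t x, unsigned n)
    {
      int64_t wide = (int64_t) x + (INT64_C (1) << (n - 1));
      return (int32_t) (wide >> n);
    }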
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index b99f12c..560e543 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -1929,7 +1929,7 @@ static const struct tune_params ampere1_tunings =
   "32:16",	/* loop_align.  */
   2,	/* int_reassoc_width.  */
   4,	/* fp_reassoc_width.  */
-  1,	/* fma_reassoc_width.  */
+  4,	/* fma_reassoc_width.  */
   2,	/* vec_reassoc_width.  */
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
@@ -11761,14 +11761,14 @@ aarch64_extract_vec_duplicate_wide_int (rtx x, wide_int *ret_wi)
   return true;
 }
 
-/* Return true if X is a TImode constant or a constant vector of integer
-   immediates that represent the rounding constant used in the RSRA
-   instructions.
-   The accepted form of the constant is (1 << (C - 1)) where C is within
+/* Return true if X is a scalar or a constant vector of integer
+   immediates that represent the rounding constant used in the fixed-point
+   arithmetic instructions.
+   The accepted form of the constant is (1 << (C - 1)) where C is in
    the range [1, MODE_WIDTH/2].  */
 
 bool
-aarch64_const_vec_rsra_rnd_imm_p (rtx x)
+aarch64_rnd_imm_p (rtx x)
 {
   wide_int rnd_cst;
   if (!aarch64_extract_vec_duplicate_wide_int (x, &rnd_cst))
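[The ampere1 tuning change raises fma_reassoc_width from 1 to 4, letting the reassociation pass break one serial FMA chain into four independent chains. A hedged illustration of the effect (assumes n is a multiple of 4; the pass performs the equivalent transformation internally):]

    // Before: a single accumulator serializes every fused multiply-add.
    // With width 4, loops like this dot product are effectively computed
    // with four parallel accumulator chains:
    float dot (const float *a, const float *b, int n)
    {
      float s0 = 0, s1 = 0, s2 = 0, s3 = 0;
      for (int i = 0; i < n; i += 4)
        {
          s0 += a[i] * b[i];
          s1 += a[i + 1] * b[i + 1];
          s2 += a[i + 2] * b[i + 2];
          s3 += a[i + 3] * b[i + 3];
        }
      return (s0 + s1) + (s2 + s3);
    }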
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index b31ba6e..d5a4a1c 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -626,15 +626,11 @@
   (and (match_code "const_vector")
        (match_test "aarch64_const_vec_all_same_in_range_p (op, 1, 64)")))
 
-(define_predicate "aarch64_simd_rsra_rnd_imm_vec"
+;; A constant or vector of constants that represents an integer rounding
+;; constant added during fixed-point arithmetic calculations.
+(define_predicate "aarch64_int_rnd_operand"
   (and (match_code "const_vector,const_int,const_wide_int")
-       (match_test "aarch64_const_vec_rsra_rnd_imm_p (op)")))
-
-(define_predicate "aarch64_simd_rshrn_imm_vec"
-  (and (match_code "const_vector")
-       (match_test "aarch64_const_vec_all_same_in_range_p (op, 1,
-				HOST_WIDE_INT_1U
-				<< (GET_MODE_UNIT_BITSIZE (mode) - 1))")))
+       (match_test "aarch64_rnd_imm_p (op)")))
 
 (define_predicate "aarch64_simd_raddsubhn_imm_vec"
   (and (match_code "const_vector")
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 7bb4d39..9a8d244 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -10234,6 +10234,18 @@ ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
   machine_mode mode1 = insn_data[d->icode].operand[1].mode;
   enum rtx_code comparison = d->comparison;
 
+  /* ptest reg, reg sets the carry flag.  */
+  if (comparison == LTU
+      && (d->code == IX86_BUILTIN_PTESTC
+	  || d->code == IX86_BUILTIN_PTESTC256)
+      && rtx_equal_p (op0, op1))
+    {
+      if (!target)
+	target = gen_reg_rtx (SImode);
+      emit_move_insn (target, const1_rtx);
+      return target;
+    }
+
   if (VECTOR_MODE_P (mode0))
     op0 = safe_vector_operand (op0, mode0);
   if (VECTOR_MODE_P (mode1))
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index 2cb0bdd..7f593ce 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -1400,7 +1400,11 @@ ix86_valid_target_attribute_tree (tree fndecl, tree args,
       if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
	opts->x_ix86_tune_string
	  = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]);
-      else if (orig_tune_defaulted)
+      /* If we have an explicit arch string and no tune string specified, set
+	 tune_string to NULL; later it will be overridden by arch_string
+	 so target clones can get proper optimization.  */
+      else if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
+	       || orig_tune_defaulted)
	opts->x_ix86_tune_string = NULL;
 
       /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
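[The ix86_expand_sse_ptest change exploits the identity behind PTEST's carry flag: CF is set iff (~op0 & op1) is all zeros, which is trivially true when both operands are the same register. Illustrative use with standard SSE4.1 intrinsics:]

    #include <smmintrin.h>

    // _mm_testc_si128 (x, x) computes CF = ((~x & x) == 0), which is always 1,
    // so the expander now folds the builtin to the constant 1 with no ptest.
    int always_one (__m128i x)
    {
      return _mm_testc_si128 (x, x);
    }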
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 32851a5..0761965 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -21423,16 +21423,23 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
       else if (XINT (x, 1) == UNSPEC_PTEST)
	{
	  *total = cost->sse_op;
-	  if (XVECLEN (x, 0) == 2
-	      && GET_CODE (XVECEXP (x, 0, 0)) == AND)
+	  rtx test_op0 = XVECEXP (x, 0, 0);
+	  if (!rtx_equal_p (test_op0, XVECEXP (x, 0, 1)))
+	    return false;
+	  if (GET_CODE (test_op0) == AND)
	    {
-	      rtx andop = XVECEXP (x, 0, 0);
-	      *total += rtx_cost (XEXP (andop, 0), GET_MODE (andop),
-				  AND, opno, speed)
-			+ rtx_cost (XEXP (andop, 1), GET_MODE (andop),
-				    AND, opno, speed);
-	      return true;
+	      rtx and_op0 = XEXP (test_op0, 0);
+	      if (GET_CODE (and_op0) == NOT)
+		and_op0 = XEXP (and_op0, 0);
+	      *total += rtx_cost (and_op0, GET_MODE (and_op0),
+				  AND, 0, speed)
+			+ rtx_cost (XEXP (test_op0, 1), GET_MODE (and_op0),
+				    AND, 1, speed);
	    }
+	  else
+	    *total = rtx_cost (test_op0, GET_MODE (test_op0),
+			       UNSPEC, 0, speed);
+	  return true;
	}
       return false;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 95a6653c..15c0310 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -11380,6 +11380,8 @@
   [(set_attr "type" "alu")
    (set_attr "mode" "QI")])
 
+;; *andqi_ext<mode>_3 is defined via *<code>qi_ext<mode>_3 below.
+
 ;; Convert wide AND instructions with immediate operand to shorter QImode
 ;; equivalents when possible.
 ;; Don't do the splitting with memory operands, since it introduces risk
@@ -12092,6 +12094,26 @@
   [(set_attr "type" "alu")
    (set_attr "mode" "QI")])
 
+(define_insn "*<code>qi_ext<mode>_3"
+  [(set (zero_extract:SWI248
+	  (match_operand 0 "int248_register_operand" "+Q")
+	  (const_int 8)
+	  (const_int 8))
+	(zero_extract:SWI248
+	  (any_logic:SWI248
+	    (match_operand 1 "int248_register_operand" "%0")
+	    (match_operand 2 "int248_register_operand" "Q"))
+	  (const_int 8)
+	  (const_int 8)))
+   (clobber (reg:CC FLAGS_REG))]
+  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
+   && (rtx_equal_p (operands[0], operands[1])
+       || rtx_equal_p (operands[0], operands[2]))"
+  "<logic>{b}\t{%h2, %h0|%h0, %h2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "QI")])
+
 ;; Convert wide OR instructions with immediate operand to shorter QImode
 ;; equivalents when possible.
 ;; Don't do the splitting with memory operands, since it introduces risk
@@ -12206,6 +12228,18 @@
    (set_attr "type" "alu")
    (set_attr "mode" "QI")])
 
+;; Peephole2 rega = 0; rega op= regb into rega = regb.
+(define_peephole2
+  [(parallel [(set (match_operand:SWI 0 "general_reg_operand")
+		   (const_int 0))
+	      (clobber (reg:CC FLAGS_REG))])
+   (parallel [(set (match_dup 0)
+		   (any_or_plus:SWI (match_dup 0)
+				    (match_operand:SWI 1 "<general_operand>")))
+	      (clobber (reg:CC FLAGS_REG))])]
+  ""
+  [(set (match_dup 0) (match_dup 1))])
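[The new peephole2 collapses a register clear followed by an accumulating OR/ADD/XOR into a single move. The kind of source that produces that sequence (illustrative):]

    // Previously: xorl %eax,%eax ; orl %edi,%eax.  With the peephole the
    // zero-then-accumulate pair becomes a plain movl %edi,%eax.
    unsigned demo (unsigned b)
    {
      unsigned a = 0;
      a |= b;
      return a;
    }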
 
 ;; Split DST = (HI<<32)|LO early to minimize register usage.
 (define_insn_and_split "*concat<mode><dwi>3_1"
   [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
@@ -13365,6 +13399,28 @@
   [(const_int 0)]
   "ix86_split_ashl (operands, operands[3], <DWI>mode); DONE;")
 
+(define_insn_and_split "*ashl<dwi>3_doubleword_highpart"
+  [(set (match_operand:<DWI> 0 "register_operand" "=r")
+	(ashift:<DWI>
+	  (any_extend:<DWI> (match_operand:DWIH 1 "nonimmediate_operand" "rm"))
+	  (match_operand:QI 2 "const_int_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "INTVAL (operands[2]) >= <MODE_SIZE> * BITS_PER_UNIT
+   && INTVAL (operands[2]) < <MODE_SIZE> * BITS_PER_UNIT * 2"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[3]);
+  int bits = INTVAL (operands[2]) - (<MODE_SIZE> * BITS_PER_UNIT);
+  if (!rtx_equal_p (operands[3], operands[1]))
+    emit_move_insn (operands[3], operands[1]);
+  if (bits > 0)
+    emit_insn (gen_ashl<mode>3 (operands[3], operands[3], GEN_INT (bits)));
+  ix86_expand_clear (operands[0]);
+  DONE;
+})
+
 (define_insn "x86_64_shld"
   [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
	(ior:DI (ashift:DI (match_dup 0)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index f793258..3b50c71 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1465,12 +1465,12 @@
 })
 
 (define_insn "*<avx512>_load<mode>_mask"
-  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
-	(vec_merge:VI12_AVX512VL
-	  (unspec:VI12_AVX512VL
-	    [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
+  [(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand" "=v")
+	(vec_merge:VI12HFBF_AVX512VL
+	  (unspec:VI12HFBF_AVX512VL
+	    [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand" "m")]
	    UNSPEC_MASKLOAD)
-	  (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
+	  (match_operand:VI12HFBF_AVX512VL 2 "nonimm_or_0_operand" "0C")
	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
   "TARGET_AVX512BW"
   "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
@@ -1479,9 +1479,9 @@
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_insn_and_split "*<avx512>_load<mode>"
-  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
-	(unspec:VI12_AVX512VL
-	  [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
+  [(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand" "=v")
+	(unspec:VI12HFBF_AVX512VL
+	  [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand" "m")]
	  UNSPEC_MASKLOAD))]
   "TARGET_AVX512BW"
   "#"
@@ -23490,6 +23490,70 @@
   [(set (reg:CCZ FLAGS_REG)
	(unspec:CCZ [(match_dup 0) (match_dup 1)] UNSPEC_PTEST))])
 
+;; ptest reg,reg sets the carry flag.
+(define_split
+  [(set (reg:CCC FLAGS_REG)
+	(unspec:CCC [(match_operand:V_AVX 0 "register_operand")
+		     (match_operand:V_AVX 1 "register_operand")]
+		    UNSPEC_PTEST))]
+  "TARGET_SSE4_1
+   && rtx_equal_p (operands[0], operands[1])"
+  [(set (reg:CCC FLAGS_REG)
+	(unspec:CCC [(const_int 0)] UNSPEC_STC))])
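[The *ashl<dwi>3_doubleword_highpart splitter covers double-word left shifts of a widened single-word value where the shift count is at least one word: only the high half can be non-zero, so a move, a single-word shift by count minus the word size, and a clear of the low word suffice. For example, using GCC's __int128 extension:]

    // high = x << 8, low = 0; no wide shift sequence needed.
    unsigned __int128 highpart_shift (unsigned long long x)
    {
      return (unsigned __int128) x << 72;
    }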
+
+;; Changing the CCmode of FLAGS_REG requires updating both def and use.
+;; pandn/ptestz/set{n?}e -> ptestc/set{n?}c
+(define_split
+  [(set (match_operand:SWI 0 "register_operand")
+	(match_operator:SWI 3 "bt_comparison_operator"
+	  [(unspec:CCZ [
+	     (and:V_AVX (not:V_AVX (match_operand:V_AVX 1 "register_operand"))
+			(match_operand:V_AVX 2 "register_operand"))
+	     (and:V_AVX (not:V_AVX (match_dup 1)) (match_dup 2))]
+	     UNSPEC_PTEST)
+	   (const_int 0)]))]
+  "TARGET_SSE4_1"
+  [(set (reg:CCC FLAGS_REG)
+	(unspec:CCC [(match_dup 1) (match_dup 2)] UNSPEC_PTEST))
+   (set (match_dup 0)
+	(match_op_dup 3 [(reg:CCC FLAGS_REG) (const_int 0)]))])
+
+(define_split
+  [(set (strict_low_part (match_operand:QI 0 "register_operand"))
+	(match_operator:QI 3 "bt_comparison_operator"
+	  [(unspec:CCZ [
+	     (and:V_AVX (not:V_AVX (match_operand:V_AVX 1 "register_operand"))
+			(match_operand:V_AVX 2 "register_operand"))
+	     (and:V_AVX (not:V_AVX (match_dup 1)) (match_dup 2))]
+	     UNSPEC_PTEST)
+	   (const_int 0)]))]
+  "TARGET_SSE4_1"
+  [(set (reg:CCC FLAGS_REG)
+	(unspec:CCC [(match_dup 1) (match_dup 2)] UNSPEC_PTEST))
+   (set (strict_low_part (match_dup 0))
+	(match_op_dup 3 [(reg:CCC FLAGS_REG) (const_int 0)]))])
+
+;; pandn/ptestz/j{n?}e -> ptestc/j{n?}c
+(define_split
+  [(set (pc)
+	(if_then_else
+	  (match_operator 3 "bt_comparison_operator"
+	    [(unspec:CCZ [
+	       (and:V_AVX
+		 (not:V_AVX (match_operand:V_AVX 1 "register_operand"))
+		 (match_operand:V_AVX 2 "register_operand"))
+	       (and:V_AVX (not:V_AVX (match_dup 1)) (match_dup 2))]
+	       UNSPEC_PTEST)
+	     (const_int 0)])
+	  (match_operand 0)
+	  (pc)))]
+  "TARGET_SSE4_1"
+  [(set (reg:CCC FLAGS_REG)
+	(unspec:CCC [(match_dup 1) (match_dup 2)] UNSPEC_PTEST))
+   (set (pc) (if_then_else (match_op_dup 3 [(reg:CCC FLAGS_REG) (const_int 0)])
+			   (match_dup 0)
+			   (pc)))])
+
 (define_expand "nearbyint<mode>2"
   [(set (match_operand:VFH 0 "register_operand")
	(unspec:VFH
@@ -26915,17 +26979,21 @@
   "TARGET_AVX")
 
 (define_expand "maskload<mode><avx512fmaskmodelower>"
-  [(set (match_operand:V48H_AVX512VL 0 "register_operand")
-	(vec_merge:V48H_AVX512VL
-	  (match_operand:V48H_AVX512VL 1 "memory_operand")
+  [(set (match_operand:V48_AVX512VL 0 "register_operand")
+	(vec_merge:V48_AVX512VL
+	  (unspec:V48_AVX512VL
+	    [(match_operand:V48_AVX512VL 1 "memory_operand")]
+	    UNSPEC_MASKLOAD)
	  (match_dup 0)
	  (match_operand:<avx512fmaskmode> 2 "register_operand")))]
   "TARGET_AVX512F")
 
 (define_expand "maskload<mode><avx512fmaskmodelower>"
-  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
-	(vec_merge:VI12_AVX512VL
-	  (match_operand:VI12_AVX512VL 1 "memory_operand")
+  [(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand")
+	(vec_merge:VI12HFBF_AVX512VL
+	  (unspec:VI12HFBF_AVX512VL
+	    [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand")]
+	    UNSPEC_MASKLOAD)
	  (match_dup 0)
	  (match_operand:<avx512fmaskmode> 2 "register_operand")))]
   "TARGET_AVX512BW")
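[The pandn/ptestz -> ptestc splits rest on the same flag identity: testing via ZF whether ~a & b is all zeros equals reading CF from ptest a, b directly. Equivalence sketch in intrinsics (illustrative):]

    #include <smmintrin.h>

    int covered (__m128i a, __m128i b)
    {
      __m128i andn = _mm_andnot_si128 (a, b);   // ~a & b
      // ZF of ptest (andn, andn) == CF of ptest (a, b); the splits rewrite
      // the pandn/ptestz/sete form below into a single ptest plus setc.
      return _mm_testz_si128 (andn, andn);      // same as _mm_testc_si128 (a, b)
    }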
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index f1641d7..19100b5 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -22,29 +22,27 @@
 ;; == Loads/Stores
 ;; =========================================================================
 
-;; len_load/len_store is a sub-optimal pattern for RVV auto-vectorization support.
-;; We will replace them when len_maskload/len_maskstore is supported in loop vectorizer.
-(define_expand "len_load_<mode>"
+(define_expand "len_maskload<mode><vm>"
   [(match_operand:V 0 "register_operand")
    (match_operand:V 1 "memory_operand")
-   (match_operand 2 "vector_length_operand")
-   (match_operand 3 "const_0_operand")]
+   (match_operand 2 "autovec_length_operand")
+   (match_operand:<VM> 3 "vector_mask_operand")
+   (match_operand 4 "const_0_operand")]
   "TARGET_VECTOR"
 {
-  riscv_vector::emit_nonvlmax_insn (code_for_pred_mov (<MODE>mode),
-				    riscv_vector::RVV_UNOP, operands, operands[2]);
+  riscv_vector::expand_load_store (operands, true);
   DONE;
 })
 
-(define_expand "len_store_<mode>"
+(define_expand "len_maskstore<mode><vm>"
   [(match_operand:V 0 "memory_operand")
    (match_operand:V 1 "register_operand")
-   (match_operand 2 "vector_length_operand")
-   (match_operand 3 "const_0_operand")]
+   (match_operand 2 "autovec_length_operand")
+   (match_operand:<VM> 3 "vector_mask_operand")
+   (match_operand 4 "const_0_operand")]
  "TARGET_VECTOR"
 {
-  riscv_vector::emit_nonvlmax_insn (code_for_pred_mov (<MODE>mode),
-				    riscv_vector::RVV_UNOP, operands, operands[2]);
+  riscv_vector::expand_load_store (operands, false);
   DONE;
 })
 
@@ -314,44 +312,6 @@
 )
 
 ;; -------------------------------------------------------------------------
-;; ---- [INT,FP] Compare and select
-;; -------------------------------------------------------------------------
-;; The patterns in this section are synthetic.
-;; -------------------------------------------------------------------------
-
-(define_expand "vcond<V:mode><VI:mode>"
-  [(set (match_operand:V 0 "register_operand")
-	(if_then_else:V
-	  (match_operator 3 "comparison_operator"
-	    [(match_operand:VI 4 "register_operand")
-	     (match_operand:VI 5 "register_operand")])
-	  (match_operand:V 1 "register_operand")
-	  (match_operand:V 2 "register_operand")))]
-  "TARGET_VECTOR && known_eq (GET_MODE_NUNITS (<V:MODE>mode),
-		GET_MODE_NUNITS (<VI:MODE>mode))"
-  {
-    riscv_vector::expand_vcond (operands);
-    DONE;
-  }
-)
-
-(define_expand "vcondu<V:mode><VI:mode>"
-  [(set (match_operand:V 0 "register_operand")
-	(if_then_else:V
-	  (match_operator 3 "comparison_operator"
-	    [(match_operand:VI 4 "register_operand")
-	     (match_operand:VI 5 "register_operand")])
-	  (match_operand:V 1 "register_operand")
-	  (match_operand:V 2 "register_operand")))]
-  "TARGET_VECTOR && known_eq (GET_MODE_NUNITS (<V:MODE>mode),
-		GET_MODE_NUNITS (<VI:MODE>mode))"
-  {
-    riscv_vector::expand_vcond (operands);
-    DONE;
-  }
-)
-
-;; -------------------------------------------------------------------------
 ;; ---- [INT] Sign and zero extension
 ;; -------------------------------------------------------------------------
 ;; Includes:
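[len_maskload/len_maskstore combine a scalar length with a per-lane mask. A scalar reference model of the load side (the names and the zero-fill tail policy here are illustrative simplifications, not the vectorizer's exact semantics):]

    template <typename T, int N>
    void len_maskload_ref (T (&dst)[N], const T *src, int len,
                           const bool (&mask)[N])
    {
      // Lanes below LEN whose mask bit is set are loaded; all other lanes
      // follow the tail/mask policy (modelled here as zero-initialization).
      for (int i = 0; i < N; i++)
        dst[i] = (i < len && mask[i]) ? src[i] : T ();
    }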
@@ -596,40 +556,41 @@
 ;; result after reload_completed.
 (define_expand "fma<mode>4"
   [(parallel
-    [(set (match_operand:VI 0 "register_operand"   "=vr")
+    [(set (match_operand:VI 0 "register_operand")
	  (plus:VI
	    (mult:VI
-	      (match_operand:VI 1 "register_operand" " vr")
-	      (match_operand:VI 2 "register_operand" " vr"))
-	    (match_operand:VI 3 "register_operand"   " vr")))
-     (clobber (match_scratch:SI 4))])]
+	      (match_operand:VI 1 "register_operand")
+	      (match_operand:VI 2 "register_operand"))
+	    (match_operand:VI 3 "register_operand")))
+     (clobber (match_dup 4))])]
   "TARGET_VECTOR"
-  {})
+  {
+    operands[4] = gen_reg_rtx (Pmode);
+  })
 
-(define_insn_and_split "*fma<mode>"
+(define_insn_and_split "*fma<VI:mode><P:mode>"
   [(set (match_operand:VI 0 "register_operand"     "=vr, vr, ?&vr")
	(plus:VI
	  (mult:VI
	    (match_operand:VI 1 "register_operand" " %0, vr,   vr")
	    (match_operand:VI 2 "register_operand" " vr, vr,   vr"))
	  (match_operand:VI 3 "register_operand"   " vr,  0,   vr")))
-   (clobber (match_scratch:SI 4 "=r,r,r"))]
+   (clobber (match_operand:P 4 "register_operand" "=r,r,r"))]
   "TARGET_VECTOR"
   "#"
   "&& reload_completed"
   [(const_int 0)]
   {
-    PUT_MODE (operands[4], Pmode);
-    riscv_vector::emit_vlmax_vsetvl (<MODE>mode, operands[4]);
+    riscv_vector::emit_vlmax_vsetvl (<VI:MODE>mode, operands[4]);
     if (which_alternative == 2)
       emit_insn (gen_rtx_SET (operands[0], operands[3]));
     rtx ops[] = {operands[0], operands[1], operands[2], operands[3], operands[0]};
-    riscv_vector::emit_vlmax_ternary_insn (code_for_pred_mul_plus (<MODE>mode),
-					   riscv_vector::RVV_TERNOP, ops, operands[4]);
+    riscv_vector::emit_vlmax_ternary_insn (code_for_pred_mul_plus (<VI:MODE>mode),
+					   riscv_vector::RVV_TERNOP, ops, operands[4]);
     DONE;
   }
   [(set_attr "type" "vimuladd")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<VI:MODE>")])
 
 ;; -------------------------------------------------------------------------
 ;; ---- [INT] VNMSAC and VNMSUB
 ;; -------------------------------------------------------------------------
 ;; Includes:
@@ -641,40 +602,225 @@
 (define_expand "fnma<mode>4"
   [(parallel
-    [(set (match_operand:VI 0 "register_operand"   "=vr")
+    [(set (match_operand:VI 0 "register_operand")
	  (minus:VI
-	    (match_operand:VI 3 "register_operand" " vr")
+	    (match_operand:VI 3 "register_operand")
	    (mult:VI
-	      (match_operand:VI 1 "register_operand" " vr")
-	      (match_operand:VI 2 "register_operand" " vr"))))
-     (clobber (match_scratch:SI 4))])]
+	      (match_operand:VI 1 "register_operand")
+	      (match_operand:VI 2 "register_operand"))))
+     (clobber (match_dup 4))])]
   "TARGET_VECTOR"
-  {})
+  {
+    operands[4] = gen_reg_rtx (Pmode);
+  })
 
-(define_insn_and_split "*fnma<mode>"
+(define_insn_and_split "*fnma<VI:mode><P:mode>"
   [(set (match_operand:VI 0 "register_operand"     "=vr, vr, ?&vr")
	(minus:VI
	  (match_operand:VI 3 "register_operand"   " vr,  0,   vr")
	  (mult:VI
	    (match_operand:VI 1 "register_operand" " %0, vr,   vr")
	    (match_operand:VI 2 "register_operand" " vr, vr,   vr"))))
-   (clobber (match_scratch:SI 4 "=r,r,r"))]
+   (clobber (match_operand:P 4 "register_operand" "=r,r,r"))]
   "TARGET_VECTOR"
   "#"
   "&& reload_completed"
   [(const_int 0)]
   {
-    PUT_MODE (operands[4], Pmode);
-    riscv_vector::emit_vlmax_vsetvl (<MODE>mode, operands[4]);
+    riscv_vector::emit_vlmax_vsetvl (<VI:MODE>mode, operands[4]);
     if (which_alternative == 2)
      emit_insn (gen_rtx_SET (operands[0], operands[3]));
     rtx ops[] = {operands[0], operands[1], operands[2], operands[3], operands[0]};
-    riscv_vector::emit_vlmax_ternary_insn (code_for_pred_minus_mul (<MODE>mode),
-					   riscv_vector::RVV_TERNOP, ops, operands[4]);
+    riscv_vector::emit_vlmax_ternary_insn (code_for_pred_minus_mul (<VI:MODE>mode),
+					   riscv_vector::RVV_TERNOP, ops, operands[4]);
     DONE;
   }
   [(set_attr "type" "vimuladd")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<VI:MODE>")])
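[These integer patterns now pre-allocate the vsetvl scratch as a Pmode pseudo in the expander instead of patching an SImode match_scratch after reload. The source shape they cover is the ordinary multiply-accumulate loop, e.g.:]

    // With -march=rv64gcv this loop is auto-vectorized; after the split the
    // insn becomes a vsetvli (into the clobbered scratch) followed by a
    // vmacc-style multiply-add.
    void madd (int *__restrict d, const int *a, const int *b, int n)
    {
      for (int i = 0; i < n; i++)
        d[i] += a[i] * b[i];
    }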
"mode" "<MODE>")]) + (set_attr "mode" "<VI:MODE>")]) + +;; ------------------------------------------------------------------------- +;; ---- [FP] VFMACC and VFMADD +;; ------------------------------------------------------------------------- +;; Includes: +;; - vfmacc +;; - vfmadd +;; ------------------------------------------------------------------------- + +(define_expand "fma<mode>4" + [(parallel + [(set (match_operand:VF_AUTO 0 "register_operand") + (fma:VF_AUTO + (match_operand:VF_AUTO 1 "register_operand") + (match_operand:VF_AUTO 2 "register_operand") + (match_operand:VF_AUTO 3 "register_operand"))) + (clobber (match_dup 4))])] + "TARGET_VECTOR" + { + operands[4] = gen_reg_rtx (Pmode); + }) + +(define_insn_and_split "*fma<VF_AUTO:mode><P:mode>" + [(set (match_operand:VF_AUTO 0 "register_operand" "=vr, vr, ?&vr") + (fma:VF_AUTO + (match_operand:VF_AUTO 1 "register_operand" " %0, vr, vr") + (match_operand:VF_AUTO 2 "register_operand" " vr, vr, vr") + (match_operand:VF_AUTO 3 "register_operand" " vr, 0, vr"))) + (clobber (match_operand:P 4 "register_operand" "=r,r,r"))] + "TARGET_VECTOR" + "#" + "&& reload_completed" + [(const_int 0)] + { + riscv_vector::emit_vlmax_vsetvl (<VF_AUTO:MODE>mode, operands[4]); + if (which_alternative == 2) + emit_insn (gen_rtx_SET (operands[0], operands[3])); + rtx ops[] = {operands[0], operands[1], operands[2], operands[3], operands[0]}; + riscv_vector::emit_vlmax_fp_ternary_insn (code_for_pred_mul (PLUS, <VF_AUTO:MODE>mode), + riscv_vector::RVV_TERNOP, ops, operands[4]); + DONE; + } + [(set_attr "type" "vfmuladd") + (set_attr "mode" "<VF_AUTO:MODE>")]) + +;; ------------------------------------------------------------------------- +;; ---- [FP] VFNMSAC and VFNMSUB +;; ------------------------------------------------------------------------- +;; Includes: +;; - vfnmsac +;; - vfnmsub +;; ------------------------------------------------------------------------- + +(define_expand "fnma<mode>4" + [(parallel + [(set (match_operand:VF_AUTO 0 "register_operand") + (fma:VF_AUTO + (neg:VF_AUTO + (match_operand:VF_AUTO 1 "register_operand")) + (match_operand:VF_AUTO 2 "register_operand") + (match_operand:VF_AUTO 3 "register_operand"))) + (clobber (match_dup 4))])] + "TARGET_VECTOR" + { + operands[4] = gen_reg_rtx (Pmode); + }) + +(define_insn_and_split "*fnma<VF_AUTO:mode><P:mode>" + [(set (match_operand:VF_AUTO 0 "register_operand" "=vr, vr, ?&vr") + (fma:VF_AUTO + (neg:VF_AUTO + (match_operand:VF_AUTO 1 "register_operand" " %0, vr, vr")) + (match_operand:VF_AUTO 2 "register_operand" " vr, vr, vr") + (match_operand:VF_AUTO 3 "register_operand" " vr, 0, vr"))) + (clobber (match_operand:P 4 "register_operand" "=r,r,r"))] + "TARGET_VECTOR" + "#" + "&& reload_completed" + [(const_int 0)] + { + riscv_vector::emit_vlmax_vsetvl (<VF_AUTO:MODE>mode, operands[4]); + if (which_alternative == 2) + emit_insn (gen_rtx_SET (operands[0], operands[3])); + rtx ops[] = {operands[0], operands[1], operands[2], operands[3], operands[0]}; + riscv_vector::emit_vlmax_fp_ternary_insn (code_for_pred_mul_neg (PLUS, <VF_AUTO:MODE>mode), + riscv_vector::RVV_TERNOP, ops, operands[4]); + DONE; + } + [(set_attr "type" "vfmuladd") + (set_attr "mode" "<VF_AUTO:MODE>")]) + +;; ------------------------------------------------------------------------- +;; ---- [FP] VFMSAC and VFMSUB +;; ------------------------------------------------------------------------- +;; Includes: +;; - vfmsac +;; - vfmsub +;; ------------------------------------------------------------------------- + +(define_expand 
"fms<mode>4" + [(parallel + [(set (match_operand:VF_AUTO 0 "register_operand") + (fma:VF_AUTO + (match_operand:VF_AUTO 1 "register_operand") + (match_operand:VF_AUTO 2 "register_operand") + (neg:VF_AUTO + (match_operand:VF_AUTO 3 "register_operand")))) + (clobber (match_dup 4))])] + "TARGET_VECTOR" + { + operands[4] = gen_reg_rtx (Pmode); + }) + +(define_insn_and_split "*fms<VF_AUTO:mode><P:mode>" + [(set (match_operand:VF_AUTO 0 "register_operand" "=vr, vr, ?&vr") + (fma:VF_AUTO + (match_operand:VF_AUTO 1 "register_operand" " %0, vr, vr") + (match_operand:VF_AUTO 2 "register_operand" " vr, vr, vr") + (neg:VF_AUTO + (match_operand:VF_AUTO 3 "register_operand" " vr, 0, vr")))) + (clobber (match_operand:P 4 "register_operand" "=r,r,r"))] + "TARGET_VECTOR" + "#" + "&& reload_completed" + [(const_int 0)] + { + riscv_vector::emit_vlmax_vsetvl (<VF_AUTO:MODE>mode, operands[4]); + if (which_alternative == 2) + emit_insn (gen_rtx_SET (operands[0], operands[3])); + rtx ops[] = {operands[0], operands[1], operands[2], operands[3], operands[0]}; + riscv_vector::emit_vlmax_fp_ternary_insn (code_for_pred_mul (MINUS, <VF_AUTO:MODE>mode), + riscv_vector::RVV_TERNOP, ops, operands[4]); + DONE; + } + [(set_attr "type" "vfmuladd") + (set_attr "mode" "<VF_AUTO:MODE>")]) + +;; ------------------------------------------------------------------------- +;; ---- [FP] VFMSAC and VFMSUB +;; ------------------------------------------------------------------------- +;; Includes: +;; - vfmsac +;; - vfmsub +;; ------------------------------------------------------------------------- + +(define_expand "fnms<mode>4" + [(parallel + [(set (match_operand:VF_AUTO 0 "register_operand") + (fma:VF_AUTO + (neg:VF_AUTO + (match_operand:VF_AUTO 1 "register_operand")) + (match_operand:VF_AUTO 2 "register_operand") + (neg:VF_AUTO + (match_operand:VF_AUTO 3 "register_operand")))) + (clobber (match_dup 4))])] + "TARGET_VECTOR" + { + operands[4] = gen_reg_rtx (Pmode); + }) + +(define_insn_and_split "*fnms<VF_AUTO:mode><P:mode>" + [(set (match_operand:VF_AUTO 0 "register_operand" "=vr, vr, ?&vr") + (fma:VF_AUTO + (neg:VF_AUTO + (match_operand:VF_AUTO 1 "register_operand" " %0, vr, vr")) + (match_operand:VF_AUTO 2 "register_operand" " vr, vr, vr") + (neg:VF_AUTO + (match_operand:VF_AUTO 3 "register_operand" " vr, 0, vr")))) + (clobber (match_operand:P 4 "register_operand" "=r,r,r"))] + "TARGET_VECTOR" + "#" + "&& reload_completed" + [(const_int 0)] + { + riscv_vector::emit_vlmax_vsetvl (<VF_AUTO:MODE>mode, operands[4]); + if (which_alternative == 2) + emit_insn (gen_rtx_SET (operands[0], operands[3])); + rtx ops[] = {operands[0], operands[1], operands[2], operands[3], operands[0]}; + riscv_vector::emit_vlmax_fp_ternary_insn (code_for_pred_mul_neg (MINUS, <VF_AUTO:MODE>mode), + riscv_vector::RVV_TERNOP, ops, operands[4]); + DONE; + } + [(set_attr "type" "vfmuladd") + (set_attr "mode" "<VF_AUTO:MODE>")]) ;; ========================================================================= ;; == SELECT_VL diff --git a/gcc/config/riscv/genrvv-type-indexer.cc b/gcc/config/riscv/genrvv-type-indexer.cc index a332a6a..8fc93ce 100644 --- a/gcc/config/riscv/genrvv-type-indexer.cc +++ b/gcc/config/riscv/genrvv-type-indexer.cc @@ -73,6 +73,9 @@ valid_type (unsigned sew, int lmul_log2, unsigned nf, bool float_p) if (nf > 8 || nf < 1) return false; + if (sew == 16 && nf != 1 && float_p) // Disable FP16 tuple in temporarily. 
diff --git a/gcc/config/riscv/genrvv-type-indexer.cc b/gcc/config/riscv/genrvv-type-indexer.cc
index a332a6a..8fc93ce 100644
--- a/gcc/config/riscv/genrvv-type-indexer.cc
+++ b/gcc/config/riscv/genrvv-type-indexer.cc
@@ -73,6 +73,9 @@ valid_type (unsigned sew, int lmul_log2, unsigned nf, bool float_p)
   if (nf > 8 || nf < 1)
     return false;
 
+  if (sew == 16 && nf != 1 && float_p) // Disable FP16 tuples temporarily.
+    return false;
+
   switch (lmul_log2)
     {
     case 1:
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index 04ca6ce..eb975ea 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -276,6 +276,13 @@
   (ior (match_operand 0 "pmode_register_operand")
        (match_operand 0 "const_csr_operand")))
 
+(define_special_predicate "autovec_length_operand"
+  (ior (match_operand 0 "pmode_register_operand")
+       (ior (match_operand 0 "const_csr_operand")
+	    (match_test "rtx_equal_p (op, gen_int_mode
+				      (GET_MODE_NUNITS (GET_MODE (op)),
+				       Pmode))"))))
+
 (define_predicate "reg_or_mem_operand"
   (ior (match_operand 0 "register_operand")
        (match_operand 0 "memory_operand")))
diff --git a/gcc/config/riscv/riscv-modes.def b/gcc/config/riscv/riscv-modes.def
index 1d15270..19a4f9f 100644
--- a/gcc/config/riscv/riscv-modes.def
+++ b/gcc/config/riscv/riscv-modes.def
@@ -220,7 +220,6 @@ ADJUST_ALIGNMENT (VNx1QI, 1);
 #define RVV_TUPLE_MODES(NBYTES, NSUBPARTS, VB, VH, VS, VD) \
   VECTOR_MODE_WITH_PREFIX (VNx##NSUBPARTS##x, INT, QI, NBYTES, 1); \
   VECTOR_MODE_WITH_PREFIX (VNx##NSUBPARTS##x, INT, HI, NBYTES / 2, 1); \
-  VECTOR_MODE_WITH_PREFIX (VNx##NSUBPARTS##x, FLOAT, HF, NBYTES / 2, 1); \
   VECTOR_MODE_WITH_PREFIX (VNx##NSUBPARTS##x, INT, SI, NBYTES / 4, 1); \
   VECTOR_MODE_WITH_PREFIX (VNx##NSUBPARTS##x, FLOAT, SF, NBYTES / 4, 1); \
   VECTOR_MODE_WITH_PREFIX (VNx##NSUBPARTS##x, INT, DI, NBYTES / 8, 1); \
@@ -237,9 +236,6 @@ ADJUST_ALIGNMENT (VNx1QI, 1);
   ADJUST_NUNITS (VNx##NSUBPARTS##x##VD##DI, \
		 riscv_v_adjust_nunits (VNx##NSUBPARTS##x##VD##DI##mode, \
					VD * NSUBPARTS)); \
-  ADJUST_NUNITS (VNx##NSUBPARTS##x##VH##HF, \
-		 riscv_v_adjust_nunits (VNx##NSUBPARTS##x##VH##HF##mode, \
-					VH * NSUBPARTS)); \
   ADJUST_NUNITS (VNx##NSUBPARTS##x##VS##SF, \
		 riscv_v_adjust_nunits (VNx##NSUBPARTS##x##VS##SF##mode, \
					VS * NSUBPARTS)); \
@@ -251,7 +247,6 @@ ADJUST_ALIGNMENT (VNx1QI, 1);
   ADJUST_ALIGNMENT (VNx##NSUBPARTS##x##VH##HI, 2); \
   ADJUST_ALIGNMENT (VNx##NSUBPARTS##x##VS##SI, 4); \
   ADJUST_ALIGNMENT (VNx##NSUBPARTS##x##VD##DI, 8); \
-  ADJUST_ALIGNMENT (VNx##NSUBPARTS##x##VH##HF, 2); \
   ADJUST_ALIGNMENT (VNx##NSUBPARTS##x##VS##SF, 4); \
   ADJUST_ALIGNMENT (VNx##NSUBPARTS##x##VD##DF, 8);
@@ -280,12 +275,10 @@ RVV_TUPLE_MODES (64, 2, 64, 32, 16, 8)
 #define RVV_TUPLE_PARTIAL_MODES(NSUBPARTS) \
   VECTOR_MODE_WITH_PREFIX (VNx##NSUBPARTS##x, INT, QI, 1, 1); \
   VECTOR_MODE_WITH_PREFIX (VNx##NSUBPARTS##x, INT, HI, 1, 1); \
-  VECTOR_MODE_WITH_PREFIX (VNx##NSUBPARTS##x, FLOAT, HF, 1, 1); \
   VECTOR_MODE_WITH_PREFIX (VNx##NSUBPARTS##x, INT, SI, 1, 1); \
   VECTOR_MODE_WITH_PREFIX (VNx##NSUBPARTS##x, FLOAT, SF, 1, 1); \
   VECTOR_MODE_WITH_PREFIX (VNx##NSUBPARTS##x, INT, QI, 2, 1); \
   VECTOR_MODE_WITH_PREFIX (VNx##NSUBPARTS##x, INT, HI, 2, 1); \
-  VECTOR_MODE_WITH_PREFIX (VNx##NSUBPARTS##x, FLOAT, HF, 2, 1); \
   VECTOR_MODE_WITH_PREFIX (VNx##NSUBPARTS##x, INT, QI, 4, 1); \
 \
   ADJUST_NUNITS (VNx##NSUBPARTS##x1QI, \
@@ -294,9 +287,6 @@ RVV_TUPLE_MODES (64, 2, 64, 32, 16, 8)
   ADJUST_NUNITS (VNx##NSUBPARTS##x1HI, \
		 riscv_v_adjust_nunits (VNx##NSUBPARTS##x1HI##mode, \
					NSUBPARTS)); \
-  ADJUST_NUNITS (VNx##NSUBPARTS##x1HF, \
-		 riscv_v_adjust_nunits (VNx##NSUBPARTS##x1HF##mode, \
-					NSUBPARTS)); \
   ADJUST_NUNITS (VNx##NSUBPARTS##x1SI, \
		 riscv_v_adjust_nunits (VNx##NSUBPARTS##x1SI##mode, \
					NSUBPARTS)); \
@@ -309,20 +299,15 @@ RVV_TUPLE_MODES (64, 2, 64, 32, 16, 8)
   ADJUST_NUNITS (VNx##NSUBPARTS##x2HI, \
		 riscv_v_adjust_nunits (VNx##NSUBPARTS##x2HI##mode, \
					2 * NSUBPARTS)); \
-  ADJUST_NUNITS (VNx##NSUBPARTS##x2HF, \
-		 riscv_v_adjust_nunits (VNx##NSUBPARTS##x2HF##mode, \
-					2 * NSUBPARTS)); \
   ADJUST_NUNITS (VNx##NSUBPARTS##x4QI, \
		 riscv_v_adjust_nunits (VNx##NSUBPARTS##x4QI##mode, \
					4 * NSUBPARTS)); \
   ADJUST_ALIGNMENT (VNx##NSUBPARTS##x1QI, 1); \
   ADJUST_ALIGNMENT (VNx##NSUBPARTS##x1HI, 2); \
-  ADJUST_ALIGNMENT (VNx##NSUBPARTS##x1HF, 2); \
   ADJUST_ALIGNMENT (VNx##NSUBPARTS##x1SI, 4); \
   ADJUST_ALIGNMENT (VNx##NSUBPARTS##x1SF, 4); \
   ADJUST_ALIGNMENT (VNx##NSUBPARTS##x2QI, 1); \
   ADJUST_ALIGNMENT (VNx##NSUBPARTS##x2HI, 2); \
-  ADJUST_ALIGNMENT (VNx##NSUBPARTS##x2HF, 2); \
   ADJUST_ALIGNMENT (VNx##NSUBPARTS##x4QI, 1);
 
 RVV_TUPLE_PARTIAL_MODES (2)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index f052757..7265b1c 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -143,6 +143,7 @@ enum insn_type
   RVV_CMP_OP = 4,
   RVV_CMP_MU_OP = RVV_CMP_OP + 2, /* +2 means mask and maskoff operand.  */
   RVV_UNOP_MU = RVV_UNOP + 2,	  /* Likewise.  */
+  RVV_UNOP_M = RVV_UNOP + 2,	  /* Likewise.  */
   RVV_TERNOP = 5,
   RVV_WIDEN_TERNOP = 4,
   RVV_SCALAR_MOV_OP = 4, /* +1 for VUNDEF according to vector.md.  */
@@ -187,6 +188,7 @@ void emit_hard_vlmax_vsetvl (machine_mode, rtx);
 void emit_vlmax_insn (unsigned, int, rtx *, rtx = 0);
 void emit_vlmax_fp_insn (unsigned, int, rtx *, rtx = 0);
 void emit_vlmax_ternary_insn (unsigned, int, rtx *, rtx = 0);
+void emit_vlmax_fp_ternary_insn (unsigned, int, rtx *, rtx = 0);
 void emit_nonvlmax_insn (unsigned, int, rtx *, rtx);
 void emit_vlmax_slide_insn (unsigned, rtx *);
 void emit_nonvlmax_slide_tu_insn (unsigned, rtx *, rtx);
@@ -250,9 +252,9 @@ machine_mode preferred_simd_mode (scalar_mode);
 opt_machine_mode get_mask_mode (machine_mode);
 void expand_vec_series (rtx, rtx, rtx);
 void expand_vec_init (rtx, rtx);
-void expand_vcond (rtx *);
 void expand_vec_perm (rtx, rtx, rtx, rtx);
 void expand_select_vl (rtx *);
+void expand_load_store (rtx *, bool);
 
 /* Rounding mode bitfield for fixed point VXRM.  */
 enum fixed_point_rounding_mode
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 839a2c6..f6dd0d8 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -705,19 +705,42 @@ emit_vlmax_ternary_insn (unsigned icode, int op_num, rtx *ops, rtx vl)
 {
   machine_mode dest_mode = GET_MODE (ops[0]);
   machine_mode mask_mode = get_mask_mode (dest_mode).require ();
-  /* We have a maximum of 11 operands for RVV instruction patterns according to
-   * vector.md.  */
-  insn_expander<11> e (/*OP_NUM*/ op_num, /*HAS_DEST_P*/ true,
-		       /*FULLY_UNMASKED_P*/ true,
-		       /*USE_REAL_MERGE_P*/ true, /*HAS_AVL_P*/ true,
-		       /*VLMAX_P*/ true,
-		       /*DEST_MODE*/ dest_mode, /*MASK_MODE*/ mask_mode);
+  insn_expander<RVV_INSN_OPERANDS_MAX> e (/*OP_NUM*/ op_num,
+					  /*HAS_DEST_P*/ true,
+					  /*FULLY_UNMASKED_P*/ true,
+					  /*USE_REAL_MERGE_P*/ true,
+					  /*HAS_AVL_P*/ true,
+					  /*VLMAX_P*/ true,
+					  /*DEST_MODE*/ dest_mode,
+					  /*MASK_MODE*/ mask_mode);
   e.set_policy (TAIL_ANY);
   e.set_policy (MASK_ANY);
   e.set_vl (vl);
   e.emit_insn ((enum insn_code) icode, ops);
 }
 
+/* This function emits a {VLMAX, TAIL_ANY, MASK_ANY} vsetvli followed by the
+ * ternary operation which always has a real merge operand.  */
+void
+emit_vlmax_fp_ternary_insn (unsigned icode, int op_num, rtx *ops, rtx vl)
+{
+  machine_mode dest_mode = GET_MODE (ops[0]);
+  machine_mode mask_mode = get_mask_mode (dest_mode).require ();
+  insn_expander<RVV_INSN_OPERANDS_MAX> e (/*OP_NUM*/ op_num,
+					  /*HAS_DEST_P*/ true,
+					  /*FULLY_UNMASKED_P*/ true,
+					  /*USE_REAL_MERGE_P*/ true,
+					  /*HAS_AVL_P*/ true,
+					  /*VLMAX_P*/ true,
+					  /*DEST_MODE*/ dest_mode,
+					  /*MASK_MODE*/ mask_mode);
+  e.set_policy (TAIL_ANY);
+  e.set_policy (MASK_ANY);
+  e.set_rounding_mode (FRM_DYN);
+  e.set_vl (vl);
+  e.emit_insn ((enum insn_code) icode, ops);
+}
+
 /* This function emits a {NONVLMAX, TAIL_ANY, MASK_ANY} vsetvli followed by the
  * actual operation.  */
 void
@@ -842,16 +865,55 @@ emit_vlmax_cmp_mu_insn (unsigned icode, rtx *ops)
 }
 
 /* This function emits a masked instruction.  */
+static void
+emit_vlmax_masked_insn (unsigned icode, int op_num, rtx *ops)
+{
+  machine_mode dest_mode = GET_MODE (ops[0]);
+  machine_mode mask_mode = get_mask_mode (dest_mode).require ();
+  insn_expander<RVV_INSN_OPERANDS_MAX> e (/*OP_NUM*/ op_num,
+					  /*HAS_DEST_P*/ true,
+					  /*FULLY_UNMASKED_P*/ false,
+					  /*USE_REAL_MERGE_P*/ true,
+					  /*HAS_AVL_P*/ true,
+					  /*VLMAX_P*/ true, dest_mode,
+					  mask_mode);
+  e.set_policy (TAIL_ANY);
+  e.set_policy (MASK_ANY);
+  e.emit_insn ((enum insn_code) icode, ops);
+}
+
+/* This function emits a masked instruction.  */
+static void
+emit_nonvlmax_masked_insn (unsigned icode, int op_num, rtx *ops, rtx avl)
+{
+  machine_mode dest_mode = GET_MODE (ops[0]);
+  machine_mode mask_mode = get_mask_mode (dest_mode).require ();
+  insn_expander<RVV_INSN_OPERANDS_MAX> e (/*OP_NUM*/ op_num,
+					  /*HAS_DEST_P*/ true,
+					  /*FULLY_UNMASKED_P*/ false,
+					  /*USE_REAL_MERGE_P*/ true,
+					  /*HAS_AVL_P*/ true,
+					  /*VLMAX_P*/ false, dest_mode,
+					  mask_mode);
+  e.set_policy (TAIL_ANY);
+  e.set_policy (MASK_ANY);
+  e.set_vl (avl);
+  e.emit_insn ((enum insn_code) icode, ops);
+}
+
+/* This function emits a masked instruction.  */
 void
 emit_vlmax_masked_mu_insn (unsigned icode, int op_num, rtx *ops)
 {
   machine_mode dest_mode = GET_MODE (ops[0]);
   machine_mode mask_mode = get_mask_mode (dest_mode).require ();
-  insn_expander<11> e (/*OP_NUM*/ op_num, /*HAS_DEST_P*/ true,
-		       /*FULLY_UNMASKED_P*/ false,
-		       /*USE_REAL_MERGE_P*/ true,
-		       /*HAS_AVL_P*/ true,
-		       /*VLMAX_P*/ true, dest_mode, mask_mode);
+  insn_expander<RVV_INSN_OPERANDS_MAX> e (/*OP_NUM*/ op_num,
+					  /*HAS_DEST_P*/ true,
+					  /*FULLY_UNMASKED_P*/ false,
+					  /*USE_REAL_MERGE_P*/ true,
+					  /*HAS_AVL_P*/ true,
+					  /*VLMAX_P*/ true, dest_mode,
+					  mask_mode);
   e.set_policy (TAIL_ANY);
   e.set_policy (MASK_UNDISTURBED);
   e.emit_insn ((enum insn_code) icode, ops);
@@ -2359,28 +2421,6 @@ expand_vec_cmp_float (rtx target, rtx_code code, rtx op0, rtx op1,
   return false;
 }
 
-/* Expand an RVV vcond pattern with operands OPS.  DATA_MODE is the mode
-   of the data being merged and CMP_MODE is the mode of the values being
-   compared.  */
-
-void
-expand_vcond (rtx *ops)
-{
-  machine_mode cmp_mode = GET_MODE (ops[4]);
-  machine_mode data_mode = GET_MODE (ops[1]);
-  machine_mode mask_mode = get_mask_mode (cmp_mode).require ();
-  rtx mask = gen_reg_rtx (mask_mode);
-  if (FLOAT_MODE_P (cmp_mode))
-    {
-      if (expand_vec_cmp_float (mask, GET_CODE (ops[3]), ops[4], ops[5], true))
-	std::swap (ops[1], ops[2]);
-    }
-  else
-    expand_vec_cmp (mask, GET_CODE (ops[3]), ops[4], ops[5]);
-  emit_insn (
-    gen_vcond_mask (data_mode, data_mode, ops[0], ops[1], ops[2], mask));
-}
-
 /* Implement vec_perm<mode>.  */
 void
@@ -2721,4 +2761,45 @@ expand_select_vl (rtx *ops)
   emit_insn (gen_no_side_effects_vsetvl_rtx (rvv_mode, ops[0], ops[1]));
 }
 
+/* Expand LEN_MASK_{LOAD,STORE}.  */
+void
+expand_load_store (rtx *ops, bool is_load)
+{
+  poly_int64 value;
+  rtx len = ops[2];
+  rtx mask = ops[3];
+  machine_mode mode = GET_MODE (ops[0]);
+
+  if (poly_int_rtx_p (len, &value) && known_eq (value, GET_MODE_NUNITS (mode)))
+    {
+      /* If the length operand is equal to VF, it is VLMAX load/store.  */
+      if (is_load)
+	{
+	  rtx m_ops[] = {ops[0], mask, RVV_VUNDEF (mode), ops[1]};
+	  emit_vlmax_masked_insn (code_for_pred_mov (mode), RVV_UNOP_M, m_ops);
+	}
+      else
+	{
+	  len = gen_reg_rtx (Pmode);
+	  emit_vlmax_vsetvl (mode, len);
+	  emit_insn (gen_pred_store (mode, ops[0], mask, ops[1], len,
+				     get_avl_type_rtx (VLMAX)));
+	}
+    }
+  else
+    {
+      if (!satisfies_constraint_K (len))
+	len = force_reg (Pmode, len);
+      if (is_load)
+	{
+	  rtx m_ops[] = {ops[0], mask, RVV_VUNDEF (mode), ops[1]};
+	  emit_nonvlmax_masked_insn (code_for_pred_mov (mode), RVV_UNOP_M,
+				     m_ops, len);
+	}
+      else
+	emit_insn (gen_pred_store (mode, ops[0], mask, ops[1], len,
+				   get_avl_type_rtx (NONVLMAX)));
+    }
+}
+
 } // namespace riscv_vector
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index c6c53dc..5c8deda 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -1567,7 +1567,7 @@ public:
   {
     tree arg = CALL_EXPR_ARG (e.exp, 0);
     rtx src = expand_normal (arg);
-    emit_insn (gen_rtx_SET (gen_lowpart (e.vector_mode (), e.target), src));
+    emit_move_insn (gen_lowpart (e.vector_mode (), e.target), src);
     return e.target;
   }
 };
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h b/gcc/config/riscv/riscv-vector-builtins-bases.h
index 62ff38a..fb95d6a 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.h
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.h
@@ -121,8 +121,6 @@ extern const function_base *const vsmul;
 extern const function_base *const vssra;
 extern const function_base *const vssrl;
 extern const function_base *const vnclip;
-extern const function_base *const vnclip;
-extern const function_base *const vnclipu;
 extern const function_base *const vnclipu;
 extern const function_base *const vmand;
 extern const function_base *const vmnand;
@@ -144,8 +142,6 @@ extern const function_base *const vmsof;
 extern const function_base *const viota;
 extern const function_base *const vid;
 extern const function_base *const vfadd;
-extern const function_base *const vfadd;
-extern const function_base *const vfsub;
 extern const function_base *const vfsub;
 extern const function_base *const vfrsub;
 extern const function_base *const vfwadd;
@@ -153,7 +149,6 @@ extern const function_base *const vfwsub;
 extern const function_base *const vfmul;
 extern const function_base *const vfmul;
 extern const function_base *const vfdiv;
-extern const function_base *const vfdiv;
 extern const function_base *const vfrdiv;
 extern const function_base *const vfwmul;
 extern const function_base *const vfmacc;
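[expand_load_store dispatches on whether the length operand equals the mode's number of units: if so it emits the VLMAX form, otherwise it forces the AVL into a register (unless it is a small constraint-K immediate) and emits a non-VLMAX insn. Roughly, and only as an illustration for e32/m1, the two paths correspond to:]

    // len == GET_MODE_NUNITS:  vsetvli t0, zero, e32, m1, ta, ma   (VLMAX)
    // otherwise:               vsetvli zero, a2, e32, m1, ta, ma   (AVL in a2)
    // ...in both cases followed by a masked vle32.v / vse32.v.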
diff --git a/gcc/config/riscv/riscv-vector-builtins-types.def b/gcc/config/riscv/riscv-vector-builtins-types.def
index 1c3cc0e..4926bd8 100644
--- a/gcc/config/riscv/riscv-vector-builtins-types.def
+++ b/gcc/config/riscv/riscv-vector-builtins-types.def
@@ -1291,31 +1291,6 @@ DEF_RVV_TUPLE_OPS (vint64m2x4_t, RVV_REQUIRE_ELEN_64)
 DEF_RVV_TUPLE_OPS (vuint64m2x4_t, RVV_REQUIRE_ELEN_64)
 DEF_RVV_TUPLE_OPS (vint64m4x2_t, RVV_REQUIRE_ELEN_64)
 DEF_RVV_TUPLE_OPS (vuint64m4x2_t, RVV_REQUIRE_ELEN_64)
-DEF_RVV_TUPLE_OPS (vfloat16mf4x2_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_MIN_VLEN_64)
-DEF_RVV_TUPLE_OPS (vfloat16mf4x3_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_MIN_VLEN_64)
-DEF_RVV_TUPLE_OPS (vfloat16mf4x4_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_MIN_VLEN_64)
-DEF_RVV_TUPLE_OPS (vfloat16mf4x5_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_MIN_VLEN_64)
-DEF_RVV_TUPLE_OPS (vfloat16mf4x6_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_MIN_VLEN_64)
-DEF_RVV_TUPLE_OPS (vfloat16mf4x7_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_MIN_VLEN_64)
-DEF_RVV_TUPLE_OPS (vfloat16mf4x8_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_MIN_VLEN_64)
-DEF_RVV_TUPLE_OPS (vfloat16mf2x2_t, RVV_REQUIRE_ELEN_FP_16)
-DEF_RVV_TUPLE_OPS (vfloat16mf2x3_t, RVV_REQUIRE_ELEN_FP_16)
-DEF_RVV_TUPLE_OPS (vfloat16mf2x4_t, RVV_REQUIRE_ELEN_FP_16)
-DEF_RVV_TUPLE_OPS (vfloat16mf2x5_t, RVV_REQUIRE_ELEN_FP_16)
-DEF_RVV_TUPLE_OPS (vfloat16mf2x6_t, RVV_REQUIRE_ELEN_FP_16)
-DEF_RVV_TUPLE_OPS (vfloat16mf2x7_t, RVV_REQUIRE_ELEN_FP_16)
-DEF_RVV_TUPLE_OPS (vfloat16mf2x8_t, RVV_REQUIRE_ELEN_FP_16)
-DEF_RVV_TUPLE_OPS (vfloat16m1x2_t, RVV_REQUIRE_ELEN_FP_16)
-DEF_RVV_TUPLE_OPS (vfloat16m1x3_t, RVV_REQUIRE_ELEN_FP_16)
-DEF_RVV_TUPLE_OPS (vfloat16m1x4_t, RVV_REQUIRE_ELEN_FP_16)
-DEF_RVV_TUPLE_OPS (vfloat16m1x5_t, RVV_REQUIRE_ELEN_FP_16)
-DEF_RVV_TUPLE_OPS (vfloat16m1x6_t, RVV_REQUIRE_ELEN_FP_16)
-DEF_RVV_TUPLE_OPS (vfloat16m1x7_t, RVV_REQUIRE_ELEN_FP_16)
-DEF_RVV_TUPLE_OPS (vfloat16m1x8_t, RVV_REQUIRE_ELEN_FP_16)
-DEF_RVV_TUPLE_OPS (vfloat16m2x2_t, RVV_REQUIRE_ELEN_FP_16)
-DEF_RVV_TUPLE_OPS (vfloat16m2x3_t, RVV_REQUIRE_ELEN_FP_16)
-DEF_RVV_TUPLE_OPS (vfloat16m2x4_t, RVV_REQUIRE_ELEN_FP_16)
-DEF_RVV_TUPLE_OPS (vfloat16m4x2_t, RVV_REQUIRE_ELEN_FP_16)
 DEF_RVV_TUPLE_OPS (vfloat32mf2x2_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_MIN_VLEN_64)
 DEF_RVV_TUPLE_OPS (vfloat32mf2x3_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_MIN_VLEN_64)
 DEF_RVV_TUPLE_OPS (vfloat32mf2x4_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_MIN_VLEN_64)
diff --git a/gcc/config/riscv/riscv-vector-builtins.cc b/gcc/config/riscv/riscv-vector-builtins.cc
index 9e6dae9..466e36d 100644
--- a/gcc/config/riscv/riscv-vector-builtins.cc
+++ b/gcc/config/riscv/riscv-vector-builtins.cc
@@ -3636,6 +3636,7 @@ function_expander::use_contiguous_store_insn (insn_code icode)
   for (int argno = arg_offset; argno < call_expr_nargs (exp); argno++)
     add_input_operand (argno);
 
+  add_input_operand (Pmode, get_avl_type_rtx (avl_type::NONVLMAX));
   return generate_insn (icode);
 }
diff --git a/gcc/config/riscv/riscv-vector-builtins.def b/gcc/config/riscv/riscv-vector-builtins.def
index 1e94579..310edea 100644
--- a/gcc/config/riscv/riscv-vector-builtins.def
+++ b/gcc/config/riscv/riscv-vector-builtins.def
@@ -494,48 +494,18 @@ DEF_RVV_TYPE (vuint64m8_t, 16, __rvv_uint64m8_t, uint64, VNx16DI, VNx8DI, VOID,
 /* LMUL = 1/4.  */
 DEF_RVV_TYPE (vfloat16mf4_t, 18, __rvv_float16mf4_t, float16, VNx2HF, VNx1HF, VOID, _f16mf4, _f16, _e16mf4)
-/* Define tuple types for SEW = 16, LMUL = MF4.  */
-DEF_RVV_TUPLE_TYPE (vfloat16mf4x2_t, 20, __rvv_float16mf4x2_t, vfloat16mf4_t, float, 2, _f16mf4x2)
-DEF_RVV_TUPLE_TYPE (vfloat16mf4x3_t, 20, __rvv_float16mf4x3_t, vfloat16mf4_t, float, 3, _f16mf4x3)
-DEF_RVV_TUPLE_TYPE (vfloat16mf4x4_t, 20, __rvv_float16mf4x4_t, vfloat16mf4_t, float, 4, _f16mf4x4)
-DEF_RVV_TUPLE_TYPE (vfloat16mf4x5_t, 20, __rvv_float16mf4x5_t, vfloat16mf4_t, float, 5, _f16mf4x5)
-DEF_RVV_TUPLE_TYPE (vfloat16mf4x6_t, 20, __rvv_float16mf4x6_t, vfloat16mf4_t, float, 6, _f16mf4x6)
-DEF_RVV_TUPLE_TYPE (vfloat16mf4x7_t, 20, __rvv_float16mf4x7_t, vfloat16mf4_t, float, 7, _f16mf4x7)
-DEF_RVV_TUPLE_TYPE (vfloat16mf4x8_t, 20, __rvv_float16mf4x8_t, vfloat16mf4_t, float, 8, _f16mf4x8)
 /* LMUL = 1/2.  */
 DEF_RVV_TYPE (vfloat16mf2_t, 18, __rvv_float16mf2_t, float16, VNx4HF, VNx2HF, VNx1HF, _f16mf2, _f16, _e16mf2)
-/* Define tuple types for SEW = 16, LMUL = MF2.  */
-DEF_RVV_TUPLE_TYPE (vfloat16mf2x2_t, 20, __rvv_float16mf2x2_t, vfloat16mf2_t, float, 2, _f16mf2x2)
-DEF_RVV_TUPLE_TYPE (vfloat16mf2x3_t, 20, __rvv_float16mf2x3_t, vfloat16mf2_t, float, 3, _f16mf2x3)
-DEF_RVV_TUPLE_TYPE (vfloat16mf2x4_t, 20, __rvv_float16mf2x4_t, vfloat16mf2_t, float, 4, _f16mf2x4)
-DEF_RVV_TUPLE_TYPE (vfloat16mf2x5_t, 20, __rvv_float16mf2x5_t, vfloat16mf2_t, float, 5, _f16mf2x5)
-DEF_RVV_TUPLE_TYPE (vfloat16mf2x6_t, 20, __rvv_float16mf2x6_t, vfloat16mf2_t, float, 6, _f16mf2x6)
-DEF_RVV_TUPLE_TYPE (vfloat16mf2x7_t, 20, __rvv_float16mf2x7_t, vfloat16mf2_t, float, 7, _f16mf2x7)
-DEF_RVV_TUPLE_TYPE (vfloat16mf2x8_t, 20, __rvv_float16mf2x8_t, vfloat16mf2_t, float, 8, _f16mf2x8)
 /* LMUL = 1.  */
 DEF_RVV_TYPE (vfloat16m1_t, 17, __rvv_float16m1_t, float16, VNx8HF, VNx4HF, VNx2HF, _f16m1, _f16, _e16m1)
-/* Define tuple types for SEW = 16, LMUL = M1.  */
-DEF_RVV_TUPLE_TYPE (vfloat16m1x2_t, 19, __rvv_float16m1x2_t, vfloat16m1_t, float, 2, _f16m1x2)
-DEF_RVV_TUPLE_TYPE (vfloat16m1x3_t, 19, __rvv_float16m1x3_t, vfloat16m1_t, float, 3, _f16m1x3)
-DEF_RVV_TUPLE_TYPE (vfloat16m1x4_t, 19, __rvv_float16m1x4_t, vfloat16m1_t, float, 4, _f16m1x4)
-DEF_RVV_TUPLE_TYPE (vfloat16m1x5_t, 19, __rvv_float16m1x5_t, vfloat16m1_t, float, 5, _f16m1x5)
-DEF_RVV_TUPLE_TYPE (vfloat16m1x6_t, 19, __rvv_float16m1x6_t, vfloat16m1_t, float, 6, _f16m1x6)
-DEF_RVV_TUPLE_TYPE (vfloat16m1x7_t, 19, __rvv_float16m1x7_t, vfloat16m1_t, float, 7, _f16m1x7)
-DEF_RVV_TUPLE_TYPE (vfloat16m1x8_t, 19, __rvv_float16m1x8_t, vfloat16m1_t, float, 8, _f16m1x8)
 /* LMUL = 2.  */
 DEF_RVV_TYPE (vfloat16m2_t, 17, __rvv_float16m2_t, float16, VNx16HF, VNx8HF, VNx4HF, _f16m2, _f16, _e16m2)
-/* Define tuple types for SEW = 16, LMUL = M2.  */
-DEF_RVV_TUPLE_TYPE (vfloat16m2x2_t, 19, __rvv_float16m2x2_t, vfloat16m2_t, float, 2, _f16m2x2)
-DEF_RVV_TUPLE_TYPE (vfloat16m2x3_t, 19, __rvv_float16m2x3_t, vfloat16m2_t, float, 3, _f16m2x3)
-DEF_RVV_TUPLE_TYPE (vfloat16m2x4_t, 19, __rvv_float16m2x4_t, vfloat16m2_t, float, 4, _f16m2x4)
 /* LMUL = 4.  */
 DEF_RVV_TYPE (vfloat16m4_t, 17, __rvv_float16m4_t, float16, VNx32HF, VNx16HF, VNx8HF, _f16m4, _f16, _e16m4)
-/* Define tuple types for SEW = 16, LMUL = M4.  */
-DEF_RVV_TUPLE_TYPE (vfloat16m4x2_t, 19, __rvv_float16m4x2_t, vfloat16m4_t, float, 2, _f16m4x2)
 /* LMUL = 8.  */
 DEF_RVV_TYPE (vfloat16m8_t, 16, __rvv_float16m8_t, float16, VNx64HF, VNx32HF, VNx16HF, _f16m8, _f16, _e16m8)
diff --git a/gcc/config/riscv/riscv-vector-switch.def b/gcc/config/riscv/riscv-vector-switch.def
index 7f14891..52f07709 100644
--- a/gcc/config/riscv/riscv-vector-switch.def
+++ b/gcc/config/riscv/riscv-vector-switch.def
@@ -248,38 +248,6 @@ TUPLE_ENTRY (VNx5x1HI, TARGET_MIN_VLEN < 128, VNx1HI, 5, LMUL_F2, 32, LMUL_F4, 6
 TUPLE_ENTRY (VNx6x1HI, TARGET_MIN_VLEN < 128, VNx1HI, 6, LMUL_F2, 32, LMUL_F4, 64, LMUL_RESERVED, 0)
 TUPLE_ENTRY (VNx7x1HI, TARGET_MIN_VLEN < 128, VNx1HI, 7, LMUL_F2, 32, LMUL_F4, 64, LMUL_RESERVED, 0)
 TUPLE_ENTRY (VNx8x1HI, TARGET_MIN_VLEN < 128, VNx1HI, 8, LMUL_F2, 32, LMUL_F4, 64, LMUL_RESERVED, 0)
-TUPLE_ENTRY (VNx2x32HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128, VNx32HF, 2, LMUL_RESERVED, 0, LMUL_RESERVED, 0, LMUL_4, 4)
-TUPLE_ENTRY (VNx2x16HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 64, VNx16HF, 2, LMUL_RESERVED, 0, LMUL_4, 4, LMUL_2, 8)
-TUPLE_ENTRY (VNx3x16HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128, VNx16HF, 3, LMUL_RESERVED, 0, LMUL_RESERVED, 0, LMUL_2, 8)
-TUPLE_ENTRY (VNx4x16HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128, VNx16HF, 4, LMUL_RESERVED, 0, LMUL_RESERVED, 0, LMUL_2, 8)
-TUPLE_ENTRY (VNx2x8HF, TARGET_VECTOR_ELEN_FP_16, VNx8HF, 2, LMUL_4, 4, LMUL_2, 8, LMUL_1, 16)
-TUPLE_ENTRY (VNx3x8HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 64, VNx8HF, 3, LMUL_RESERVED, 0, LMUL_2, 8, LMUL_1, 16)
-TUPLE_ENTRY (VNx4x8HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 64, VNx8HF, 4, LMUL_RESERVED, 0, LMUL_2, 8, LMUL_1, 16)
-TUPLE_ENTRY (VNx5x8HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128, VNx8HF, 5, LMUL_RESERVED, 0, LMUL_RESERVED, 0, LMUL_1, 16)
-TUPLE_ENTRY (VNx6x8HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128, VNx8HF, 6, LMUL_RESERVED, 0, LMUL_RESERVED, 0, LMUL_1, 16)
-TUPLE_ENTRY (VNx7x8HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128, VNx8HF, 7, LMUL_RESERVED, 0, LMUL_RESERVED, 0, LMUL_1, 16)
-TUPLE_ENTRY (VNx8x8HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128, VNx8HF, 8, LMUL_RESERVED, 0, LMUL_RESERVED, 0, LMUL_1, 16)
-TUPLE_ENTRY (VNx2x4HF, TARGET_VECTOR_ELEN_FP_16, VNx4HF, 2, LMUL_2, 8, LMUL_1, 16, LMUL_F2, 32)
-TUPLE_ENTRY (VNx3x4HF, TARGET_VECTOR_ELEN_FP_16, VNx4HF, 3, LMUL_2, 8, LMUL_1, 16, LMUL_F2, 32)
-TUPLE_ENTRY (VNx4x4HF, TARGET_VECTOR_ELEN_FP_16, VNx4HF, 4, LMUL_2, 8, LMUL_1, 16, LMUL_F2, 32)
-TUPLE_ENTRY (VNx5x4HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 64, VNx4HF, 5, LMUL_RESERVED, 0, LMUL_1, 16, LMUL_F2, 32)
-TUPLE_ENTRY (VNx6x4HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 64, VNx4HF, 6, LMUL_RESERVED, 0, LMUL_1, 16, LMUL_F2, 32)
-TUPLE_ENTRY (VNx7x4HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 64, VNx4HF, 7, LMUL_RESERVED, 0, LMUL_1, 16, LMUL_F2, 32)
-TUPLE_ENTRY (VNx8x4HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 64, VNx4HF, 8, LMUL_RESERVED, 0, LMUL_1, 16, LMUL_F2, 32)
-TUPLE_ENTRY (VNx2x2HF, TARGET_VECTOR_ELEN_FP_16, VNx2HF, 2, LMUL_1, 16, LMUL_F2, 32, LMUL_F4, 64)
-TUPLE_ENTRY (VNx3x2HF, TARGET_VECTOR_ELEN_FP_16, VNx2HF, 3, LMUL_1, 16, LMUL_F2, 32, LMUL_F4, 64)
-TUPLE_ENTRY (VNx4x2HF, TARGET_VECTOR_ELEN_FP_16, VNx2HF, 4, LMUL_1, 16, LMUL_F2, 32, LMUL_F4, 64)
-TUPLE_ENTRY (VNx5x2HF, TARGET_VECTOR_ELEN_FP_16, VNx2HF, 5, LMUL_1, 16, LMUL_F2, 32, LMUL_F4, 64)
-TUPLE_ENTRY (VNx6x2HF, TARGET_VECTOR_ELEN_FP_16, VNx2HF, 6, LMUL_1, 16, LMUL_F2, 32, LMUL_F4, 64)
-TUPLE_ENTRY (VNx7x2HF, TARGET_VECTOR_ELEN_FP_16, VNx2HF, 7, LMUL_1, 16, LMUL_F2, 32, LMUL_F4, 64)
-TUPLE_ENTRY (VNx8x2HF, TARGET_VECTOR_ELEN_FP_16, VNx2HF, 8, LMUL_1, 16, LMUL_F2, 32, LMUL_F4, 64)
-TUPLE_ENTRY (VNx2x1HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN < 128, VNx1HF, 2, LMUL_F2, 32, LMUL_F4, 64, LMUL_RESERVED, 0)
-TUPLE_ENTRY (VNx3x1HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN < 128, VNx1HF, 3, LMUL_F2, 32, LMUL_F4, 64, LMUL_RESERVED, 0)
-TUPLE_ENTRY (VNx4x1HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN < 128, VNx1HF, 4, LMUL_F2, 32, LMUL_F4, 64, LMUL_RESERVED, 0)
-TUPLE_ENTRY (VNx5x1HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN < 128, VNx1HF, 5, LMUL_F2, 32, LMUL_F4, 64, LMUL_RESERVED, 0)
-TUPLE_ENTRY (VNx6x1HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN < 128, VNx1HF, 6, LMUL_F2, 32, LMUL_F4, 64, LMUL_RESERVED, 0)
-TUPLE_ENTRY (VNx7x1HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN < 128, VNx1HF, 7, LMUL_F2, 32, LMUL_F4, 64, LMUL_RESERVED, 0)
-TUPLE_ENTRY (VNx8x1HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN < 128, VNx1HF, 8, LMUL_F2, 32, LMUL_F4, 64, LMUL_RESERVED, 0)
 
 /* Tuple modes for EEW = 32.  */
 TUPLE_ENTRY (VNx2x16SI, TARGET_MIN_VLEN >= 128, VNx16SI, 2, LMUL_RESERVED, 0, LMUL_RESERVED, 0, LMUL_4, 8)
diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 971c3f9..2d576e8 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -2003,9 +2003,51 @@ vector_insn_info::parse_insn (insn_info *insn)
   new_info.parse_insn (def_insn);
   if (!same_vlmax_p (new_info) && !scalar_move_insn_p (insn->rtl ()))
     return;
-  /* TODO: Currently, we don't forward AVL for non-VLMAX vsetvl.  */
-  if (vlmax_avl_p (new_info.get_avl ()))
-    set_avl_info (avl_info (new_info.get_avl (), get_avl_source ()));
+
+  if (new_info.has_avl ())
+    {
+      if (new_info.has_avl_imm ())
+	set_avl_info (avl_info (new_info.get_avl (), nullptr));
+      else
+	{
+	  if (vlmax_avl_p (new_info.get_avl ()))
+	    set_avl_info (avl_info (new_info.get_avl (), get_avl_source ()));
+	  else
+	    {
+	      /* Conservatively propagate the non-VLMAX AVL of a user vsetvl:
+		 1. The user vsetvl is in the same block as the rvv insn.
+		 2. The user vsetvl is the only def insn of the rvv insn.
+		 3. The AVL is not modified within the def-use chain.
+		 4. The VL is only used by insns within the EBB.  */
+	      bool modified_p = false;
+	      for (insn_info *i = def_insn->next_nondebug_insn ();
+		   real_insn_and_same_bb_p (i, get_insn ()->bb ());
+		   i = i->next_nondebug_insn ())
+		{
+		  if (find_access (i->defs (), REGNO (new_info.get_avl ())))
+		    {
+		      modified_p = true;
+		      break;
+		    }
+		}
+
+	      bool has_live_out_use = false;
+	      for (use_info *use : m_avl.get_source ()->all_uses ())
+		{
+		  if (use->is_live_out_use ())
+		    {
+		      has_live_out_use = true;
+		      break;
+		    }
+		}
+	      if (!modified_p && !has_live_out_use
+		  && def_insn == m_avl.get_source ()->insn ()
+		  && m_insn->bb () == def_insn->bb ())
+		set_avl_info (new_info.get_avl_info ());
+	    }
+	}
+    }
 
   if (scalar_move_insn_p (insn->rtl ()) && m_avl.has_non_zero_avl ())
     m_demands[DEMAND_NONZERO_AVL] = true;
diff --git a/gcc/config/riscv/riscv-vsetvl.h b/gcc/config/riscv/riscv-vsetvl.h
index 4257451..87cdd2e 100644
--- a/gcc/config/riscv/riscv-vsetvl.h
+++ b/gcc/config/riscv/riscv-vsetvl.h
@@ -180,6 +180,7 @@ public:
   bool has_avl_reg () const { return get_value () && REG_P (get_value ()); }
   bool has_avl_no_reg () const { return !get_value (); }
   bool has_non_zero_avl () const;
+  bool has_avl () const { return get_value (); }
 };
 
 /* Basic structure to save VL/VTYPE information.  */
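[The relaxed parse_insn now forwards a non-VLMAX AVL from a user vsetvl when the four conditions in the comment hold. A sequence of that shape, written with the RVV intrinsics (illustrative; processes only the first vl lanes, strip-mining loop omitted):]

    #include <riscv_vector.h>
    #include <stddef.h>
    #include <stdint.h>

    void add_one (int32_t *a, size_t n)
    {
      size_t vl = __riscv_vsetvl_e32m1 (n);          // single def of the AVL
      vint32m1_t v = __riscv_vle32_v_i32m1 (a, vl);  // same block, AVL unmodified
      v = __riscv_vadd_vx_i32m1 (v, 1, vl);          // VL uses stay within the EBB
      __riscv_vse32_v_i32m1 (a, v, vl);
    }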
diff --git a/gcc/config/riscv/riscv-vsetvl.h b/gcc/config/riscv/riscv-vsetvl.h
index 4257451..87cdd2e 100644
--- a/gcc/config/riscv/riscv-vsetvl.h
+++ b/gcc/config/riscv/riscv-vsetvl.h
@@ -180,6 +180,7 @@ public:
   bool has_avl_reg () const { return get_value () && REG_P (get_value ()); }
   bool has_avl_no_reg () const { return !get_value (); }
   bool has_non_zero_avl () const;
+  bool has_avl () const { return get_value (); }
 };
 
 /* Basic structure to save VL/VTYPE information.  */
@@ -219,6 +220,7 @@ public:
   bool has_avl_reg () const { return m_avl.has_avl_reg (); }
   bool has_avl_no_reg () const { return m_avl.has_avl_no_reg (); }
   bool has_non_zero_avl () const { return m_avl.has_non_zero_avl (); };
+  bool has_avl () const { return m_avl.has_avl (); }
 
   rtx get_avl () const { return m_avl.get_value (); }
   const avl_info &get_avl_info () const { return m_avl; }
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 245cace..565e8cd 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -191,11 +191,6 @@
   VNx2x4HI,VNx3x4HI,VNx4x4HI,VNx5x4HI,VNx6x4HI,VNx7x4HI,VNx8x4HI,
   VNx2x2HI,VNx3x2HI,VNx4x2HI,VNx5x2HI,VNx6x2HI,VNx7x2HI,VNx8x2HI,
   VNx2x1HI,VNx3x1HI,VNx4x1HI,VNx5x1HI,VNx6x1HI,VNx7x1HI,VNx8x1HI,
-  VNx2x32HF,VNx2x16HF,VNx3x16HF,VNx4x16HF,
-  VNx2x8HF,VNx3x8HF,VNx4x8HF,VNx5x8HF,VNx6x8HF,VNx7x8HF,VNx8x8HF,
-  VNx2x4HF,VNx3x4HF,VNx4x4HF,VNx5x4HF,VNx6x4HF,VNx7x4HF,VNx8x4HF,
-  VNx2x2HF,VNx3x2HF,VNx4x2HF,VNx5x2HF,VNx6x2HF,VNx7x2HF,VNx8x2HF,
-  VNx2x1HF,VNx3x1HF,VNx4x1HF,VNx5x1HF,VNx6x1HF,VNx7x1HF,VNx8x1HF,
   VNx2x16SI,VNx2x8SI,VNx3x8SI,VNx4x8SI,
   VNx2x4SI,VNx3x4SI,VNx4x4SI,VNx5x4SI,VNx6x4SI,VNx7x4SI,VNx8x4SI,
   VNx2x2SI,VNx3x2SI,VNx4x2SI,VNx5x2SI,VNx6x2SI,VNx7x2SI,VNx8x2SI,
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index 6ca1c54..26c1bb7 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -652,38 +652,6 @@
   (VNx6x1DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN < 128")
   (VNx7x1DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN < 128")
   (VNx8x1DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN < 128")
-  (VNx2x32HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128")
-  (VNx2x16HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 64")
-  (VNx3x16HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128")
-  (VNx4x16HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128")
-  (VNx2x8HF "TARGET_VECTOR_ELEN_FP_16")
-  (VNx3x8HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 64")
-  (VNx4x8HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 64")
-  (VNx5x8HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128")
-  (VNx6x8HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128")
-  (VNx7x8HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128")
-  (VNx8x8HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128")
-  (VNx2x4HF "TARGET_VECTOR_ELEN_FP_16")
-  (VNx3x4HF "TARGET_VECTOR_ELEN_FP_16")
-  (VNx4x4HF "TARGET_VECTOR_ELEN_FP_16")
-  (VNx5x4HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 64")
-  (VNx6x4HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 64")
-  (VNx7x4HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 64")
-  (VNx8x4HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 64")
-  (VNx2x2HF "TARGET_VECTOR_ELEN_FP_16")
-  (VNx3x2HF "TARGET_VECTOR_ELEN_FP_16")
-  (VNx4x2HF "TARGET_VECTOR_ELEN_FP_16")
-  (VNx5x2HF "TARGET_VECTOR_ELEN_FP_16")
-  (VNx6x2HF "TARGET_VECTOR_ELEN_FP_16")
-  (VNx7x2HF "TARGET_VECTOR_ELEN_FP_16")
-  (VNx8x2HF "TARGET_VECTOR_ELEN_FP_16")
-  (VNx2x1HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN < 128")
-  (VNx3x1HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN < 128")
-  (VNx4x1HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN < 128")
-  (VNx5x1HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN < 128")
-  (VNx6x1HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN < 128")
-  (VNx7x1HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN < 128")
-  (VNx8x1HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN < 128")
   (VNx2x16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32")
   (VNx2x8SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 64")
   (VNx3x8SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128")
@@ -1154,11 +1122,6 @@
   (VNx2x8DI "VNx8BI") (VNx2x4DI "VNx4BI") (VNx3x4DI "VNx4BI") (VNx4x4DI "VNx4BI")
   (VNx2x2DI "VNx2BI") (VNx3x2DI "VNx2BI") (VNx4x2DI "VNx2BI") (VNx5x2DI "VNx2BI") (VNx6x2DI "VNx2BI") (VNx7x2DI "VNx2BI") (VNx8x2DI "VNx2BI")
   (VNx2x1DI "VNx1BI") (VNx3x1DI "VNx1BI") (VNx4x1DI "VNx1BI") (VNx5x1DI "VNx1BI") (VNx6x1DI "VNx1BI") (VNx7x1DI "VNx1BI") (VNx8x1DI "VNx1BI")
-  (VNx2x32HF "VNx32BI") (VNx2x16HF "VNx16BI") (VNx3x16HF "VNx16BI") (VNx4x16HF "VNx16BI")
-  (VNx2x8HF "VNx8BI") (VNx3x8HF "VNx8BI") (VNx4x8HF "VNx8BI") (VNx5x8HF "VNx8BI") (VNx6x8HF "VNx8BI") (VNx7x8HF "VNx8BI") (VNx8x8HF "VNx8BI")
-  (VNx2x4HF "VNx4BI") (VNx3x4HF "VNx4BI") (VNx4x4HF "VNx4BI") (VNx5x4HF "VNx4BI") (VNx6x4HF "VNx4BI") (VNx7x4HF "VNx4BI") (VNx8x4HF "VNx4BI")
-  (VNx2x2HF "VNx2BI") (VNx3x2HF "VNx2BI") (VNx4x2HF "VNx2BI") (VNx5x2HF "VNx2BI") (VNx6x2HF "VNx2BI") (VNx7x2HF "VNx2BI") (VNx8x2HF "VNx2BI")
-  (VNx2x1HF "VNx1BI") (VNx3x1HF "VNx1BI") (VNx4x1HF "VNx1BI") (VNx5x1HF "VNx1BI") (VNx6x1HF "VNx1BI") (VNx7x1HF "VNx1BI") (VNx8x1HF "VNx1BI")
   (VNx2x16SF "VNx16BI") (VNx2x8SF "VNx8BI") (VNx3x8SF "VNx8BI") (VNx4x8SF "VNx8BI")
   (VNx2x4SF "VNx4BI") (VNx3x4SF "VNx4BI") (VNx4x4SF "VNx4BI") (VNx5x4SF "VNx4BI") (VNx6x4SF "VNx4BI") (VNx7x4SF "VNx4BI") (VNx8x4SF "VNx4BI")
   (VNx2x2SF "VNx2BI") (VNx3x2SF "VNx2BI") (VNx4x2SF "VNx2BI") (VNx5x2SF "VNx2BI") (VNx6x2SF "VNx2BI") (VNx7x2SF "VNx2BI") (VNx8x2SF "VNx2BI")
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 884e7435..674e602 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -425,14 +425,14 @@
 	 (eq_attr "type" "vldux,vldox,vialu,vshift,viminmax,vimul,vidiv,vsalu,\
 			  viwalu,viwmul,vnshift,vaalu,vsmul,vsshift,\
 			  vnclip,vicmp,vfalu,vfmul,vfminmax,vfdiv,vfwalu,vfwmul,\
-			  vfsgnj,vfcmp,vfmuladd,vslideup,vslidedown,vislide1up,\
+			  vfsgnj,vfcmp,vslideup,vslidedown,vislide1up,\
 			  vislide1down,vfslide1up,vfslide1down,vgather,viwmuladd,vfwmuladd,\
 			  vlsegds,vlsegdux,vlsegdox")
 	   (symbol_ref "INTVAL (operands[8])")
 	 (eq_attr "type" "vstux,vstox,vssegts,vssegtux,vssegtox")
 	   (symbol_ref "INTVAL (operands[5])")
 
-	 (eq_attr "type" "vimuladd")
+	 (eq_attr "type" "vimuladd,vfmuladd")
 	   (symbol_ref "INTVAL (operands[9])")
 
 	 (eq_attr "type" "vmsfs,vmidx,vcompress")
@@ -1063,6 +1063,7 @@
 	  (unspec:<VM>
 	    [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1")
 	     (match_operand 3 "vector_length_operand"    "   rK")
+	     (match_operand 4 "const_int_operand"        "    i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
 	  (match_operand:V 2 "register_operand"          "   vr")
@@ -1071,7 +1072,7 @@
   "vse<sew>.v\t%2,%0%p1"
   [(set_attr "type" "vste")
    (set_attr "mode" "<MODE>")
-   (set (attr "avl_type") (symbol_ref "riscv_vector::NONVLMAX"))
+   (set (attr "avl_type") (symbol_ref "INTVAL (operands[4])"))
   (set_attr "vl_op_idx" "3")])
 
 ;; vlm.v/vsm.v/vmclr.m/vmset.m.
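[Editorial note: the vse hunk above, and the vsm/vsse hunks that follow, replace the hard-coded riscv_vector::NONVLMAX avl_type with a const_int operand, so each store can state whether its AVL is VLMAX. A rough user-level sketch of the two cases it distinguishes (hypothetical example; the classification itself is done by the back end, not by the programmer):

#include <stdint.h>
#include <riscv_vector.h>

void store_vlmax (int32_t *dst, vint32m1_t v)
{
  /* AVL is the maximum for e32/m1: this store may now be tagged VLMAX.  */
  __riscv_vse32_v_i32m1 (dst, v, __riscv_vsetvlmax_e32m1 ());
}

void store_bounded (int32_t *dst, vint32m1_t v, size_t n)
{
  /* Runtime-bounded AVL: still NONVLMAX.  */
  __riscv_vse32_v_i32m1 (dst, v, __riscv_vsetvl_e32m1 (n));
}]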
@@ -1113,6 +1114,7 @@
 	  (unspec:VB
 	    [(match_operand:VB 1 "vector_all_trues_mask_operand" "Wc1")
 	     (match_operand 3 "vector_length_operand"            " rK")
+	     (match_operand 4 "const_int_operand"                "  i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
 	  (match_operand:VB 2 "register_operand"                 " vr")
@@ -1121,7 +1123,7 @@
   "vsm.v\t%2,%0"
   [(set_attr "type" "vstm")
    (set_attr "mode" "<MODE>")
-   (set (attr "avl_type") (symbol_ref "riscv_vector::NONVLMAX"))
+   (set (attr "avl_type") (symbol_ref "INTVAL (operands[4])"))
   (set_attr "vl_op_idx" "3")])
 
 (define_insn "@pred_merge<mode>"
@@ -1433,6 +1435,7 @@
 	  (unspec:<VM>
 	    [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1")
 	     (match_operand 4 "vector_length_operand"    "   rK")
+	     (match_operand 5 "const_int_operand"        "    i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
 	  (unspec:V
@@ -1442,7 +1445,8 @@
   "TARGET_VECTOR"
   "vsse<sew>.v\t%3,%0,%z2%p1"
   [(set_attr "type" "vsts")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set (attr "avl_type") (symbol_ref "INTVAL (operands[5])"))])
 
 ;; -------------------------------------------------------------------------------
 ;; ---- Predicated indexed loads/stores
diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index d45fb13..e286bf5 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -22,7 +22,7 @@
 ;; load mode is DI result mode is clobber compare mode is CC extend is none
 (define_insn_and_split "*ld_cmpdi_cr0_DI_clobber_CC_none"
   [(set (match_operand:CC 2 "cc_reg_operand" "=x")
-	(compare:CC (match_operand:DI 1 "ds_form_mem_operand" "m")
+	(compare:CC (match_operand:DI 1 "non_update_memory_operand" "YZ")
		    (match_operand:DI 3 "const_m1_to_1_operand" "n")))
   (clobber (match_scratch:DI 0 "=r"))]
  "(TARGET_P10_FUSION)"
@@ -43,7 +43,7 @@
 ;; load mode is DI result mode is clobber compare mode is CCUNS extend is none
 (define_insn_and_split "*ld_cmpldi_cr0_DI_clobber_CCUNS_none"
   [(set (match_operand:CCUNS 2 "cc_reg_operand" "=x")
-	(compare:CCUNS (match_operand:DI 1 "ds_form_mem_operand" "m")
+	(compare:CCUNS (match_operand:DI 1 "non_update_memory_operand" "YZ")
		       (match_operand:DI 3 "const_0_to_1_operand" "n")))
   (clobber (match_scratch:DI 0 "=r"))]
  "(TARGET_P10_FUSION)"
@@ -64,7 +64,7 @@
 ;; load mode is DI result mode is DI compare mode is CC extend is none
 (define_insn_and_split "*ld_cmpdi_cr0_DI_DI_CC_none"
   [(set (match_operand:CC 2 "cc_reg_operand" "=x")
-	(compare:CC (match_operand:DI 1 "ds_form_mem_operand" "m")
+	(compare:CC (match_operand:DI 1 "non_update_memory_operand" "YZ")
		    (match_operand:DI 3 "const_m1_to_1_operand" "n")))
   (set (match_operand:DI 0 "gpc_reg_operand" "=r") (match_dup 1))]
  "(TARGET_P10_FUSION)"
@@ -85,7 +85,7 @@
 ;; load mode is DI result mode is DI compare mode is CCUNS extend is none
 (define_insn_and_split "*ld_cmpldi_cr0_DI_DI_CCUNS_none"
   [(set (match_operand:CCUNS 2 "cc_reg_operand" "=x")
-	(compare:CCUNS (match_operand:DI 1 "ds_form_mem_operand" "m")
+	(compare:CCUNS (match_operand:DI 1 "non_update_memory_operand" "YZ")
		       (match_operand:DI 3 "const_0_to_1_operand" "n")))
   (set (match_operand:DI 0 "gpc_reg_operand" "=r") (match_dup 1))]
  "(TARGET_P10_FUSION)"
@@ -104,17 +104,17 @@
 ;; load-cmpi fusion pattern generated by gen_ld_cmpi_p10
 ;; load mode is SI result mode is clobber compare mode is CC extend is none
-(define_insn_and_split "*lwa_cmpdi_cr0_SI_clobber_CC_none"
+(define_insn_and_split "*lwz_cmpwi_cr0_SI_clobber_CC_none"
   [(set (match_operand:CC 2 "cc_reg_operand" "=x")
-	(compare:CC (match_operand:SI 1 "ds_form_mem_operand" "m")
+	(compare:CC (match_operand:SI 1 "non_update_memory_operand" "m")
		    (match_operand:SI 3 "const_m1_to_1_operand" "n")))
   (clobber (match_scratch:SI 0 "=r"))]
  "(TARGET_P10_FUSION)"
-  "lwa%X1 %0,%1\;cmpdi %2,%0,%3"
+  "lwz%X1 %0,%1\;cmpwi %2,%0,%3"
  "&& reload_completed
   && (cc_reg_not_cr0_operand (operands[2], CCmode)
       || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0),
-				     SImode, NON_PREFIXED_DS))"
+				     SImode, NON_PREFIXED_D))"
  [(set (match_dup 0) (match_dup 1))
  (set (match_dup 2)
       (compare:CC (match_dup 0) (match_dup 3)))]
@@ -146,17 +146,17 @@
 ;; load-cmpi fusion pattern generated by gen_ld_cmpi_p10
 ;; load mode is SI result mode is SI compare mode is CC extend is none
-(define_insn_and_split "*lwa_cmpdi_cr0_SI_SI_CC_none"
+(define_insn_and_split "*lwz_cmpwi_cr0_SI_SI_CC_none"
   [(set (match_operand:CC 2 "cc_reg_operand" "=x")
-	(compare:CC (match_operand:SI 1 "ds_form_mem_operand" "m")
+	(compare:CC (match_operand:SI 1 "non_update_memory_operand" "m")
		    (match_operand:SI 3 "const_m1_to_1_operand" "n")))
   (set (match_operand:SI 0 "gpc_reg_operand" "=r") (match_dup 1))]
  "(TARGET_P10_FUSION)"
-  "lwa%X1 %0,%1\;cmpdi %2,%0,%3"
+  "lwz%X1 %0,%1\;cmpwi %2,%0,%3"
  "&& reload_completed
   && (cc_reg_not_cr0_operand (operands[2], CCmode)
       || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0),
-				     SImode, NON_PREFIXED_DS))"
+				     SImode, NON_PREFIXED_D))"
  [(set (match_dup 0) (match_dup 1))
  (set (match_dup 2)
       (compare:CC (match_dup 0) (match_dup 3)))]
@@ -190,7 +190,7 @@
 ;; load mode is SI result mode is EXTSI compare mode is CC extend is sign
 (define_insn_and_split "*lwa_cmpdi_cr0_SI_EXTSI_CC_sign"
   [(set (match_operand:CC 2 "cc_reg_operand" "=x")
-	(compare:CC (match_operand:SI 1 "ds_form_mem_operand" "m")
+	(compare:CC (match_operand:SI 1 "non_update_memory_operand" "YZ")
		    (match_operand:SI 3 "const_m1_to_1_operand" "n")))
   (set (match_operand:EXTSI 0 "gpc_reg_operand" "=r")
	(sign_extend:EXTSI (match_dup 1)))]
  "(TARGET_P10_FUSION)"
@@ -205,6 +205,7 @@
  ""
  [(set_attr "type" "fused_load_cmpi")
   (set_attr "cost" "8")
+  (set_attr "sign_extend" "yes")
   (set_attr "length" "8")])
 
 ;; load-cmpi fusion pattern generated by gen_ld_cmpi_p10
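[Editorial note: the rewritten SImode patterns above use lwz plus a word compare when the loaded value is not otherwise needed sign-extended, and keep lwa plus cmpdi only for the sign-extending variant; the generator change producing them follows below. A sketch of the two source-level shapes (hypothetical example; whether combine actually forms the fused pattern at -mcpu=power10 -O2 is not guaranteed):

#include <stdint.h>

int cmp_only (const int32_t *p)
{
  return *p == 1;            /* value dies in the compare: lwz + cmpwi candidate */
}

int64_t cmp_and_use (const int32_t *p, int64_t alt)
{
  int64_t v = *p;            /* sign-extended value stays live: lwa + cmpdi candidate */
  return v == -1 ? alt : v;
}]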
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 82e8f86..4d1f825 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -61,20 +61,31 @@ sub gen_ld_cmpi_p10_one
   my $mempred = "non_update_memory_operand";
   my $extend;
 
+  # We need to special-case lwa.  The prefixed_load_p function in rs6000.cc
+  # (which determines whether a load instruction is prefixed) uses the fact
+  # that the register mode is different from the memory mode, and that the
+  # sign_extend attribute is set, to apply DS-form rules for the address
+  # instead of D-form.  If the register size is the same, prefixed_load_p
+  # assumes we are doing an lwz.  We switch to an lwz and a word compare when
+  # we don't need to sign extend the SImode value.  Otherwise, if we do need
+  # the sign-extended value, we must make sure the insn is marked as DS-form.
+  my $cmp_size_char = ($lmode eq "SI"
+		       && $ccmode eq "CC"
+		       && $result !~ /^EXT|^DI$/) ? "w" : "d";
+
   if ($ccmode eq "CC") {
     # ld and lwa are both DS-FORM.
-    ($lmode =~ /^[SD]I$/) and $np = "NON_PREFIXED_DS";
-    ($lmode =~ /^[SD]I$/) and $mempred = "ds_form_mem_operand";
+    ($lmode eq "DI") and $np = "NON_PREFIXED_DS";
+    ($lmode eq "SI" && $cmp_size_char eq "d") and $np = "NON_PREFIXED_DS";
   } else {
     if ($lmode eq "DI") {
       # ld is DS-form, but lwz is not.
       $np = "NON_PREFIXED_DS";
-      $mempred = "ds_form_mem_operand";
     }
   }
 
   my $cmpl = ($ccmode eq "CC") ? "" : "l";
-  my $echr = ($ccmode eq "CC") ? "a" : "z";
+  my $echr = ($ccmode eq "CC" && $cmp_size_char eq "d") ? "a" : "z";
   if ($lmode eq "DI") { $echr = ""; }
   my $constpred = ($ccmode eq "CC") ? "const_m1_to_1_operand" : "const_0_to_1_operand";
@@ -91,12 +102,15 @@ }
 
   my $ldst = mode_to_ldst_char($lmode);
+
+  # DS-form addresses need the YZ constraint, not m.
+  my $constraint = ($np eq "NON_PREFIXED_DS") ? "YZ" : "m";
   print <<HERE;
 ;; load-cmpi fusion pattern generated by gen_ld_cmpi_p10
 ;; load mode is $lmode result mode is $result compare mode is $ccmode extend is $extend
-(define_insn_and_split "*l${ldst}${echr}_cmp${cmpl}di_cr0_${lmode}_${result}_${ccmode}_${extend}"
+(define_insn_and_split "*l${ldst}${echr}_cmp${cmpl}${cmp_size_char}i_cr0_${lmode}_${result}_${ccmode}_${extend}"
   [(set (match_operand:${ccmode} 2 "cc_reg_operand" "=x")
-	(compare:${ccmode} (match_operand:${lmode} 1 "${mempred}" "m")
+	(compare:${ccmode} (match_operand:${lmode} 1 "${mempred}" "${constraint}")
 HERE
   print "        " if $ccmode eq "CCUNS";
   print <<HERE;
@@ -119,7 +133,7 @@ HERE
   print <<HERE;
   "(TARGET_P10_FUSION)"
-  "l${ldst}${echr}%X1 %0,%1\\;cmp${cmpl}di %2,%0,%3"
+  "l${ldst}${echr}%X1 %0,%1\\;cmp${cmpl}${cmp_size_char}i %2,%0,%3"
   "&& reload_completed
    && (cc_reg_not_cr0_operand (operands[2], CCmode)
        || !address_is_non_pfx_d_or_x (XEXP (operands[1], 0),
@@ -140,6 +154,15 @@ HERE
   ""
   [(set_attr "type" "fused_load_cmpi")
    (set_attr "cost" "8")
+HERE
+
+  if ($lmode eq "SI" && $ccmode eq "CC" && $cmp_size_char eq "d") {
+    # prefixed_load_p needs the sign_extend attribute to validate lwa as a
+    # DS-form instruction instead of D-form.
+    print "   (set_attr \"sign_extend\" \"yes\")\n";
+  }
+
+  print <<HERE
    (set_attr "length" "8")])
 HERE
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index a16ee30..6b56483 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -1125,20 +1125,6 @@
   return INTVAL (offset) % 4 == 0;
 })
 
-;; Return 1 if the operand is a memory operand that has a valid address for
-;; a DS-form instruction.  I.e. the address has to be either just a register,
-;; or register + const where the two low order bits of const are zero.
-(define_predicate "ds_form_mem_operand"
-  (match_code "subreg,mem")
-{
-  if (!any_memory_operand (op, mode))
-    return false;
-
-  rtx addr = XEXP (op, 0);
-
-  return address_to_insn_form (addr, mode, NON_PREFIXED_DS) == INSN_FORM_DS;
-})
-
 ;; Return 1 if the operand, used inside a MEM, is a SYMBOL_REF.
 (define_predicate "symbol_ref_operand"
   (and (match_code "symbol_ref")
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index b0db8ae..75c5e5f 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -287,7 +287,7 @@
 ;; Whether this insn has a prefixed form and a non-prefixed form.
 (define_attr "maybe_prefixed" "no,yes"
   (if_then_else (eq_attr "type" "load,fpload,vecload,store,fpstore,vecstore,
-				 integer,add")
+				 integer,add,fused_load_cmpi")
		(const_string "yes")
		(const_string "no")))
 
@@ -302,7 +302,7 @@
	  (eq_attr "maybe_prefixed" "no"))
      (const_string "no")
 
-     (eq_attr "type" "load,fpload,vecload")
+     (eq_attr "type" "load,fpload,vecload,fused_load_cmpi")
      (if_then_else (match_test "prefixed_load_p (insn)")
		    (const_string "yes")
		    (const_string "no"))
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 9284477..d9f10542 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -13706,8 +13706,10 @@ s390_encode_section_info (tree decl, rtx rtl, int first)
     {
       /* Store the alignment to be able to check if we can use
	 a larl/load-relative instruction.  We only handle the cases
-	 that can go wrong (i.e. no FUNC_DECLs).  */
-      if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16)
+	 that can go wrong (i.e. no FUNC_DECLs).
+	 All symbols without an explicit alignment are assumed to be
+	 2-byte aligned, as mandated by our ABI.  */
+      if (DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) % 16)
	 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
       else if (DECL_ALIGN (decl) % 32)
	 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index 4622dba..5cb1795 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -9269,7 +9269,7 @@
		(match_operand:SF 3 "arith_reg_operand" "0")))
   (clobber (reg:SI FPSCR_STAT_REG))
   (use (reg:SI FPSCR_MODES_REG))]
-  "TARGET_SH2E && flag_fp_contract_mode != FP_CONTRACT_OFF"
+  "TARGET_SH2E && flag_fp_contract_mode == FP_CONTRACT_FAST"
  "fmac	%1,%2,%0"
  "&& can_create_pseudo_p ()"
  [(parallel [(set (match_dup 0)
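[Editorial note: the sh.md hunk above tightens the fmac condition from "contraction not disabled" to "contraction explicitly fast", presumably because this combine-time pattern can fuse a multiply and an add from different source expressions, which only -ffp-contract=fast permits. A one-function illustration (hypothetical example):

float maybe_fmac (float a, float b, float c)
{
  return a * b + c;   /* emitted as fmac on SH2E only under -ffp-contract=fast */
}]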