author    Ian Lance Taylor <iant@golang.org>  2021-03-17 12:19:01 -0700
committer Ian Lance Taylor <iant@golang.org>  2021-03-17 12:19:01 -0700
commit f10c7c4596dda99d2ee872c995ae4aeda65adbdf (patch)
tree   a3451277603bc8fbe2eddce5f4ad63f790129a01 /gcc/config
parent bc636c218f2b28da06cd1404d5b35d1f8cc43fd1 (diff)
parent f3e9c98a9f40fc24bb4ecef6aaa94ff799c8d587 (diff)
Merge from trunk revision f3e9c98a9f40fc24bb4ecef6aaa94ff799c8d587.
Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/aarch64/aarch64-builtins.c |   2
-rw-r--r--  gcc/config/aarch64/aarch64.c          |  14
-rw-r--r--  gcc/config/i386/constraints.md        |   6
-rw-r--r--  gcc/config/i386/i386-expand.c         |  11
-rw-r--r--  gcc/config/i386/i386-features.c       |  31
-rw-r--r--  gcc/config/i386/i386-options.c        |   2
-rw-r--r--  gcc/config/i386/i386.c                |  32
-rw-r--r--  gcc/config/i386/i386.h                |   4
-rw-r--r--  gcc/config/i386/i386.md               |  12
-rw-r--r--  gcc/config/i386/mmx.md                | 179
-rw-r--r--  gcc/config/i386/sse.md                | 350
-rw-r--r--  gcc/config/i386/x86-tune-costs.h      | 134
-rw-r--r--  gcc/config/rs6000/predicates.md       |   9
-rw-r--r--  gcc/config/s390/s390.c                |  12
-rw-r--r--  gcc/config/sparc/constraints.md       |   9
-rw-r--r--  gcc/config/sparc/sparc.c              |   9
-rw-r--r--  gcc/config/sparc/sparc.md             |  12
-rw-r--r--  gcc/config/sparc/sync.md              |   6
18 files changed, 508 insertions(+), 326 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 25ab866..acdea2a 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -1954,7 +1954,7 @@ aarch64_expand_rng_builtin (tree exp, rtx target, int fcode, int ignore)
return target;
rtx cc_reg = gen_rtx_REG (CC_Zmode, CC_REGNUM);
- rtx cmp_rtx = gen_rtx_fmt_ee (NE, SImode, cc_reg, const0_rtx);
+ rtx cmp_rtx = gen_rtx_fmt_ee (EQ, SImode, cc_reg, const0_rtx);
emit_insn (gen_aarch64_cstoresi (target, cmp_rtx, cc_reg));
return target;
}
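
Note: the hunk above flips the stored comparison from NE to EQ so that the RNG builtin's status result matches the ACLE contract, which specifies a return value of 0 on success (the RNDR instruction signals failure by setting the Z flag). A minimal sketch of the source-level interface, assuming -march=armv8.5-a+rng and GCC's arm_acle.h:

#include <arm_acle.h>
#include <stdint.h>

/* __rndr expands to __builtin_aarch64_rndr; per ACLE it returns 0
   when a genuine random number was produced, nonzero on failure.  */
int
get_random (uint64_t *out)
{
  return __rndr (out);
}
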
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 8a86889..7838d99 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -23372,7 +23372,7 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
struct cgraph_simd_clone *clonei,
tree base_type, int num)
{
- tree t, ret_type, arg_type;
+ tree t, ret_type;
unsigned int elt_bits, count;
unsigned HOST_WIDE_INT const_simdlen;
poly_uint64 vec_bits;
@@ -23412,11 +23412,17 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
return 0;
}
- for (t = DECL_ARGUMENTS (node->decl); t; t = DECL_CHAIN (t))
+ int i;
+ tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
+ bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);
+
+ for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
+ t && t != void_list_node; t = TREE_CHAIN (t), i++)
{
- arg_type = TREE_TYPE (t);
+ tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
- if (!currently_supported_simd_type (arg_type, base_type))
+ if (clonei->args[i].arg_type != SIMD_CLONE_ARG_TYPE_UNIFORM
+ && !currently_supported_simd_type (arg_type, base_type))
{
if (TYPE_SIZE (arg_type) != TYPE_SIZE (base_type))
warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
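
The aarch64.c hunk changes two things for SIMD clones: argument types come from TYPE_ARG_TYPES when the node is a declaration without a body, and 'uniform' arguments are exempt from the vectorizable-type check, since they are passed as scalars to the clone. A hedged sketch of source code exercising both (compile with -fopenmp-simd; the names are illustrative):

/* Declaration only, so the walk uses TYPE_ARG_TYPES; 'scale' is
   uniform and therefore its non-SIMD type (long double) no longer
   fails the currently_supported_simd_type check.  */
#pragma omp declare simd uniform(scale) notinbranch
double scale_elem (double x, long double scale);
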
diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index a8db33e..eaa582d 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -111,6 +111,8 @@
;; otherwise any SSE register
;; w any EVEX encodable SSE register for AVX512BW with TARGET_AVX512VL
;; target, otherwise any SSE register.
+;; W any EVEX encodable SSE register for AVX512BW target,
+;; otherwise any SSE register.
(define_register_constraint "Yz" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS"
"First SSE register (@code{%xmm0}).")
@@ -151,6 +153,10 @@
"TARGET_AVX512BW && TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS"
"@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW with TARGET_AVX512VL target, otherwise any SSE register.")
+(define_register_constraint "YW"
+ "TARGET_AVX512BW ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS"
+ "@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW target, otherwise any SSE register.")
+
;; We use the B prefix to denote any number of internal operands:
;; f FLAGS_REG
;; g GOT memory operand.
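
Why the new YW constraint (and the Yw variants used throughout mmx.md and sse.md below) exists: the EVEX-only registers %xmm16-%xmm31 are usable for byte/word element operations only when AVX512BW is available, while dword/qword operations need only AVX512F/AVX512VL. A sketch of the distinction at the intrinsic level, assuming -mavx512vl (function names are illustrative):

#include <immintrin.h>

/* vpaddd has an EVEX form under AVX512VL, so the register allocator
   may place this in %xmm16-%xmm31 ...  */
__m128i add_dwords (__m128i a, __m128i b) { return _mm_add_epi32 (a, b); }

/* ... but vpaddb only gains an EVEX form with AVX512BW; without it
   the byte add must stay in %xmm0-%xmm15, which is the rule that
   Yw/YW encode.  */
__m128i add_bytes (__m128i a, __m128i b) { return _mm_add_epi8 (a, b); }
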
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 02d3142..ac69eed 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -1348,9 +1348,10 @@ ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
if (regno0 != regno2)
emit_insn (gen_rtx_SET (target, parts.index));
- /* Use shift for scaling. */
- ix86_emit_binop (ASHIFT, mode, target,
- GEN_INT (exact_log2 (parts.scale)));
+ /* Use shift for scaling, but emit it as MULT instead
+ to avoid it being immediately peephole2 optimized back
+ into lea. */
+ ix86_emit_binop (MULT, mode, target, GEN_INT (parts.scale));
if (parts.base)
ix86_emit_binop (PLUS, mode, target, parts.base);
@@ -13210,6 +13211,10 @@ rdseed_step:
return 0;
+ case IX86_BUILTIN_VZEROUPPER:
+ cfun->machine->has_explicit_vzeroupper = true;
+ break;
+
default:
break;
}
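
The new IX86_BUILTIN_VZEROUPPER case records that the function contains an explicit vzeroupper, so the pass in i386-features.c below can still run for it even when the automatic-insertion heuristics are off. At the source level the builtin comes from the standard intrinsic:

#include <immintrin.h>

void
mix_sse_after_avx (void)
{
  /* Expands to __builtin_ia32_vzeroupper and now sets
     cfun->machine->has_explicit_vzeroupper for this function.  */
  _mm256_zeroupper ();
}
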
diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c
index 41891c9..77783a1 100644
--- a/gcc/config/i386/i386-features.c
+++ b/gcc/config/i386/i386-features.c
@@ -1837,19 +1837,22 @@ ix86_add_reg_usage_to_vzerouppers (void)
static unsigned int
rest_of_handle_insert_vzeroupper (void)
{
- int i;
-
- /* vzeroupper instructions are inserted immediately after reload to
- account for possible spills from 256bit or 512bit registers. The pass
- reuses mode switching infrastructure by re-running mode insertion
- pass, so disable entities that have already been processed. */
- for (i = 0; i < MAX_386_ENTITIES; i++)
- ix86_optimize_mode_switching[i] = 0;
+ if (TARGET_VZEROUPPER
+ && flag_expensive_optimizations
+ && !optimize_size)
+ {
+ /* vzeroupper instructions are inserted immediately after reload to
+ account for possible spills from 256bit or 512bit registers. The pass
+ reuses mode switching infrastructure by re-running mode insertion
+ pass, so disable entities that have already been processed. */
+ for (int i = 0; i < MAX_386_ENTITIES; i++)
+ ix86_optimize_mode_switching[i] = 0;
- ix86_optimize_mode_switching[AVX_U128] = 1;
+ ix86_optimize_mode_switching[AVX_U128] = 1;
- /* Call optimize_mode_switching. */
- g->get_passes ()->execute_pass_mode_switching ();
+ /* Call optimize_mode_switching. */
+ g->get_passes ()->execute_pass_mode_switching ();
+ }
ix86_add_reg_usage_to_vzerouppers ();
return 0;
}
@@ -1880,8 +1883,10 @@ public:
virtual bool gate (function *)
{
return TARGET_AVX
- && TARGET_VZEROUPPER && flag_expensive_optimizations
- && !optimize_size;
+ && ((TARGET_VZEROUPPER
+ && flag_expensive_optimizations
+ && !optimize_size)
+ || cfun->machine->has_explicit_vzeroupper);
}
virtual unsigned int execute (function *)
diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c
index e93935f..7865bc1 100644
--- a/gcc/config/i386/i386-options.c
+++ b/gcc/config/i386/i386-options.c
@@ -743,7 +743,7 @@ static const struct processor_costs *processor_cost_table[] =
&btver2_cost,
&znver1_cost,
&znver2_cost,
- &znver2_cost
+ &znver3_cost
};
/* Guarantee that the array is aligned with enum processor_type. */
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 2603333..540d4f4 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -10265,15 +10265,20 @@ darwin_local_data_pic (rtx disp)
&& XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
}
-/* True if operand X should be loaded from GOT. */
+/* True if the function symbol operand X should be loaded from GOT.
+
+ NB: In 32-bit mode, only non-PIC is allowed in inline assembly
+ statements, since a PIC register could not be available at the
+ call site. */
bool
ix86_force_load_from_GOT_p (rtx x)
{
- return ((TARGET_64BIT || HAVE_AS_IX86_GOT32X)
+ return ((TARGET_64BIT || (!flag_pic && HAVE_AS_IX86_GOT32X))
&& !TARGET_PECOFF && !TARGET_MACHO
- && !flag_pic
+ && (!flag_pic || this_is_asm_operands)
&& ix86_cmodel != CM_LARGE
+ && ix86_cmodel != CM_LARGE_PIC
&& GET_CODE (x) == SYMBOL_REF
&& SYMBOL_REF_FUNCTION_P (x)
&& (!flag_plt
@@ -12701,7 +12706,8 @@ print_reg (rtx x, int code, FILE *file)
y -- print "st(0)" instead of "st" as a register.
d -- print duplicated register operand for AVX instruction.
D -- print condition for SSE cmp instruction.
- P -- if PIC, print an @PLT suffix.
+ P -- if PIC, print an @PLT suffix. For -fno-plt, load function
+ address from GOT.
p -- print raw symbol name.
X -- don't print any sort of PIC '@' suffix for a symbol.
& -- print some in-use local-dynamic symbol name.
@@ -13445,7 +13451,23 @@ ix86_print_operand (FILE *file, rtx x, int code)
x = const0_rtx;
}
- if (code != 'P' && code != 'p')
+ if (code == 'P')
+ {
+ if (ix86_force_load_from_GOT_p (x))
+ {
+ /* For inline assembly statement, load function address
+ from GOT with 'P' operand modifier to avoid PLT. */
+ x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
+ (TARGET_64BIT
+ ? UNSPEC_GOTPCREL
+ : UNSPEC_GOT));
+ x = gen_rtx_CONST (Pmode, x);
+ x = gen_const_mem (Pmode, x);
+ ix86_print_operand (file, x, 'A');
+ return;
+ }
+ }
+ else if (code != 'p')
{
if (CONST_INT_P (x))
{
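
Taken together, the ix86_force_load_from_GOT_p and 'P' modifier changes make inline assembly that prints a function symbol with %P load the address from the GOT when a PLT reference is unwanted or unavailable. A hedged sketch of the affected pattern; the constraint choice is illustrative, not taken from the patch:

extern void bar (void);

void
foo (void)
{
  /* With -fno-plt on x86-64 the symbol is now printed as a GOT load,
     i.e. call *bar@GOTPCREL(%rip), rather than call bar@PLT.  */
  asm ("call %P0" : : "X" (bar));
}
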
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 69fddca..4874910 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2941,6 +2941,10 @@ struct GTY(()) machine_function {
/* True if the function needs a stack frame. */
BOOL_BITFIELD stack_frame_required : 1;
+ /* True if __builtin_ia32_vzeroupper () has been expanded in current
+ function. */
+ BOOL_BITFIELD has_explicit_vzeroupper : 1;
+
/* The largest alignment, in bytes, of stack slot actually used. */
unsigned int max_used_stack_alignment;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 2820f6d..9ff35d9 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -5219,6 +5219,18 @@
DONE;
})
+
+;; ix86_split_lea_for_addr emits the shifts as MULT to avoid it from being
+;; peephole2 optimized back into a lea. Split that into the shift during
+;; the following split pass.
+(define_split
+ [(set (match_operand:SWI48 0 "general_reg_operand")
+ (mult:SWI48 (match_dup 0) (match_operand:SWI48 1 "const1248_operand")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
;; Add instructions
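
This split undoes the MULT that ix86_split_lea_for_addr now emits, once the peephole2 pass that would have re-formed the LEA has run. It relies on LEA scales being 1, 2, 4 or 8, for which the multiply is exactly a left shift. A sketch of the arithmetic, using __builtin_ctz as a stand-in for GCC's exact_log2:

unsigned long
scale_by_shift (unsigned long x, int scale)
{
  /* For scale in {1, 2, 4, 8}: x * scale == x << log2 (scale).  */
  return x << __builtin_ctz (scale);
}
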
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index c6a2882..4c2b724 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -61,6 +61,9 @@
(define_mode_attr mmxdoublemode
[(V8QI "V8HI") (V4HI "V4SI")])
+(define_mode_attr Yv_Yw
+ [(V8QI "Yw") (V4HI "Yw") (V2SI "Yv") (V1DI "Yv") (V2SF "Yv")])
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Move patterns
@@ -1152,10 +1155,10 @@
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*mmx_<insn><mode>3"
- [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,<Yv_Yw>")
(plusminus:MMXMODEI8
- (match_operand:MMXMODEI8 1 "register_mmxmem_operand" "<comm>0,0,Yv")
- (match_operand:MMXMODEI8 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ (match_operand:MMXMODEI8 1 "register_mmxmem_operand" "<comm>0,0,<Yv_Yw>")
+ (match_operand:MMXMODEI8 2 "register_mmxmem_operand" "ym,x,<Yv_Yw>")))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
"@
@@ -1176,10 +1179,10 @@
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*mmx_<insn><mode>3"
- [(set (match_operand:MMXMODE12 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:MMXMODE12 0 "register_operand" "=y,x,Yw")
(sat_plusminus:MMXMODE12
- (match_operand:MMXMODE12 1 "register_mmxmem_operand" "<comm>0,0,Yv")
- (match_operand:MMXMODE12 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ (match_operand:MMXMODE12 1 "register_mmxmem_operand" "<comm>0,0,Yw")
+ (match_operand:MMXMODE12 2 "register_mmxmem_operand" "ym,x,Yw")))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
"@
@@ -1206,9 +1209,9 @@
"ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
(define_insn "*mmx_mulv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
- (mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")
- (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
+ (mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw")
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& ix86_binary_operator_ok (MULT, V4HImode, operands)"
"@
@@ -1234,14 +1237,14 @@
"ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
(define_insn "*mmx_smulv4hi3_highpart"
- [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
(truncate:V4HI
(lshiftrt:V4SI
(mult:V4SI
(sign_extend:V4SI
- (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
+ (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw"))
(sign_extend:V4SI
- (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")))
(const_int 16))))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& ix86_binary_operator_ok (MULT, V4HImode, operands)"
@@ -1269,14 +1272,14 @@
"ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
(define_insn "*mmx_umulv4hi3_highpart"
- [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
(truncate:V4HI
(lshiftrt:V4SI
(mult:V4SI
(zero_extend:V4SI
- (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
+ (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw"))
(zero_extend:V4SI
- (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")))
(const_int 16))))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& (TARGET_SSE || TARGET_3DNOW_A)
@@ -1313,16 +1316,16 @@
"ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
(define_insn "*mmx_pmaddwd"
- [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yw")
(plus:V2SI
(mult:V2SI
(sign_extend:V2SI
(vec_select:V2HI
- (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")
+ (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw")
(parallel [(const_int 0) (const_int 2)])))
(sign_extend:V2SI
(vec_select:V2HI
- (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")
(parallel [(const_int 0) (const_int 2)]))))
(mult:V2SI
(sign_extend:V2SI
@@ -1432,10 +1435,10 @@
"ix86_fixup_binary_operands_no_copy (<CODE>, V4HImode, operands);")
(define_insn "*mmx_<code>v4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
(smaxmin:V4HI
- (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")
- (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw")
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (<CODE>, V4HImode, operands)"
@@ -1466,10 +1469,10 @@
"ix86_fixup_binary_operands_no_copy (<CODE>, V8QImode, operands);")
(define_insn "*mmx_<code>v8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
(umaxmin:V8QI
- (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yv")
- (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yw")
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (<CODE>, V8QImode, operands)"
@@ -1483,10 +1486,10 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn "mmx_ashr<mode>3"
- [(set (match_operand:MMXMODE24 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:MMXMODE24 0 "register_operand" "=y,x,<Yv_Yw>")
(ashiftrt:MMXMODE24
- (match_operand:MMXMODE24 1 "register_operand" "0,0,Yv")
- (match_operand:DI 2 "nonmemory_operand" "yN,xN,YvN")))]
+ (match_operand:MMXMODE24 1 "register_operand" "0,0,<Yv_Yw>")
+ (match_operand:DI 2 "nonmemory_operand" "yN,xN,<Yv_Yw>N")))]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
"@
psra<mmxvecsize>\t{%2, %0|%0, %2}
@@ -1509,10 +1512,10 @@
"TARGET_MMX_WITH_SSE")
(define_insn "mmx_<insn><mode>3"
- [(set (match_operand:MMXMODE248 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:MMXMODE248 0 "register_operand" "=y,x,<Yv_Yw>")
(any_lshift:MMXMODE248
- (match_operand:MMXMODE248 1 "register_operand" "0,0,Yv")
- (match_operand:DI 2 "nonmemory_operand" "yN,xN,YvN")))]
+ (match_operand:MMXMODE248 1 "register_operand" "0,0,<Yv_Yw>")
+ (match_operand:DI 2 "nonmemory_operand" "yN,xN,<Yv_Yw>N")))]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
"@
p<vshift><mmxvecsize>\t{%2, %0|%0, %2}
@@ -1549,10 +1552,10 @@
"ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
(define_insn "*mmx_eq<mode>3"
- [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x")
(eq:MMXMODEI
- (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,Yv")
- (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,x")
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,x")))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
"@
@@ -1565,10 +1568,10 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn "mmx_gt<mode>3"
- [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x")
(gt:MMXMODEI
- (match_operand:MMXMODEI 1 "register_operand" "0,0,Yv")
- (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ (match_operand:MMXMODEI 1 "register_operand" "0,0,x")
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,x")))]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
"@
pcmpgt<mmxvecsize>\t{%2, %0|%0, %2}
@@ -1594,19 +1597,20 @@
"operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));")
(define_insn "mmx_andnot<mode>3"
- [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x,v")
(and:MMXMODEI
- (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0,0,Yv"))
- (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0,0,x,v"))
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,x,v")))]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
"@
pandn\t{%2, %0|%0, %2}
pandn\t{%2, %0|%0, %2}
- vpandn\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "*,sse2_noavx,avx")
- (set_attr "mmx_isa" "native,*,*")
- (set_attr "type" "mmxadd,sselog,sselog")
- (set_attr "mode" "DI,TI,TI")])
+ vpandn\t{%2, %1, %0|%0, %1, %2}
+ vpandnd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,sse2_noavx,avx,avx512vl")
+ (set_attr "mmx_isa" "native,*,*,*")
+ (set_attr "type" "mmxadd,sselog,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI,TI")])
(define_expand "mmx_<code><mode>3"
[(set (match_operand:MMXMODEI 0 "register_operand")
@@ -1625,20 +1629,21 @@
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*mmx_<code><mode>3"
- [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x,v")
(any_logic:MMXMODEI
- (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,Yv")
- (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,x,v")
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,x,v")))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
"@
p<logic>\t{%2, %0|%0, %2}
p<logic>\t{%2, %0|%0, %2}
- vp<logic>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "*,sse2_noavx,avx")
- (set_attr "mmx_isa" "native,*,*")
- (set_attr "type" "mmxadd,sselog,sselog")
- (set_attr "mode" "DI,TI,TI")])
+ vp<logic>\t{%2, %1, %0|%0, %1, %2}
+ vp<logic>d\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,sse2_noavx,avx,avx512vl")
+ (set_attr "mmx_isa" "native,*,*,*")
+ (set_attr "type" "mmxadd,sselog,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI,TI")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
@@ -1652,12 +1657,12 @@
(define_code_attr s_trunsuffix [(ss_truncate "s") (us_truncate "u")])
(define_insn_and_split "mmx_pack<s_trunsuffix>swb"
- [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
(vec_concat:V8QI
(any_s_truncate:V4QI
- (match_operand:V4HI 1 "register_operand" "0,0,Yv"))
+ (match_operand:V4HI 1 "register_operand" "0,0,Yw"))
(any_s_truncate:V4QI
- (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))))]
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw"))))]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
"@
pack<s_trunsuffix>swb\t{%2, %0|%0, %2}
@@ -1672,12 +1677,12 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn_and_split "mmx_packssdw"
- [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
(vec_concat:V4HI
(ss_truncate:V2HI
- (match_operand:V2SI 1 "register_operand" "0,0,Yv"))
+ (match_operand:V2SI 1 "register_operand" "0,0,Yw"))
(ss_truncate:V2HI
- (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv"))))]
+ (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yw"))))]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
"@
packssdw\t{%2, %0|%0, %2}
@@ -1692,11 +1697,11 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn_and_split "mmx_punpckhbw"
- [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
(vec_select:V8QI
(vec_concat:V16QI
- (match_operand:V8QI 1 "register_operand" "0,0,Yv")
- (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv"))
+ (match_operand:V8QI 1 "register_operand" "0,0,Yw")
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw"))
(parallel [(const_int 4) (const_int 12)
(const_int 5) (const_int 13)
(const_int 6) (const_int 14)
@@ -1715,11 +1720,11 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn_and_split "mmx_punpcklbw"
- [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
(vec_select:V8QI
(vec_concat:V16QI
- (match_operand:V8QI 1 "register_operand" "0,0,Yv")
- (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv"))
+ (match_operand:V8QI 1 "register_operand" "0,0,Yw")
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw"))
(parallel [(const_int 0) (const_int 8)
(const_int 1) (const_int 9)
(const_int 2) (const_int 10)
@@ -1738,11 +1743,11 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn_and_split "mmx_punpckhwd"
- [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
(vec_select:V4HI
(vec_concat:V8HI
- (match_operand:V4HI 1 "register_operand" "0,0,Yv")
- (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))
+ (match_operand:V4HI 1 "register_operand" "0,0,Yw")
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw"))
(parallel [(const_int 2) (const_int 6)
(const_int 3) (const_int 7)])))]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
@@ -1759,11 +1764,11 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn_and_split "mmx_punpcklwd"
- [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
(vec_select:V4HI
(vec_concat:V8HI
- (match_operand:V4HI 1 "register_operand" "0,0,Yv")
- (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))
+ (match_operand:V4HI 1 "register_operand" "0,0,Yw")
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw"))
(parallel [(const_int 0) (const_int 4)
(const_int 1) (const_int 5)])))]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
@@ -1866,11 +1871,11 @@
})
(define_insn "*mmx_pinsrw"
- [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,YW")
(vec_merge:V4HI
(vec_duplicate:V4HI
(match_operand:HI 2 "nonimmediate_operand" "rm,rm,rm"))
- (match_operand:V4HI 1 "register_operand" "0,0,Yv")
+ (match_operand:V4HI 1 "register_operand" "0,0,YW")
(match_operand:SI 3 "const_int_operand")))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& (TARGET_SSE || TARGET_3DNOW_A)
@@ -1902,11 +1907,11 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn "*mmx_pinsrb"
- [(set (match_operand:V8QI 0 "register_operand" "=x,Yv")
+ [(set (match_operand:V8QI 0 "register_operand" "=x,YW")
(vec_merge:V8QI
(vec_duplicate:V8QI
(match_operand:QI 2 "nonimmediate_operand" "rm,rm"))
- (match_operand:V8QI 1 "register_operand" "0,Yv")
+ (match_operand:V8QI 1 "register_operand" "0,YW")
(match_operand:SI 3 "const_int_operand")))]
"TARGET_MMX_WITH_SSE && TARGET_SSE4_1
&& ((unsigned) exact_log2 (INTVAL (operands[3]))
@@ -1940,7 +1945,7 @@
(define_insn "*mmx_pextrw"
[(set (match_operand:HI 0 "register_sse4nonimm_operand" "=r,r,m")
(vec_select:HI
- (match_operand:V4HI 1 "register_operand" "y,Yv,Yv")
+ (match_operand:V4HI 1 "register_operand" "y,YW,YW")
(parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& (TARGET_SSE || TARGET_3DNOW_A)"
@@ -1959,7 +1964,7 @@
[(set (match_operand:SWI48 0 "register_operand" "=r,r")
(zero_extend:SWI48
(vec_select:HI
- (match_operand:V4HI 1 "register_operand" "y,Yv")
+ (match_operand:V4HI 1 "register_operand" "y,YW")
(parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n")]))))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& (TARGET_SSE || TARGET_3DNOW_A)"
@@ -1976,7 +1981,7 @@
(define_insn "*mmx_pextrb"
[(set (match_operand:QI 0 "nonimmediate_operand" "=r,m")
(vec_select:QI
- (match_operand:V8QI 1 "register_operand" "Yv,Yv")
+ (match_operand:V8QI 1 "register_operand" "YW,YW")
(parallel [(match_operand:SI 2 "const_0_to_7_operand" "n,n")])))]
"TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
"@
@@ -1993,7 +1998,7 @@
[(set (match_operand:SWI248 0 "register_operand" "=r")
(zero_extend:SWI248
(vec_select:QI
- (match_operand:V8QI 1 "register_operand" "Yv")
+ (match_operand:V8QI 1 "register_operand" "YW")
(parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
"TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
"%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
@@ -2394,15 +2399,15 @@
})
(define_insn "*mmx_uavgv8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
(truncate:V8QI
(lshiftrt:V8HI
(plus:V8HI
(plus:V8HI
(zero_extend:V8HI
- (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yv"))
+ (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yw"))
(zero_extend:V8HI
- (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")))
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")))
(const_vector:V8HI [(const_int 1) (const_int 1)
(const_int 1) (const_int 1)
(const_int 1) (const_int 1)
@@ -2440,15 +2445,15 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn "*mmx_uavgv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
(truncate:V4HI
(lshiftrt:V4SI
(plus:V4SI
(plus:V4SI
(zero_extend:V4SI
- (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
+ (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw"))
(zero_extend:V4SI
- (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")))
(const_vector:V4SI [(const_int 1) (const_int 1)
(const_int 1) (const_int 1)]))
(const_int 1))))]
@@ -2483,9 +2488,9 @@
})
(define_insn "mmx_psadbw"
- [(set (match_operand:V1DI 0 "register_operand" "=y,x,Yv")
- (unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
- (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")]
+ [(set (match_operand:V1DI 0 "register_operand" "=y,x,Yw")
+ (unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0,0,Yw")
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")]
UNSPEC_PSADBW))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& (TARGET_SSE || TARGET_3DNOW_A)"
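
The last alternative in these mmx.md patterns serves TARGET_MMX_WITH_SSE, where 64-bit vector intrinsics execute in XMM registers on x86-64 without native MMX; that is the alternative whose constraint the Yv_Yw attribute above switches to Yw for byte/word operations. A sketch of code that lands on it, assuming -msse2 -mno-mmx on x86-64:

#include <mmintrin.h>

/* paddw performed in an XMM register; with the Yw constraint it may
   be allocated to %xmm16-%xmm31 only when AVX512BW permits.  */
__m64 add_pi16 (__m64 a, __m64 b) { return _mm_add_pi16 (a, b); }
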
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ca4372d..43e4d57 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -566,7 +566,8 @@
(V4SI "v") (V8SI "v") (V16SI "v")
(V2DI "v") (V4DI "v") (V8DI "v")
(V4SF "v") (V8SF "v") (V16SF "v")
- (V2DF "v") (V4DF "v") (V8DF "v")])
+ (V2DF "v") (V4DF "v") (V8DF "v")
+ (TI "Yw") (V1TI "Yw") (V2TI "Yw") (V4TI "v")])
(define_mode_attr sse2_avx_avx512f
[(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
@@ -11736,10 +11737,10 @@
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*<sse2_avx2>_<insn><mode>3<mask_name>"
- [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,v")
+ [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,<v_Yw>")
(sat_plusminus:VI12_AVX2_AVX512BW
- (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "<comm>0,v")
- (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,vm")))]
+ (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "<comm>0,<v_Yw>")
+ (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,<v_Yw>m")))]
"TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
&& ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
"@
@@ -11827,14 +11828,14 @@
"ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
(define_insn "*<s>mul<mode>3_highpart<mask_name>"
- [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,<v_Yw>")
(truncate:VI2_AVX2
(lshiftrt:<ssedoublemode>
(mult:<ssedoublemode>
(any_extend:<ssedoublemode>
- (match_operand:VI2_AVX2 1 "vector_operand" "%0,v"))
+ (match_operand:VI2_AVX2 1 "vector_operand" "%0,<v_Yw>"))
(any_extend:<ssedoublemode>
- (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))
+ (match_operand:VI2_AVX2 2 "vector_operand" "xBm,<v_Yw>m")))
(const_int 16))))]
"TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
&& <mask_mode512bit_condition> && <mask_avx512bw_condition>"
@@ -12128,19 +12129,19 @@
"ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
(define_insn "*avx2_pmaddwd"
- [(set (match_operand:V8SI 0 "register_operand" "=x,v")
+ [(set (match_operand:V8SI 0 "register_operand" "=Yw")
(plus:V8SI
(mult:V8SI
(sign_extend:V8SI
(vec_select:V8HI
- (match_operand:V16HI 1 "nonimmediate_operand" "%x,v")
+ (match_operand:V16HI 1 "nonimmediate_operand" "%Yw")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
(const_int 12) (const_int 14)])))
(sign_extend:V8SI
(vec_select:V8HI
- (match_operand:V16HI 2 "nonimmediate_operand" "xm,vm")
+ (match_operand:V16HI 2 "nonimmediate_operand" "Ywm")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
@@ -12161,8 +12162,7 @@
"TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseiadd")
- (set_attr "isa" "*,avx512bw")
- (set_attr "prefix" "vex,evex")
+ (set_attr "prefix" "vex")
(set_attr "mode" "OI")])
(define_expand "sse2_pmaddwd"
@@ -12192,17 +12192,17 @@
"ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
(define_insn "*sse2_pmaddwd"
- [(set (match_operand:V4SI 0 "register_operand" "=x,x,v")
+ [(set (match_operand:V4SI 0 "register_operand" "=x,Yw")
(plus:V4SI
(mult:V4SI
(sign_extend:V4SI
(vec_select:V4HI
- (match_operand:V8HI 1 "vector_operand" "%0,x,v")
+ (match_operand:V8HI 1 "vector_operand" "%0,Yw")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)])))
(sign_extend:V4SI
(vec_select:V4HI
- (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")
+ (match_operand:V8HI 2 "vector_operand" "xBm,Ywm")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)]))))
(mult:V4SI
@@ -12217,13 +12217,12 @@
"TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"@
pmaddwd\t{%2, %0|%0, %2}
- vpmaddwd\t{%2, %1, %0|%0, %1, %2}
vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,avx")
(set_attr "type" "sseiadd")
(set_attr "atom_unit" "simul")
- (set_attr "prefix_data16" "1,*,*")
- (set_attr "prefix" "orig,vex,evex")
+ (set_attr "prefix_data16" "1,*")
+ (set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
(define_insn "avx512dq_mul<mode>3<mask_name>"
@@ -12449,10 +12448,10 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "ashr<mode>3"
- [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
+ [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,<v_Yw>")
(ashiftrt:VI24_AVX2
- (match_operand:VI24_AVX2 1 "register_operand" "0,x")
- (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
+ (match_operand:VI24_AVX2 1 "register_operand" "0,<v_Yw>")
+ (match_operand:DI 2 "nonmemory_operand" "xN,YwN")))]
"TARGET_SSE2"
"@
psra<ssemodesuffix>\t{%2, %0|%0, %2}
@@ -12496,10 +12495,10 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<insn><mode>3"
- [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
+ [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,<v_Yw>")
(any_lshift:VI248_AVX2
- (match_operand:VI248_AVX2 1 "register_operand" "0,x")
- (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
+ (match_operand:VI248_AVX2 1 "register_operand" "0,<v_Yw>")
+ (match_operand:DI 2 "nonmemory_operand" "xN,YwN")))]
"TARGET_SSE2"
"@
p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
@@ -12571,9 +12570,9 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<sse2_avx2>_<insn><mode>3"
- [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
+ [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,Yw")
(any_lshift:VIMAX_AVX2
- (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
+ (match_operand:VIMAX_AVX2 1 "register_operand" "0,Yw")
(match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
"TARGET_SSE2"
{
@@ -12771,20 +12770,19 @@
(set_attr "mode" "TI")])
(define_insn "*<code>v8hi3"
- [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
+ [(set (match_operand:V8HI 0 "register_operand" "=x,Yw")
(smaxmin:V8HI
- (match_operand:V8HI 1 "vector_operand" "%0,x,v")
- (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")))]
+ (match_operand:V8HI 1 "vector_operand" "%0,Yw")
+ (match_operand:V8HI 2 "vector_operand" "xBm,Ywm")))]
"TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"@
p<maxmin_int>w\t{%2, %0|%0, %2}
- vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}
vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,avx")
(set_attr "type" "sseiadd")
- (set_attr "prefix_data16" "1,*,*")
- (set_attr "prefix_extra" "*,1,1")
- (set_attr "prefix" "orig,vex,evex")
+ (set_attr "prefix_data16" "1,*")
+ (set_attr "prefix_extra" "*,1")
+ (set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
(define_expand "<code><mode>3"
@@ -12856,20 +12854,19 @@
(set_attr "mode" "TI")])
(define_insn "*<code>v16qi3"
- [(set (match_operand:V16QI 0 "register_operand" "=x,x,v")
+ [(set (match_operand:V16QI 0 "register_operand" "=x,Yw")
(umaxmin:V16QI
- (match_operand:V16QI 1 "vector_operand" "%0,x,v")
- (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")))]
+ (match_operand:V16QI 1 "vector_operand" "%0,Yw")
+ (match_operand:V16QI 2 "vector_operand" "xBm,Ywm")))]
"TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"@
p<maxmin_int>b\t{%2, %0|%0, %2}
- vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}
vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,avx")
(set_attr "type" "sseiadd")
- (set_attr "prefix_data16" "1,*,*")
- (set_attr "prefix_extra" "*,1,1")
- (set_attr "prefix" "orig,vex,evex")
+ (set_attr "prefix_data16" "1,*")
+ (set_attr "prefix_extra" "*,1")
+ (set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -13888,57 +13885,54 @@
})
(define_insn "<sse2_avx2>_packsswb<mask_name>"
- [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
+ [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,<v_Yw>")
(vec_concat:VI1_AVX512
(ss_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
+ (match_operand:<sseunpackmode> 1 "register_operand" "0,<v_Yw>"))
(ss_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
+ (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,<v_Yw>m"))))]
"TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
"@
packsswb\t{%2, %0|%0, %2}
- vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
- [(set_attr "isa" "noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
- (set_attr "prefix_data16" "1,*,*")
- (set_attr "prefix" "orig,<mask_prefix>,evex")
+ (set_attr "prefix_data16" "1,*")
+ (set_attr "prefix" "orig,<mask_prefix>")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<sse2_avx2>_packssdw<mask_name>"
- [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,<v_Yw>")
(vec_concat:VI2_AVX2
(ss_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
+ (match_operand:<sseunpackmode> 1 "register_operand" "0,<v_Yw>"))
(ss_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
+ (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,<v_Yw>m"))))]
"TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
"@
packssdw\t{%2, %0|%0, %2}
- vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
- [(set_attr "isa" "noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
- (set_attr "prefix_data16" "1,*,*")
- (set_attr "prefix" "orig,<mask_prefix>,evex")
+ (set_attr "prefix_data16" "1,*")
+ (set_attr "prefix" "orig,<mask_prefix>")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<sse2_avx2>_packuswb<mask_name>"
- [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
+ [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,<v_Yw>")
(vec_concat:VI1_AVX512
(us_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
+ (match_operand:<sseunpackmode> 1 "register_operand" "0,<v_Yw>"))
(us_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
+ (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,<v_Yw>m"))))]
"TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
"@
packuswb\t{%2, %0|%0, %2}
- vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
- [(set_attr "isa" "noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
- (set_attr "prefix_data16" "1,*,*")
- (set_attr "prefix" "orig,<mask_prefix>,evex")
+ (set_attr "prefix_data16" "1,*")
+ (set_attr "prefix" "orig,<mask_prefix>")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx512bw_interleave_highv64qi<mask_name>"
@@ -13986,11 +13980,11 @@
(set_attr "mode" "XI")])
(define_insn "avx2_interleave_highv32qi<mask_name>"
- [(set (match_operand:V32QI 0 "register_operand" "=v")
+ [(set (match_operand:V32QI 0 "register_operand" "=Yw")
(vec_select:V32QI
(vec_concat:V64QI
- (match_operand:V32QI 1 "register_operand" "v")
- (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
+ (match_operand:V32QI 1 "register_operand" "Yw")
+ (match_operand:V32QI 2 "nonimmediate_operand" "Ywm"))
(parallel [(const_int 8) (const_int 40)
(const_int 9) (const_int 41)
(const_int 10) (const_int 42)
@@ -14007,18 +14001,18 @@
(const_int 29) (const_int 61)
(const_int 30) (const_int 62)
(const_int 31) (const_int 63)])))]
- "TARGET_AVX2 && <mask_avx512vl_condition>"
+ "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
"vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "<mask_prefix>")
(set_attr "mode" "OI")])
(define_insn "vec_interleave_highv16qi<mask_name>"
- [(set (match_operand:V16QI 0 "register_operand" "=x,v")
+ [(set (match_operand:V16QI 0 "register_operand" "=x,Yw")
(vec_select:V16QI
(vec_concat:V32QI
- (match_operand:V16QI 1 "register_operand" "0,v")
- (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
+ (match_operand:V16QI 1 "register_operand" "0,Yw")
+ (match_operand:V16QI 2 "vector_operand" "xBm,Ywm"))
(parallel [(const_int 8) (const_int 24)
(const_int 9) (const_int 25)
(const_int 10) (const_int 26)
@@ -14027,7 +14021,7 @@
(const_int 13) (const_int 29)
(const_int 14) (const_int 30)
(const_int 15) (const_int 31)])))]
- "TARGET_SSE2 && <mask_avx512vl_condition>"
+ "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
"@
punpckhbw\t{%2, %0|%0, %2}
vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
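
The punpckhbw patterns above implement the high-half byte interleave; at the intrinsic level:

#include <emmintrin.h>

__m128i
interleave_high_bytes (__m128i a, __m128i b)
{
  /* Emits punpckhbw / vpunpckhbw.  */
  return _mm_unpackhi_epi8 (a, b);
}
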
@@ -14082,11 +14076,11 @@
(set_attr "mode" "XI")])
(define_insn "avx2_interleave_lowv32qi<mask_name>"
- [(set (match_operand:V32QI 0 "register_operand" "=v")
+ [(set (match_operand:V32QI 0 "register_operand" "=Yw")
(vec_select:V32QI
(vec_concat:V64QI
- (match_operand:V32QI 1 "register_operand" "v")
- (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
+ (match_operand:V32QI 1 "register_operand" "Yw")
+ (match_operand:V32QI 2 "nonimmediate_operand" "Ywm"))
(parallel [(const_int 0) (const_int 32)
(const_int 1) (const_int 33)
(const_int 2) (const_int 34)
@@ -14110,11 +14104,11 @@
(set_attr "mode" "OI")])
(define_insn "vec_interleave_lowv16qi<mask_name>"
- [(set (match_operand:V16QI 0 "register_operand" "=x,v")
+ [(set (match_operand:V16QI 0 "register_operand" "=x,Yw")
(vec_select:V16QI
(vec_concat:V32QI
- (match_operand:V16QI 1 "register_operand" "0,v")
- (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
+ (match_operand:V16QI 1 "register_operand" "0,Yw")
+ (match_operand:V16QI 2 "vector_operand" "xBm,Ywm"))
(parallel [(const_int 0) (const_int 16)
(const_int 1) (const_int 17)
(const_int 2) (const_int 18)
@@ -14162,11 +14156,11 @@
(set_attr "mode" "XI")])
(define_insn "avx2_interleave_highv16hi<mask_name>"
- [(set (match_operand:V16HI 0 "register_operand" "=v")
+ [(set (match_operand:V16HI 0 "register_operand" "=Yw")
(vec_select:V16HI
(vec_concat:V32HI
- (match_operand:V16HI 1 "register_operand" "v")
- (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
+ (match_operand:V16HI 1 "register_operand" "Yw")
+ (match_operand:V16HI 2 "nonimmediate_operand" "Ywm"))
(parallel [(const_int 4) (const_int 20)
(const_int 5) (const_int 21)
(const_int 6) (const_int 22)
@@ -14182,11 +14176,11 @@
(set_attr "mode" "OI")])
(define_insn "vec_interleave_highv8hi<mask_name>"
- [(set (match_operand:V8HI 0 "register_operand" "=x,v")
+ [(set (match_operand:V8HI 0 "register_operand" "=x,Yw")
(vec_select:V8HI
(vec_concat:V16HI
- (match_operand:V8HI 1 "register_operand" "0,v")
- (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
+ (match_operand:V8HI 1 "register_operand" "0,Yw")
+ (match_operand:V8HI 2 "vector_operand" "xBm,Ywm"))
(parallel [(const_int 4) (const_int 12)
(const_int 5) (const_int 13)
(const_int 6) (const_int 14)
@@ -14230,11 +14224,11 @@
(set_attr "mode" "XI")])
(define_insn "avx2_interleave_lowv16hi<mask_name>"
- [(set (match_operand:V16HI 0 "register_operand" "=v")
+ [(set (match_operand:V16HI 0 "register_operand" "=Yw")
(vec_select:V16HI
(vec_concat:V32HI
- (match_operand:V16HI 1 "register_operand" "v")
- (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
+ (match_operand:V16HI 1 "register_operand" "Yw")
+ (match_operand:V16HI 2 "nonimmediate_operand" "Ywm"))
(parallel [(const_int 0) (const_int 16)
(const_int 1) (const_int 17)
(const_int 2) (const_int 18)
@@ -14250,11 +14244,11 @@
(set_attr "mode" "OI")])
(define_insn "vec_interleave_lowv8hi<mask_name>"
- [(set (match_operand:V8HI 0 "register_operand" "=x,v")
+ [(set (match_operand:V8HI 0 "register_operand" "=x,Yw")
(vec_select:V8HI
(vec_concat:V16HI
- (match_operand:V8HI 1 "register_operand" "0,v")
- (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
+ (match_operand:V8HI 1 "register_operand" "0,Yw")
+ (match_operand:V8HI 2 "vector_operand" "xBm,Ywm"))
(parallel [(const_int 0) (const_int 8)
(const_int 1) (const_int 9)
(const_int 2) (const_int 10)
@@ -15190,9 +15184,9 @@
})
(define_insn "avx2_pshuflw_1<mask_name>"
- [(set (match_operand:V16HI 0 "register_operand" "=v")
+ [(set (match_operand:V16HI 0 "register_operand" "=Yw")
(vec_select:V16HI
- (match_operand:V16HI 1 "nonimmediate_operand" "vm")
+ (match_operand:V16HI 1 "nonimmediate_operand" "Ywm")
(parallel [(match_operand 2 "const_0_to_3_operand")
(match_operand 3 "const_0_to_3_operand")
(match_operand 4 "const_0_to_3_operand")
@@ -15264,9 +15258,9 @@
})
(define_insn "sse2_pshuflw_1<mask_name>"
- [(set (match_operand:V8HI 0 "register_operand" "=v")
+ [(set (match_operand:V8HI 0 "register_operand" "=Yw")
(vec_select:V8HI
- (match_operand:V8HI 1 "vector_operand" "vBm")
+ (match_operand:V8HI 1 "vector_operand" "YwBm")
(parallel [(match_operand 2 "const_0_to_3_operand")
(match_operand 3 "const_0_to_3_operand")
(match_operand 4 "const_0_to_3_operand")
@@ -15347,9 +15341,9 @@
})
(define_insn "avx2_pshufhw_1<mask_name>"
- [(set (match_operand:V16HI 0 "register_operand" "=v")
+ [(set (match_operand:V16HI 0 "register_operand" "=Yw")
(vec_select:V16HI
- (match_operand:V16HI 1 "nonimmediate_operand" "vm")
+ (match_operand:V16HI 1 "nonimmediate_operand" "Ywm")
(parallel [(const_int 0)
(const_int 1)
(const_int 2)
@@ -15421,9 +15415,9 @@
})
(define_insn "sse2_pshufhw_1<mask_name>"
- [(set (match_operand:V8HI 0 "register_operand" "=v")
+ [(set (match_operand:V8HI 0 "register_operand" "=Yw")
(vec_select:V8HI
- (match_operand:V8HI 1 "vector_operand" "vBm")
+ (match_operand:V8HI 1 "vector_operand" "YwBm")
(parallel [(const_int 0)
(const_int 1)
(const_int 2)
@@ -15489,18 +15483,16 @@
[(V16QI "TARGET_SSE4_1") V8HI])
(define_insn "*vec_extract<mode>"
- [(set (match_operand:<ssescalarmode> 0 "register_sse4nonimm_operand" "=r,m,r,m")
+ [(set (match_operand:<ssescalarmode> 0 "register_sse4nonimm_operand" "=r,m")
(vec_select:<ssescalarmode>
- (match_operand:PEXTR_MODE12 1 "register_operand" "x,x,v,v")
+ (match_operand:PEXTR_MODE12 1 "register_operand" "YW,YW")
(parallel
[(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
"TARGET_SSE2"
"@
%vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
- %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
- vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
- vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "*,sse4,avx512bw,avx512bw")
+ %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,sse4")
(set_attr "type" "sselog1")
(set_attr "prefix_data16" "1")
(set (attr "prefix_extra")
@@ -15510,23 +15502,20 @@
(const_string "*")
(const_string "1")))
(set_attr "length_immediate" "1")
- (set_attr "prefix" "maybe_vex,maybe_vex,evex,evex")
+ (set_attr "prefix" "maybe_vex,maybe_vex")
(set_attr "mode" "TI")])
(define_insn "*vec_extract<PEXTR_MODE12:mode>_zext"
- [(set (match_operand:SWI48 0 "register_operand" "=r,r")
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
(zero_extend:SWI48
(vec_select:<PEXTR_MODE12:ssescalarmode>
- (match_operand:PEXTR_MODE12 1 "register_operand" "x,v")
+ (match_operand:PEXTR_MODE12 1 "register_operand" "YW")
(parallel
[(match_operand:SI 2
"const_0_to_<PEXTR_MODE12:ssescalarnummask>_operand")]))))]
"TARGET_SSE2"
- "@
- %vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
- vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}"
- [(set_attr "isa" "*,avx512bw")
- (set_attr "type" "sselog1")
+ "%vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}"
+ [(set_attr "type" "sselog1")
(set_attr "prefix_data16" "1")
(set (attr "prefix_extra")
(if_then_else
@@ -15538,18 +15527,15 @@
(set_attr "mode" "TI")])
(define_insn "*vec_extractv16qi_zext"
- [(set (match_operand:HI 0 "register_operand" "=r,r")
+ [(set (match_operand:HI 0 "register_operand" "=r")
(zero_extend:HI
(vec_select:QI
- (match_operand:V16QI 1 "register_operand" "x,v")
+ (match_operand:V16QI 1 "register_operand" "YW")
(parallel
[(match_operand:SI 2 "const_0_to_15_operand")]))))]
"TARGET_SSE4_1"
- "@
- %vpextrb\t{%2, %1, %k0|%k0, %1, %2}
- vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
- [(set_attr "isa" "*,avx512bw")
- (set_attr "type" "sselog1")
+ "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
+ [(set_attr "type" "sselog1")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
@@ -15656,9 +15642,9 @@
"operands[1] = gen_lowpart (SImode, operands[1]);")
(define_insn "*vec_extractv4si"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm,Yr,*x,x,Yv")
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm,Yr,*x,Yw")
(vec_select:SI
- (match_operand:V4SI 1 "register_operand" "x,v,0,0,x,v")
+ (match_operand:V4SI 1 "register_operand" " x, v, 0, 0,Yw")
(parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
"TARGET_SSE4_1"
{
@@ -15674,7 +15660,6 @@
return "psrldq\t{%2, %0|%0, %2}";
case 4:
- case 5:
operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
@@ -15682,14 +15667,14 @@
gcc_unreachable ();
}
}
- [(set_attr "isa" "*,avx512dq,noavx,noavx,avx,avx512bw")
- (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1,sseishft1,sseishft1")
+ [(set_attr "isa" "*,avx512dq,noavx,noavx,avx")
+ (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1,sseishft1")
(set (attr "prefix_extra")
(if_then_else (eq_attr "alternative" "0,1")
(const_string "1")
(const_string "*")))
(set_attr "length_immediate" "1")
- (set_attr "prefix" "maybe_vex,evex,orig,orig,vex,evex")
+ (set_attr "prefix" "maybe_vex,evex,orig,orig,maybe_vex")
(set_attr "mode" "TI")])
(define_insn "*vec_extractv4si_zext"
@@ -16213,15 +16198,15 @@
})
(define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
- [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,v")
+ [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,<v_Yw>")
(truncate:VI12_AVX2_AVX512BW
(lshiftrt:<ssedoublemode>
(plus:<ssedoublemode>
(plus:<ssedoublemode>
(zero_extend:<ssedoublemode>
- (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "%0,v"))
+ (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "%0,<v_Yw>"))
(zero_extend:<ssedoublemode>
- (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,vm")))
+ (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,<v_Yw>m")))
(match_operand:<ssedoublemode> <mask_expand_op3> "const1_operand"))
(const_int 1))))]
"TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
@@ -16238,10 +16223,10 @@
;; The correct representation for this is absolutely enormous, and
;; surely not generally useful.
(define_insn "<sse2_avx2>_psadbw"
- [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
+ [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,YW")
(unspec:VI8_AVX2_AVX512BW
- [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
- (match_operand:<ssebytemode> 2 "vector_operand" "xBm,vm")]
+ [(match_operand:<ssebytemode> 1 "register_operand" "0,YW")
+ (match_operand:<ssebytemode> 2 "vector_operand" "xBm,YWm")]
UNSPEC_PSADBW))]
"TARGET_SSE2"
"@
@@ -16815,12 +16800,12 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn "avx2_pmaddubsw256"
- [(set (match_operand:V16HI 0 "register_operand" "=x,v")
+ [(set (match_operand:V16HI 0 "register_operand" "=Yw")
(ss_plus:V16HI
(mult:V16HI
(zero_extend:V16HI
(vec_select:V16QI
- (match_operand:V32QI 1 "register_operand" "x,v")
+ (match_operand:V32QI 1 "register_operand" "Yw")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
@@ -16831,7 +16816,7 @@
(const_int 28) (const_int 30)])))
(sign_extend:V16HI
(vec_select:V16QI
- (match_operand:V32QI 2 "nonimmediate_operand" "xm,vm")
+ (match_operand:V32QI 2 "nonimmediate_operand" "Ywm")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
@@ -16863,10 +16848,9 @@
(const_int 29) (const_int 31)]))))))]
"TARGET_AVX2"
"vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "*,avx512bw")
- (set_attr "type" "sseiadd")
+ [(set_attr "type" "sseiadd")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "vex,evex")
+ (set_attr "prefix" "vex")
(set_attr "mode" "OI")])
;; The correct representation for this is absolutely enormous, and
@@ -16919,19 +16903,19 @@
(set_attr "mode" "XI")])
(define_insn "ssse3_pmaddubsw128"
- [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
+ [(set (match_operand:V8HI 0 "register_operand" "=x,Yw")
(ss_plus:V8HI
(mult:V8HI
(zero_extend:V8HI
(vec_select:V8QI
- (match_operand:V16QI 1 "register_operand" "0,x,v")
+ (match_operand:V16QI 1 "register_operand" "0,Yw")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
(const_int 12) (const_int 14)])))
(sign_extend:V8HI
(vec_select:V8QI
- (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")
+ (match_operand:V16QI 2 "vector_operand" "xBm,Ywm")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
@@ -16952,14 +16936,13 @@
"TARGET_SSSE3"
"@
pmaddubsw\t{%2, %0|%0, %2}
- vpmaddubsw\t{%2, %1, %0|%0, %1, %2}
vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,avx")
(set_attr "type" "sseiadd")
(set_attr "atom_unit" "simul")
- (set_attr "prefix_data16" "1,*,*")
+ (set_attr "prefix_data16" "1,*")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,vex,evex")
+ (set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
(define_insn "ssse3_pmaddubsw"
@@ -17065,16 +17048,16 @@
})
(define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
- [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,<v_Yw>")
(truncate:VI2_AVX2
(lshiftrt:<ssedoublemode>
(plus:<ssedoublemode>
(lshiftrt:<ssedoublemode>
(mult:<ssedoublemode>
(sign_extend:<ssedoublemode>
- (match_operand:VI2_AVX2 1 "vector_operand" "%0,x,v"))
+ (match_operand:VI2_AVX2 1 "vector_operand" "%0,<v_Yw>"))
(sign_extend:<ssedoublemode>
- (match_operand:VI2_AVX2 2 "vector_operand" "xBm,xm,vm")))
+ (match_operand:VI2_AVX2 2 "vector_operand" "xBm,<v_Yw>m")))
(const_int 14))
(match_operand:VI2_AVX2 3 "const1_operand"))
(const_int 1))))]
@@ -17082,13 +17065,12 @@
&& !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"@
pmulhrsw\t{%2, %0|%0, %2}
- vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}
vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
- [(set_attr "isa" "noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,avx")
(set_attr "type" "sseimul")
- (set_attr "prefix_data16" "1,*,*")
+ (set_attr "prefix_data16" "1,*")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,maybe_evex,evex")
+ (set_attr "prefix" "orig,maybe_evex")
(set_attr "mode" "<sseinsnmode>")])
(define_expand "smulhrsv4hi3"
@@ -17160,21 +17142,20 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
- [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
+ [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,<v_Yw>")
(unspec:VI1_AVX512
- [(match_operand:VI1_AVX512 1 "register_operand" "0,x,v")
- (match_operand:VI1_AVX512 2 "vector_operand" "xBm,xm,vm")]
+ [(match_operand:VI1_AVX512 1 "register_operand" "0,<v_Yw>")
+ (match_operand:VI1_AVX512 2 "vector_operand" "xBm,<v_Yw>m")]
UNSPEC_PSHUFB))]
"TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
"@
pshufb\t{%2, %0|%0, %2}
- vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
- [(set_attr "isa" "noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog1")
- (set_attr "prefix_data16" "1,*,*")
+ (set_attr "prefix_data16" "1,*")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,maybe_evex,evex")
+ (set_attr "prefix" "orig,maybe_evex")
(set_attr "btver2_decode" "vector")
(set_attr "mode" "<sseinsnmode>")])
@@ -17274,11 +17255,11 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<ssse3_avx2>_palignr<mode>"
- [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x,v")
+ [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,<v_Yw>")
(unspec:SSESCALARMODE
- [(match_operand:SSESCALARMODE 1 "register_operand" "0,x,v")
- (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,xm,vm")
- (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
+ [(match_operand:SSESCALARMODE 1 "register_operand" "0,<v_Yw>")
+ (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,<v_Yw>m")
+ (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
UNSPEC_PALIGNR))]
"TARGET_SSSE3"
{
@@ -17289,19 +17270,18 @@
case 0:
return "palignr\t{%3, %2, %0|%0, %2, %3}";
case 1:
- case 2:
return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
default:
gcc_unreachable ();
}
}
- [(set_attr "isa" "noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,avx")
(set_attr "type" "sseishft")
(set_attr "atom_unit" "sishuf")
- (set_attr "prefix_data16" "1,*,*")
+ (set_attr "prefix_data16" "1,*")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
- (set_attr "prefix" "orig,vex,evex")
+ (set_attr "prefix" "orig,vex")
(set_attr "mode" "<sseinsnmode>")])
(define_insn_and_split "ssse3_palignrdi"
@@ -17367,9 +17347,9 @@
(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
(define_insn "*abs<mode>2"
- [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v")
+ [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=<v_Yw>")
(abs:VI1248_AVX512VL_AVX512BW
- (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand" "vBm")))]
+ (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand" "<v_Yw>Bm")))]
"TARGET_SSSE3"
"%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
[(set_attr "type" "sselog1")
@@ -17731,22 +17711,21 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<sse4_1_avx2>_packusdw<mask_name>"
- [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,x,v")
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,<v_Yw>")
(vec_concat:VI2_AVX2
(us_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 1 "register_operand" "0,0,x,v"))
+ (match_operand:<sseunpackmode> 1 "register_operand" "0,0,<v_Yw>"))
(us_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,xm,vm"))))]
+ (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,<v_Yw>m"))))]
"TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
"@
packusdw\t{%2, %0|%0, %2}
packusdw\t{%2, %0|%0, %2}
- vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
- [(set_attr "isa" "noavx,noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,orig,<mask_prefix>,evex")
+ (set_attr "prefix" "orig,orig,<mask_prefix>")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<sse4_1_avx2>_pblendvb"
@@ -17867,9 +17846,9 @@
(set_attr "mode" "TI")])
(define_insn "avx2_<code>v16qiv16hi2<mask_name>"
- [(set (match_operand:V16HI 0 "register_operand" "=v")
+ [(set (match_operand:V16HI 0 "register_operand" "=Yw")
(any_extend:V16HI
- (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
+ (match_operand:V16QI 1 "nonimmediate_operand" "Ywm")))]
"TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
"vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
@@ -17935,10 +17914,10 @@
"TARGET_AVX512BW")
(define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
- [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
+ [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,Yw")
(any_extend:V8HI
(vec_select:V8QI
- (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
+ (match_operand:V16QI 1 "register_operand" "Yr,*x,Yw")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
@@ -17952,7 +17931,7 @@
(set_attr "mode" "TI")])
(define_insn "*sse4_1_<code>v8qiv8hi2<mask_name>_1"
- [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
+ [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,Yw")
(any_extend:V8HI
(match_operand:V8QI 1 "memory_operand" "m,m,m")))]
"TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
@@ -21611,11 +21590,11 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "*ssse3_palignr<mode>_perm"
- [(set (match_operand:V_128 0 "register_operand" "=x,x,v")
+ [(set (match_operand:V_128 0 "register_operand" "=x,Yw")
(vec_select:V_128
- (match_operand:V_128 1 "register_operand" "0,x,v")
+ (match_operand:V_128 1 "register_operand" "0,Yw")
(match_parallel 2 "palignr_operand"
- [(match_operand 3 "const_int_operand" "n,n,n")])))]
+ [(match_operand 3 "const_int_operand" "n,n")])))]
"TARGET_SSSE3"
{
operands[2] = (GEN_INT (INTVAL (operands[3])
@@ -21626,19 +21605,18 @@
case 0:
return "palignr\t{%2, %1, %0|%0, %1, %2}";
case 1:
- case 2:
return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
default:
gcc_unreachable ();
}
}
- [(set_attr "isa" "noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,avx")
(set_attr "type" "sseishft")
(set_attr "atom_unit" "sishuf")
- (set_attr "prefix_data16" "1,*,*")
+ (set_attr "prefix_data16" "1,*")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
- (set_attr "prefix" "orig,vex,evex")])
+ (set_attr "prefix" "orig,maybe_evex")])
(define_expand "avx512vl_vinsert<mode>"
[(match_operand:VI48F_256 0 "register_operand")
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index cc27c79..e655e66 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -1688,6 +1688,140 @@ struct processor_costs znver2_cost = {
"16", /* Func alignment. */
};
+struct processor_costs znver3_cost = {
+ {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+
+  /* reg-reg moves are done by renaming and thus they are even cheaper than
+     1 cycle.  Because reg-reg move cost is 2 and the following tables
+     correspond to doubles of latencies, we do not model this correctly.  It
+     does not seem to make a practical difference to bump prices up even
+     more.  */
+ 6, /* cost for loading QImode using
+ movzbl. */
+ {6, 6, 6}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {8, 8, 8}, /* cost of storing integer
+ registers. */
+ 2, /* cost of reg,reg fld/fst. */
+ {6, 6, 16}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode. */
+ {8, 8, 16}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode. */
+ 2, /* cost of moving MMX register. */
+ {6, 6}, /* cost of loading MMX registers
+ in SImode and DImode. */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode. */
+ 2, 2, 3, /* cost of moving XMM,YMM,ZMM
+ register. */
+ {6, 6, 6, 6, 12}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit. */
+ {8, 8, 8, 8, 16}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit. */
+ 6, 6, /* SSE->integer and integer->SSE
+ moves. */
+ 8, 8, /* mask->integer and integer->mask moves */
+ {6, 6, 6}, /* cost of loading mask register
+ in QImode, HImode, SImode. */
+  {8, 8, 8},				/* cost of storing mask register
+					   in QImode, HImode, SImode.  */
+ 2, /* cost of moving mask register. */
+ /* End of register allocator costs. */
+ },
+
+ COSTS_N_INSNS (1), /* cost of an add instruction. */
+ COSTS_N_INSNS (1), /* cost of a lea instruction. */
+ COSTS_N_INSNS (1), /* variable shift costs. */
+ COSTS_N_INSNS (1), /* constant shift costs. */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */
+ COSTS_N_INSNS (3), /* HI. */
+ COSTS_N_INSNS (3), /* SI. */
+ COSTS_N_INSNS (3), /* DI. */
+ COSTS_N_INSNS (3)}, /* other. */
+ 0, /* cost of multiply per each bit
+ set. */
+  /* Depending on parameters, idiv can get faster on Ryzen.  This is an
+     upper bound.  */
+ {COSTS_N_INSNS (16), /* cost of a divide/mod for QI. */
+ COSTS_N_INSNS (22), /* HI. */
+ COSTS_N_INSNS (30), /* SI. */
+ COSTS_N_INSNS (45), /* DI. */
+ COSTS_N_INSNS (45)}, /* other. */
+ COSTS_N_INSNS (1), /* cost of movsx. */
+ COSTS_N_INSNS (1), /* cost of movzx. */
+ 8, /* "large" insn. */
+ 9, /* MOVE_RATIO. */
+ 6, /* CLEAR_RATIO */
+ {6, 6, 6}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {8, 8, 8}, /* cost of storing integer
+ registers. */
+ {6, 6, 6, 6, 12}, /* cost of loading SSE registers
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {8, 8, 8, 8, 16}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {6, 6, 6, 6, 12}, /* cost of unaligned loads. */
+ {8, 8, 8, 8, 16}, /* cost of unaligned stores. */
+ 2, 2, 3, /* cost of moving XMM,YMM,ZMM
+ register. */
+ 6, /* cost of moving SSE register to integer. */
+  /* The 128-bit VGATHERDPD is 23 uops with throughput 9; the 256-bit form
+     is 35 uops, throughput 12.  Approx 9 uops do not depend on vector size
+     and every load is 7 uops.  */
+ 18, 8, /* Gather load static, per_elt. */
+ 18, 10, /* Gather store static, per_elt. */
+ 32, /* size of l1 cache. */
+ 512, /* size of l2 cache. */
+ 64, /* size of prefetch block. */
+  /* New AMD processors never drop prefetches; if they cannot be performed
+     immediately, they are queued.  We set the number of simultaneous
+     prefetches to a large constant to reflect this (it is probably not a
+     good idea to leave the number of prefetches entirely unlimited, as
+     their execution also takes some time).  */
+ 100, /* number of parallel prefetches. */
+ 3, /* Branch cost. */
+ COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (5), /* cost of FMUL instruction. */
+ /* Latency of fdiv is 8-15. */
+ COSTS_N_INSNS (15), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (1), /* cost of FABS instruction. */
+ COSTS_N_INSNS (1), /* cost of FCHS instruction. */
+ /* Latency of fsqrt is 4-10. */
+ COSTS_N_INSNS (10), /* cost of FSQRT instruction. */
+
+ COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
+ COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
+ COSTS_N_INSNS (3), /* cost of MULSS instruction. */
+ COSTS_N_INSNS (3), /* cost of MULSD instruction. */
+ COSTS_N_INSNS (5), /* cost of FMA SS instruction. */
+ COSTS_N_INSNS (5), /* cost of FMA SD instruction. */
+ COSTS_N_INSNS (10), /* cost of DIVSS instruction. */
+ /* 9-13. */
+ COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
+ COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
+ COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
+  /* Zen can execute 4 integer operations per cycle.  FP operations
+     take 3 cycles and it can execute 2 integer additions and 2
+     multiplications, thus reassociation may make sense up to a width of 6.
+     SPEC2k6 benchmarks suggest
+     that 4 works better than 6, probably due to register pressure.
+
+     Integer vector operations are handled by the FP unit, which executes 3
+     vector plus/minus operations per cycle but only one multiply.  This is
+     adjusted in ix86_reassociation_width.  */
+ 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
+ znver2_memcpy,
+ znver2_memset,
+ COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
+ COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
+ "16", /* Loop alignment. */
+ "16", /* Jump alignment. */
+ "0:0:8", /* Label alignment. */
+ "16", /* Func alignment. */
+};
+
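
The cost entries above are expressed in GCC's internal units: COSTS_N_INSNS (N) expands to N * 4 (see gcc/rtl.h), which keeps them comparable to the reg-reg move cost of 2 that the register-allocator block uses as its baseline. The following is a minimal, self-contained sketch of how one row of the table might be consumed; struct mult_costs and znver3_mult_cost are illustrative names, not part of this patch:

/* Standalone sketch: model the znver3 multiply row above.  */
#define COSTS_N_INSNS(N) ((N) * 4)	/* mirrors the definition in gcc/rtl.h */

struct mult_costs { int qi, hi, si, di, other; };

static const struct mult_costs znver3_mult = {
  COSTS_N_INSNS (3), COSTS_N_INSNS (3), COSTS_N_INSNS (3),
  COSTS_N_INSNS (3), COSTS_N_INSNS (3)
};

/* Return the modelled multiply cost for an operand width in bits.  */
static int
znver3_mult_cost (int bits)
{
  if (bits <= 8)  return znver3_mult.qi;   /* QImode */
  if (bits <= 16) return znver3_mult.hi;   /* HImode */
  if (bits <= 32) return znver3_mult.si;   /* SImode */
  if (bits <= 64) return znver3_mult.di;   /* DImode */
  return znver3_mult.other;
}
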
/* skylake_cost should produce code tuned for Skylake family of CPUs.  */
static stringop_algs skylake_memcpy[2] = {
{libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 69f3c70..859af75 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -1208,10 +1208,11 @@
(define_predicate "branch_comparison_operator"
(and (match_operand 0 "comparison_operator")
(match_test "GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_CC")
- (if_then_else (match_test "GET_MODE (XEXP (op, 0)) == CCFPmode
- && !flag_finite_math_only")
- (match_code "lt,gt,eq,unordered,unge,unle,ne,ordered")
- (match_code "lt,ltu,le,leu,gt,gtu,ge,geu,eq,ne"))
+ (if_then_else (match_test "GET_MODE (XEXP (op, 0)) == CCFPmode")
+ (if_then_else (match_test "flag_finite_math_only")
+ (match_code "lt,le,gt,ge,eq,ne,unordered,ordered")
+ (match_code "lt,gt,eq,unordered,unge,unle,ne,ordered"))
+ (match_code "lt,ltu,le,leu,gt,gtu,ge,geu,eq,ne"))
(match_test "validate_condition_mode (GET_CODE (op),
GET_MODE (XEXP (op, 0))),
1")))
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 151136b..f7b1c03 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -16714,13 +16714,21 @@ s390_shift_truncation_mask (machine_mode mode)
static bool
f_constraint_p (const char *constraint)
{
+ bool seen_f_p = false;
+ bool seen_v_p = false;
+
for (size_t i = 0, c_len = strlen (constraint); i < c_len;
i += CONSTRAINT_LEN (constraint[i], constraint + i))
{
if (constraint[i] == 'f')
- return true;
+ seen_f_p = true;
+ if (constraint[i] == 'v')
+ seen_v_p = true;
}
- return false;
+
+ /* Treat "fv" constraints as "v", because LRA will choose the widest register
+ * class. */
+ return seen_f_p && !seen_v_p;
}
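
The net effect: a string counts as an "f" constraint only if it mentions 'f' without also offering 'v'. Below is a simplified standalone model of the scan (an assumption-laden sketch: real constraints can be multi-character, which is why the code above steps by CONSTRAINT_LEN; here every constraint is a single letter, and f_constraint_p_model is an illustrative name):

#include <stdbool.h>
#include <string.h>

/* Simplified model of f_constraint_p: true iff CONSTRAINT contains an 'f'
   but no 'v'.  Assumes single-letter constraints only.  */
static bool
f_constraint_p_model (const char *constraint)
{
  bool seen_f_p = strchr (constraint, 'f') != NULL;
  bool seen_v_p = strchr (constraint, 'v') != NULL;
  return seen_f_p && !seen_v_p;
}

/* Examples: "f" -> true (gets the fixup), "fv" -> false (LRA will pick the
   wider vector class), "d" -> false.  */
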
/* Implement TARGET_MD_ASM_ADJUST hook in order to fix up "f"
diff --git a/gcc/config/sparc/constraints.md b/gcc/config/sparc/constraints.md
index 82bbba9..7ddf014 100644
--- a/gcc/config/sparc/constraints.md
+++ b/gcc/config/sparc/constraints.md
@@ -19,7 +19,7 @@
;;; Unused letters:
;;; B
-;;; a jkl uv xyz
+;;; a jkl uvwxyz
;; Register constraints
@@ -190,14 +190,7 @@
(match_test "TARGET_ARCH32")
(match_test "register_ok_for_ldd (op)")))
-;; Equivalent to 'T' but in 64-bit mode without alignment requirement
(define_memory_constraint "W"
- "Memory reference for 'e' constraint floating-point register"
- (and (match_code "mem")
- (match_test "TARGET_ARCH64")
- (match_test "memory_ok_for_ldd (op)")))
-
-(define_memory_constraint "w"
"A memory with only a base register"
(match_operand 0 "mem_noofs_operand"))
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index f150417..42ba415 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -9223,14 +9223,17 @@ register_ok_for_ldd (rtx reg)
int
memory_ok_for_ldd (rtx op)
{
- /* In 64-bit mode, we assume that the address is word-aligned. */
- if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
+ if (!mem_min_alignment (op, 8))
return 0;
- if (! can_create_pseudo_p ()
+ /* We need to perform the job of a memory constraint. */
+ if ((reload_in_progress || reload_completed)
&& !strict_memory_address_p (Pmode, XEXP (op, 0)))
return 0;
+ if (lra_in_progress && !memory_address_p (Pmode, XEXP (op, 0)))
+ return 0;
+
return 1;
}
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index 02b7c8d..c5d3696 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -1869,8 +1869,8 @@ visl")
(set_attr "lra" "*,*,disabled,disabled,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")])
(define_insn "*movdi_insn_sp64"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r, m, r,*e,?*e,?*e, W,b,b")
- (match_operand:DI 1 "input_operand" "rI,N,m,rJ,*e, r, *e, W,?*e,J,P"))]
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r, m, r,*e,?*e,?*e, m,b,b")
+ (match_operand:DI 1 "input_operand" "rI,N,m,rJ,*e, r, *e, m,?*e,J,P"))]
"TARGET_ARCH64
&& (register_operand (operands[0], DImode)
|| register_or_zero_or_all_ones_operand (operands[1], DImode))"
@@ -2498,8 +2498,8 @@ visl")
(set_attr "lra" "*,*,*,*,*,*,*,*,*,*,disabled,disabled,*,*,*,*,*")])
(define_insn "*movdf_insn_sp64"
- [(set (match_operand:DF 0 "nonimmediate_operand" "=b,b,e,*r, e, e,W, *r,*r, m,*r")
- (match_operand:DF 1 "input_operand" "G,C,e, e,*r,W#F,e,*rG, m,*rG, F"))]
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=b,b,e,*r, e, e,m, *r,*r, m,*r")
+ (match_operand:DF 1 "input_operand" "G,C,e, e,*r,m#F,e,*rG, m,*rG, F"))]
"TARGET_ARCH64
&& (register_operand (operands[0], DFmode)
|| register_or_zero_or_all_ones_operand (operands[1], DFmode))"
@@ -8467,8 +8467,8 @@ visl")
(set_attr "cpu_feature" "vis,vis,vis,*,*,*,*,*,*,vis3,vis3")])
(define_insn "*mov<VM64:mode>_insn_sp64"
- [(set (match_operand:VM64 0 "nonimmediate_operand" "=e,e,e,e,W,m,*r, m,*r, e,*r")
- (match_operand:VM64 1 "input_operand" "Y,Z,e,W,e,Y, m,*r, e,*r,*r"))]
+ [(set (match_operand:VM64 0 "nonimmediate_operand" "=e,e,e,e,m,m,*r, m,*r, e,*r")
+ (match_operand:VM64 1 "input_operand" "Y,Z,e,m,e,Y, m,*r, e,*r,*r"))]
"TARGET_VIS
&& TARGET_ARCH64
&& (register_operand (operands[0], <VM64:MODE>mode)
diff --git a/gcc/config/sparc/sync.md b/gcc/config/sparc/sync.md
index c578e95..c0a20ef 100644
--- a/gcc/config/sparc/sync.md
+++ b/gcc/config/sparc/sync.md
@@ -202,7 +202,7 @@
(define_insn "*atomic_compare_and_swap<mode>_1"
[(set (match_operand:I48MODE 0 "register_operand" "=r")
- (match_operand:I48MODE 1 "mem_noofs_operand" "+w"))
+ (match_operand:I48MODE 1 "mem_noofs_operand" "+W"))
(set (match_dup 1)
(unspec_volatile:I48MODE
[(match_operand:I48MODE 2 "register_operand" "r")
@@ -214,7 +214,7 @@
(define_insn "atomic_compare_and_swap_leon3_1"
[(set (match_operand:SI 0 "register_operand" "=r")
- (match_operand:SI 1 "mem_noofs_operand" "+w"))
+ (match_operand:SI 1 "mem_noofs_operand" "+W"))
(set (match_dup 1)
(unspec_volatile:SI
[(match_operand:SI 2 "register_operand" "r")
@@ -235,7 +235,7 @@
(define_insn "*atomic_compare_and_swapdi_v8plus"
[(set (match_operand:DI 0 "register_operand" "=h")
- (match_operand:DI 1 "mem_noofs_operand" "+w"))
+ (match_operand:DI 1 "mem_noofs_operand" "+W"))
(set (match_dup 1)
(unspec_volatile:DI
[(match_operand:DI 2 "register_operand" "h")