author    Ian Lance Taylor <iant@golang.org>  2021-03-17 12:19:01 -0700
committer Ian Lance Taylor <iant@golang.org>  2021-03-17 12:19:01 -0700
commit f10c7c4596dda99d2ee872c995ae4aeda65adbdf (patch)
tree   a3451277603bc8fbe2eddce5f4ad63f790129a01 /gcc/config
parent bc636c218f2b28da06cd1404d5b35d1f8cc43fd1 (diff)
parent f3e9c98a9f40fc24bb4ecef6aaa94ff799c8d587 (diff)
Merge from trunk revision f3e9c98a9f40fc24bb4ecef6aaa94ff799c8d587.
Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/aarch64/aarch64-builtins.c |   2
-rw-r--r--  gcc/config/aarch64/aarch64.c          |  14
-rw-r--r--  gcc/config/i386/constraints.md        |   6
-rw-r--r--  gcc/config/i386/i386-expand.c         |  11
-rw-r--r--  gcc/config/i386/i386-features.c       |  31
-rw-r--r--  gcc/config/i386/i386-options.c        |   2
-rw-r--r--  gcc/config/i386/i386.c                |  32
-rw-r--r--  gcc/config/i386/i386.h                |   4
-rw-r--r--  gcc/config/i386/i386.md               |  12
-rw-r--r--  gcc/config/i386/mmx.md                | 179
-rw-r--r--  gcc/config/i386/sse.md                | 350
-rw-r--r--  gcc/config/i386/x86-tune-costs.h      | 134
-rw-r--r--  gcc/config/rs6000/predicates.md       |   9
-rw-r--r--  gcc/config/s390/s390.c                |  12
-rw-r--r--  gcc/config/sparc/constraints.md       |   9
-rw-r--r--  gcc/config/sparc/sparc.c              |   9
-rw-r--r--  gcc/config/sparc/sparc.md             |  12
-rw-r--r--  gcc/config/sparc/sync.md              |   6
18 files changed, 508 insertions(+), 326 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 25ab866..acdea2a 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -1954,7 +1954,7 @@ aarch64_expand_rng_builtin (tree exp, rtx target, int fcode, int ignore)
return target;
rtx cc_reg = gen_rtx_REG (CC_Zmode, CC_REGNUM);
- rtx cmp_rtx = gen_rtx_fmt_ee (NE, SImode, cc_reg, const0_rtx);
+ rtx cmp_rtx = gen_rtx_fmt_ee (EQ, SImode, cc_reg, const0_rtx);
emit_insn (gen_aarch64_cstoresi (target, cmp_rtx, cc_reg));
return target;
}
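
Note: the hunk above flips the stored comparison from NE to EQ so that the RNG builtin's status result matches the ACLE contract, which specifies a return value of 0 on success (the RNDR instruction signals failure by setting the Z flag). A minimal sketch of the source-level interface, assuming -march=armv8.5-a+rng and GCC's arm_acle.h:

#include <arm_acle.h>
#include <stdint.h>

/* __rndr expands to __builtin_aarch64_rndr; per ACLE it returns 0
   when a genuine random number was produced, nonzero on failure.  */
int
get_random (uint64_t *out)
{
  return __rndr (out);
}
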
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 8a86889..7838d99 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -23372,7 +23372,7 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
struct cgraph_simd_clone *clonei,
tree base_type, int num)
{
- tree t, ret_type, arg_type;
+ tree t, ret_type;
unsigned int elt_bits, count;
unsigned HOST_WIDE_INT const_simdlen;
poly_uint64 vec_bits;
@@ -23412,11 +23412,17 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
return 0;
}
- for (t = DECL_ARGUMENTS (node->decl); t; t = DECL_CHAIN (t))
+ int i;
+ tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
+ bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);
+
+ for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
+ t && t != void_list_node; t = TREE_CHAIN (t), i++)
{
- arg_type = TREE_TYPE (t);
+ tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
- if (!currently_supported_simd_type (arg_type, base_type))
+ if (clonei->args[i].arg_type != SIMD_CLONE_ARG_TYPE_UNIFORM
+ && !currently_supported_simd_type (arg_type, base_type))
{
if (TYPE_SIZE (arg_type) != TYPE_SIZE (base_type))
warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
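
The aarch64.c hunk changes two things for SIMD clones: argument types come from TYPE_ARG_TYPES when the node is a declaration without a body, and 'uniform' arguments are exempt from the vectorizable-type check, since they are passed as scalars to the clone. A hedged sketch of source code exercising both (compile with -fopenmp-simd; the names are illustrative):

/* Declaration only, so the walk uses TYPE_ARG_TYPES; 'scale' is
   uniform and therefore its non-SIMD type (long double) no longer
   fails the currently_supported_simd_type check.  */
#pragma omp declare simd uniform(scale) notinbranch
double scale_elem (double x, long double scale);
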
diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index a8db33e..eaa582d 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -111,6 +111,8 @@
;; otherwise any SSE register
;; w any EVEX encodable SSE register for AVX512BW with TARGET_AVX512VL
;; target, otherwise any SSE register.
+;; W any EVEX encodable SSE register for AVX512BW target,
+;; otherwise any SSE register.
(define_register_constraint "Yz" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS"
"First SSE register (@code{%xmm0}).")
@@ -151,6 +153,10 @@
"TARGET_AVX512BW && TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS"
"@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW with TARGET_AVX512VL target, otherwise any SSE register.")
+(define_register_constraint "YW"
+ "TARGET_AVX512BW ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS"
+ "@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW target, otherwise any SSE register.")
+
;; We use the B prefix to denote any number of internal operands:
;; f FLAGS_REG
;; g GOT memory operand.
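
Why the new YW constraint (and the Yw variants used throughout mmx.md and sse.md below) exists: the EVEX-only registers %xmm16-%xmm31 are usable for byte/word element operations only when AVX512BW is available, while dword/qword operations need only AVX512F/AVX512VL. A sketch of the distinction at the intrinsic level, assuming -mavx512vl (function names are illustrative):

#include <immintrin.h>

/* vpaddd has an EVEX form under AVX512VL, so the register allocator
   may place this in %xmm16-%xmm31 ...  */
__m128i add_dwords (__m128i a, __m128i b) { return _mm_add_epi32 (a, b); }

/* ... but vpaddb only gains an EVEX form with AVX512BW; without it
   the byte add must stay in %xmm0-%xmm15, which is the rule that
   Yw/YW encode.  */
__m128i add_bytes (__m128i a, __m128i b) { return _mm_add_epi8 (a, b); }
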
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 02d3142..ac69eed 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -1348,9 +1348,10 @@ ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
if (regno0 != regno2)
emit_insn (gen_rtx_SET (target, parts.index));
- /* Use shift for scaling. */
- ix86_emit_binop (ASHIFT, mode, target,
- GEN_INT (exact_log2 (parts.scale)));
+ /* Use shift for scaling, but emit it as MULT instead
+ to avoid it being immediately peephole2 optimized back
+ into lea. */
+ ix86_emit_binop (MULT, mode, target, GEN_INT (parts.scale));
if (parts.base)
ix86_emit_binop (PLUS, mode, target, parts.base);
@@ -13210,6 +13211,10 @@ rdseed_step:
return 0;
+ case IX86_BUILTIN_VZEROUPPER:
+ cfun->machine->has_explicit_vzeroupper = true;
+ break;
+
default:
break;
}
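
The new IX86_BUILTIN_VZEROUPPER case records that the function contains an explicit vzeroupper, so the pass in i386-features.c below can still run for it even when the automatic-insertion heuristics are off. At the source level the builtin comes from the standard intrinsic:

#include <immintrin.h>

void
mix_sse_after_avx (void)
{
  /* Expands to __builtin_ia32_vzeroupper and now sets
     cfun->machine->has_explicit_vzeroupper for this function.  */
  _mm256_zeroupper ();
}
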
diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c
index 41891c9..77783a1 100644
--- a/gcc/config/i386/i386-features.c
+++ b/gcc/config/i386/i386-features.c
@@ -1837,19 +1837,22 @@ ix86_add_reg_usage_to_vzerouppers (void)
static unsigned int
rest_of_handle_insert_vzeroupper (void)
{
- int i;
-
- /* vzeroupper instructions are inserted immediately after reload to
- account for possible spills from 256bit or 512bit registers. The pass
- reuses mode switching infrastructure by re-running mode insertion
- pass, so disable entities that have already been processed. */
- for (i = 0; i < MAX_386_ENTITIES; i++)
- ix86_optimize_mode_switching[i] = 0;
+ if (TARGET_VZEROUPPER
+ && flag_expensive_optimizations
+ && !optimize_size)
+ {
+ /* vzeroupper instructions are inserted immediately after reload to
+ account for possible spills from 256bit or 512bit registers. The pass
+ reuses mode switching infrastructure by re-running mode insertion
+ pass, so disable entities that have already been processed. */
+ for (int i = 0; i < MAX_386_ENTITIES; i++)
+ ix86_optimize_mode_switching[i] = 0;
- ix86_optimize_mode_switching[AVX_U128] = 1;
+ ix86_optimize_mode_switching[AVX_U128] = 1;
- /* Call optimize_mode_switching. */
- g->get_passes ()->execute_pass_mode_switching ();
+ /* Call optimize_mode_switching. */
+ g->get_passes ()->execute_pass_mode_switching ();
+ }
ix86_add_reg_usage_to_vzerouppers ();
return 0;
}
@@ -1880,8 +1883,10 @@ public:
virtual bool gate (function *)
{
return TARGET_AVX
- && TARGET_VZEROUPPER && flag_expensive_optimizations
- && !optimize_size;
+ && ((TARGET_VZEROUPPER
+ && flag_expensive_optimizations
+ && !optimize_size)
+ || cfun->machine->has_explicit_vzeroupper);
}
virtual unsigned int execute (function *)
diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c
index e93935f..7865bc1 100644
--- a/gcc/config/i386/i386-options.c
+++ b/gcc/config/i386/i386-options.c
@@ -743,7 +743,7 @@ static const struct processor_costs *processor_cost_table[] =
&btver2_cost,
&znver1_cost,
&znver2_cost,
- &znver2_cost
+ &znver3_cost
};
/* Guarantee that the array is aligned with enum processor_type. */
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 2603333..540d4f4 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -10265,15 +10265,20 @@ darwin_local_data_pic (rtx disp)
&& XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
}
-/* True if operand X should be loaded from GOT. */
+/* True if the function symbol operand X should be loaded from GOT.
+
+ NB: In 32-bit mode, only non-PIC is allowed in inline assembly
+ statements, since a PIC register could not be available at the
+ call site. */
bool
ix86_force_load_from_GOT_p (rtx x)
{
- return ((TARGET_64BIT || HAVE_AS_IX86_GOT32X)
+ return ((TARGET_64BIT || (!flag_pic && HAVE_AS_IX86_GOT32X))
&& !TARGET_PECOFF && !TARGET_MACHO
- && !flag_pic
+ && (!flag_pic || this_is_asm_operands)
&& ix86_cmodel != CM_LARGE
+ && ix86_cmodel != CM_LARGE_PIC
&& GET_CODE (x) == SYMBOL_REF
&& SYMBOL_REF_FUNCTION_P (x)
&& (!flag_plt
@@ -12701,7 +12706,8 @@ print_reg (rtx x, int code, FILE *file)
y -- print "st(0)" instead of "st" as a register.
d -- print duplicated register operand for AVX instruction.
D -- print condition for SSE cmp instruction.
- P -- if PIC, print an @PLT suffix.
+ P -- if PIC, print an @PLT suffix. For -fno-plt, load function
+ address from GOT.
p -- print raw symbol name.
X -- don't print any sort of PIC '@' suffix for a symbol.
& -- print some in-use local-dynamic symbol name.
@@ -13445,7 +13451,23 @@ ix86_print_operand (FILE *file, rtx x, int code)
x = const0_rtx;
}
- if (code != 'P' && code != 'p')
+ if (code == 'P')
+ {
+ if (ix86_force_load_from_GOT_p (x))
+ {
+ /* For inline assembly statement, load function address
+ from GOT with 'P' operand modifier to avoid PLT. */
+ x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
+ (TARGET_64BIT
+ ? UNSPEC_GOTPCREL
+ : UNSPEC_GOT));
+ x = gen_rtx_CONST (Pmode, x);
+ x = gen_const_mem (Pmode, x);
+ ix86_print_operand (file, x, 'A');
+ return;
+ }
+ }
+ else if (code != 'p')
{
if (CONST_INT_P (x))
{
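
Taken together, the ix86_force_load_from_GOT_p and 'P' modifier changes make inline assembly that prints a function symbol with %P load the address from the GOT when a PLT reference is unwanted or unavailable. A hedged sketch of the affected pattern; the constraint choice is illustrative, not taken from the patch:

extern void bar (void);

void
foo (void)
{
  /* With -fno-plt on x86-64 the symbol is now printed as a GOT load,
     i.e. call *bar@GOTPCREL(%rip), rather than call bar@PLT.  */
  asm ("call %P0" : : "X" (bar));
}
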
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 69fddca..4874910 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2941,6 +2941,10 @@ struct GTY(()) machine_function {
/* True if the function needs a stack frame. */
BOOL_BITFIELD stack_frame_required : 1;
+ /* True if __builtin_ia32_vzeroupper () has been expanded in current
+ function. */
+ BOOL_BITFIELD has_explicit_vzeroupper : 1;
+
/* The largest alignment, in bytes, of stack slot actually used. */
unsigned int max_used_stack_alignment;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 2820f6d..9ff35d9 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -5219,6 +5219,18 @@
DONE;
})
+
+;; ix86_split_lea_for_addr emits the shifts as MULT to avoid it from being
+;; peephole2 optimized back into a lea. Split that into the shift during
+;; the following split pass.
+(define_split
+ [(set (match_operand:SWI48 0 "general_reg_operand")
+ (mult:SWI48 (match_dup 0) (match_operand:SWI48 1 "const1248_operand")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
;; Add instructions
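
This split undoes the MULT that ix86_split_lea_for_addr now emits, once the peephole2 pass that would have re-formed the LEA has run. It relies on LEA scales being 1, 2, 4 or 8, for which the multiply is exactly a left shift. A sketch of the arithmetic, using __builtin_ctz as a stand-in for GCC's exact_log2:

unsigned long
scale_by_shift (unsigned long x, int scale)
{
  /* For scale in {1, 2, 4, 8}: x * scale == x << log2 (scale).  */
  return x << __builtin_ctz (scale);
}
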
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index c6a2882..4c2b724 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -61,6 +61,9 @@
(define_mode_attr mmxdoublemode
[(V8QI "V8HI") (V4HI "V4SI")])
+(define_mode_attr Yv_Yw
+ [(V8QI "Yw") (V4HI "Yw") (V2SI "Yv") (V1DI "Yv") (V2SF "Yv")])
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Move patterns
@@ -1152,10 +1155,10 @@
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*mmx_<insn><mode>3"
- [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,<Yv_Yw>")
(plusminus:MMXMODEI8
- (match_operand:MMXMODEI8 1 "register_mmxmem_operand" "<comm>0,0,Yv")
- (match_operand:MMXMODEI8 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ (match_operand:MMXMODEI8 1 "register_mmxmem_operand" "<comm>0,0,<Yv_Yw>")
+ (match_operand:MMXMODEI8 2 "register_mmxmem_operand" "ym,x,<Yv_Yw>")))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
"@
@@ -1176,10 +1179,10 @@
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*mmx_<insn><mode>3"
- [(set (match_operand:MMXMODE12 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:MMXMODE12 0 "register_operand" "=y,x,Yw")
(sat_plusminus:MMXMODE12
- (match_operand:MMXMODE12 1 "register_mmxmem_operand" "<comm>0,0,Yv")
- (match_operand:MMXMODE12 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ (match_operand:MMXMODE12 1 "register_mmxmem_operand" "<comm>0,0,Yw")
+ (match_operand:MMXMODE12 2 "register_mmxmem_operand" "ym,x,Yw")))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
"@
@@ -1206,9 +1209,9 @@
"ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
(define_insn "*mmx_mulv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
- (mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")
- (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
+ (mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw")
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& ix86_binary_operator_ok (MULT, V4HImode, operands)"
"@
@@ -1234,14 +1237,14 @@
"ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
(define_insn "*mmx_smulv4hi3_highpart"
- [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
(truncate:V4HI
(lshiftrt:V4SI
(mult:V4SI
(sign_extend:V4SI
- (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
+ (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw"))
(sign_extend:V4SI
- (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")))
(const_int 16))))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& ix86_binary_operator_ok (MULT, V4HImode, operands)"
@@ -1269,14 +1272,14 @@
"ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
(define_insn "*mmx_umulv4hi3_highpart"
- [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
(truncate:V4HI
(lshiftrt:V4SI
(mult:V4SI
(zero_extend:V4SI
- (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
+ (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw"))
(zero_extend:V4SI
- (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")))
(const_int 16))))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& (TARGET_SSE || TARGET_3DNOW_A)
@@ -1313,16 +1316,16 @@
"ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
(define_insn "*mmx_pmaddwd"
- [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yw")
(plus:V2SI
(mult:V2SI
(sign_extend:V2SI
(vec_select:V2HI
- (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")
+ (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw")
(parallel [(const_int 0) (const_int 2)])))
(sign_extend:V2SI
(vec_select:V2HI
- (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")
(parallel [(const_int 0) (const_int 2)]))))
(mult:V2SI
(sign_extend:V2SI
@@ -1432,10 +1435,10 @@
"ix86_fixup_binary_operands_no_copy (<CODE>, V4HImode, operands);")
(define_insn "*mmx_<code>v4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
(smaxmin:V4HI
- (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")
- (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw")
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (<CODE>, V4HImode, operands)"
@@ -1466,10 +1469,10 @@
"ix86_fixup_binary_operands_no_copy (<CODE>, V8QImode, operands);")
(define_insn "*mmx_<code>v8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
(umaxmin:V8QI
- (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yv")
- (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yw")
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (<CODE>, V8QImode, operands)"
@@ -1483,10 +1486,10 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn "mmx_ashr<mode>3"
- [(set (match_operand:MMXMODE24 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:MMXMODE24 0 "register_operand" "=y,x,<Yv_Yw>")
(ashiftrt:MMXMODE24
- (match_operand:MMXMODE24 1 "register_operand" "0,0,Yv")
- (match_operand:DI 2 "nonmemory_operand" "yN,xN,YvN")))]
+ (match_operand:MMXMODE24 1 "register_operand" "0,0,<Yv_Yw>")
+ (match_operand:DI 2 "nonmemory_operand" "yN,xN,<Yv_Yw>N")))]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
"@
psra<mmxvecsize>\t{%2, %0|%0, %2}
@@ -1509,10 +1512,10 @@
"TARGET_MMX_WITH_SSE")
(define_insn "mmx_<insn><mode>3"
- [(set (match_operand:MMXMODE248 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:MMXMODE248 0 "register_operand" "=y,x,<Yv_Yw>")
(any_lshift:MMXMODE248
- (match_operand:MMXMODE248 1 "register_operand" "0,0,Yv")
- (match_operand:DI 2 "nonmemory_operand" "yN,xN,YvN")))]
+ (match_operand:MMXMODE248 1 "register_operand" "0,0,<Yv_Yw>")
+ (match_operand:DI 2 "nonmemory_operand" "yN,xN,<Yv_Yw>N")))]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
"@
p<vshift><mmxvecsize>\t{%2, %0|%0, %2}
@@ -1549,10 +1552,10 @@
"ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
(define_insn "*mmx_eq<mode>3"
- [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x")
(eq:MMXMODEI
- (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,Yv")
- (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,x")
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,x")))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
"@
@@ -1565,10 +1568,10 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn "mmx_gt<mode>3"
- [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x")
(gt:MMXMODEI
- (match_operand:MMXMODEI 1 "register_operand" "0,0,Yv")
- (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ (match_operand:MMXMODEI 1 "register_operand" "0,0,x")
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,x")))]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
"@
pcmpgt<mmxvecsize>\t{%2, %0|%0, %2}
@@ -1594,19 +1597,20 @@
"operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));")
(define_insn "mmx_andnot<mode>3"
- [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x,v")
(and:MMXMODEI
- (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0,0,Yv"))
- (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0,0,x,v"))
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,x,v")))]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
"@
pandn\t{%2, %0|%0, %2}
pandn\t{%2, %0|%0, %2}
- vpandn\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "*,sse2_noavx,avx")
- (set_attr "mmx_isa" "native,*,*")
- (set_attr "type" "mmxadd,sselog,sselog")
- (set_attr "mode" "DI,TI,TI")])
+ vpandn\t{%2, %1, %0|%0, %1, %2}
+ vpandnd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,sse2_noavx,avx,avx512vl")
+ (set_attr "mmx_isa" "native,*,*,*")
+ (set_attr "type" "mmxadd,sselog,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI,TI")])
(define_expand "mmx_<code><mode>3"
[(set (match_operand:MMXMODEI 0 "register_operand")
@@ -1625,20 +1629,21 @@
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*mmx_<code><mode>3"
- [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x,v")
(any_logic:MMXMODEI
- (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,Yv")
- (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,x,v")
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,x,v")))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
"@
p<logic>\t{%2, %0|%0, %2}
p<logic>\t{%2, %0|%0, %2}
- vp<logic>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "*,sse2_noavx,avx")
- (set_attr "mmx_isa" "native,*,*")
- (set_attr "type" "mmxadd,sselog,sselog")
- (set_attr "mode" "DI,TI,TI")])
+ vp<logic>\t{%2, %1, %0|%0, %1, %2}
+ vp<logic>d\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,sse2_noavx,avx,avx512vl")
+ (set_attr "mmx_isa" "native,*,*,*")
+ (set_attr "type" "mmxadd,sselog,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI,TI")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
@@ -1652,12 +1657,12 @@
(define_code_attr s_trunsuffix [(ss_truncate "s") (us_truncate "u")])
(define_insn_and_split "mmx_pack<s_trunsuffix>swb"
- [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
(vec_concat:V8QI
(any_s_truncate:V4QI
- (match_operand:V4HI 1 "register_operand" "0,0,Yv"))
+ (match_operand:V4HI 1 "register_operand" "0,0,Yw"))
(any_s_truncate:V4QI
- (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))))]
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw"))))]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
"@
pack<s_trunsuffix>swb\t{%2, %0|%0, %2}
@@ -1672,12 +1677,12 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn_and_split "mmx_packssdw"
- [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
(vec_concat:V4HI
(ss_truncate:V2HI
- (match_operand:V2SI 1 "register_operand" "0,0,Yv"))
+ (match_operand:V2SI 1 "register_operand" "0,0,Yw"))
(ss_truncate:V2HI
- (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv"))))]
+ (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yw"))))]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
"@
packssdw\t{%2, %0|%0, %2}
@@ -1692,11 +1697,11 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn_and_split "mmx_punpckhbw"
- [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
(vec_select:V8QI
(vec_concat:V16QI
- (match_operand:V8QI 1 "register_operand" "0,0,Yv")
- (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv"))
+ (match_operand:V8QI 1 "register_operand" "0,0,Yw")
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw"))
(parallel [(const_int 4) (const_int 12)
(const_int 5) (const_int 13)
(const_int 6) (const_int 14)
@@ -1715,11 +1720,11 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn_and_split "mmx_punpcklbw"
- [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
(vec_select:V8QI
(vec_concat:V16QI
- (match_operand:V8QI 1 "register_operand" "0,0,Yv")
- (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv"))
+ (match_operand:V8QI 1 "register_operand" "0,0,Yw")
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw"))
(parallel [(const_int 0) (const_int 8)
(const_int 1) (const_int 9)
(const_int 2) (const_int 10)
@@ -1738,11 +1743,11 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn_and_split "mmx_punpckhwd"
- [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
(vec_select:V4HI
(vec_concat:V8HI
- (match_operand:V4HI 1 "register_operand" "0,0,Yv")
- (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))
+ (match_operand:V4HI 1 "register_operand" "0,0,Yw")
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw"))
(parallel [(const_int 2) (const_int 6)
(const_int 3) (const_int 7)])))]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
@@ -1759,11 +1764,11 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn_and_split "mmx_punpcklwd"
- [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
(vec_select:V4HI
(vec_concat:V8HI
- (match_operand:V4HI 1 "register_operand" "0,0,Yv")
- (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))
+ (match_operand:V4HI 1 "register_operand" "0,0,Yw")
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw"))
(parallel [(const_int 0) (const_int 4)
(const_int 1) (const_int 5)])))]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
@@ -1866,11 +1871,11 @@
})
(define_insn "*mmx_pinsrw"
- [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,YW")
(vec_merge:V4HI
(vec_duplicate:V4HI
(match_operand:HI 2 "nonimmediate_operand" "rm,rm,rm"))
- (match_operand:V4HI 1 "register_operand" "0,0,Yv")
+ (match_operand:V4HI 1 "register_operand" "0,0,YW")
(match_operand:SI 3 "const_int_operand")))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& (TARGET_SSE || TARGET_3DNOW_A)
@@ -1902,11 +1907,11 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn "*mmx_pinsrb"
- [(set (match_operand:V8QI 0 "register_operand" "=x,Yv")
+ [(set (match_operand:V8QI 0 "register_operand" "=x,YW")
(vec_merge:V8QI
(vec_duplicate:V8QI
(match_operand:QI 2 "nonimmediate_operand" "rm,rm"))
- (match_operand:V8QI 1 "register_operand" "0,Yv")
+ (match_operand:V8QI 1 "register_operand" "0,YW")
(match_operand:SI 3 "const_int_operand")))]
"TARGET_MMX_WITH_SSE && TARGET_SSE4_1
&& ((unsigned) exact_log2 (INTVAL (operands[3]))
@@ -1940,7 +1945,7 @@
(define_insn "*mmx_pextrw"
[(set (match_operand:HI 0 "register_sse4nonimm_operand" "=r,r,m")
(vec_select:HI
- (match_operand:V4HI 1 "register_operand" "y,Yv,Yv")
+ (match_operand:V4HI 1 "register_operand" "y,YW,YW")
(parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& (TARGET_SSE || TARGET_3DNOW_A)"
@@ -1959,7 +1964,7 @@
[(set (match_operand:SWI48 0 "register_operand" "=r,r")
(zero_extend:SWI48
(vec_select:HI
- (match_operand:V4HI 1 "register_operand" "y,Yv")
+ (match_operand:V4HI 1 "register_operand" "y,YW")
(parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n")]))))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& (TARGET_SSE || TARGET_3DNOW_A)"
@@ -1976,7 +1981,7 @@
(define_insn "*mmx_pextrb"
[(set (match_operand:QI 0 "nonimmediate_operand" "=r,m")
(vec_select:QI
- (match_operand:V8QI 1 "register_operand" "Yv,Yv")
+ (match_operand:V8QI 1 "register_operand" "YW,YW")
(parallel [(match_operand:SI 2 "const_0_to_7_operand" "n,n")])))]
"TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
"@
@@ -1993,7 +1998,7 @@
[(set (match_operand:SWI248 0 "register_operand" "=r")
(zero_extend:SWI248
(vec_select:QI
- (match_operand:V8QI 1 "register_operand" "Yv")
+ (match_operand:V8QI 1 "register_operand" "YW")
(parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
"TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
"%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
@@ -2394,15 +2399,15 @@
})
(define_insn "*mmx_uavgv8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
(truncate:V8QI
(lshiftrt:V8HI
(plus:V8HI
(plus:V8HI
(zero_extend:V8HI
- (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yv"))
+ (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yw"))
(zero_extend:V8HI
- (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")))
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")))
(const_vector:V8HI [(const_int 1) (const_int 1)
(const_int 1) (const_int 1)
(const_int 1) (const_int 1)
@@ -2440,15 +2445,15 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn "*mmx_uavgv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
(truncate:V4HI
(lshiftrt:V4SI
(plus:V4SI
(plus:V4SI
(zero_extend:V4SI
- (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
+ (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw"))
(zero_extend:V4SI
- (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")))
(const_vector:V4SI [(const_int 1) (const_int 1)
(const_int 1) (const_int 1)]))
(const_int 1))))]
@@ -2483,9 +2488,9 @@
})
(define_insn "mmx_psadbw"
- [(set (match_operand:V1DI 0 "register_operand" "=y,x,Yv")
- (unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
- (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")]
+ [(set (match_operand:V1DI 0 "register_operand" "=y,x,Yw")
+ (unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0,0,Yw")
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")]
UNSPEC_PSADBW))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& (TARGET_SSE || TARGET_3DNOW_A)"
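
The last alternative in these mmx.md patterns serves TARGET_MMX_WITH_SSE, where 64-bit vector intrinsics execute in XMM registers on x86-64 without native MMX; that is the alternative whose constraint the Yv_Yw attribute above switches to Yw for byte/word operations. A sketch of code that lands on it, assuming -msse2 -mno-mmx on x86-64:

#include <mmintrin.h>

/* paddw performed in an XMM register; with the Yw constraint it may
   be allocated to %xmm16-%xmm31 only when AVX512BW permits.  */
__m64 add_pi16 (__m64 a, __m64 b) { return _mm_add_pi16 (a, b); }
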
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ca4372d..43e4d57 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -566,7 +566,8 @@
(V4SI "v") (V8SI "v") (V16SI "v")
(V2DI "v") (V4DI "v") (V8DI "v")
(V4SF "v") (V8SF "v") (V16SF "v")
- (V2DF "v") (V4DF "v") (V8DF "v")])
+ (V2DF "v") (V4DF "v") (V8DF "v")
+ (TI "Yw") (V1TI "Yw") (V2TI "Yw") (V4TI "v")])
(define_mode_attr sse2_avx_avx512f
[(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
@@ -11736,10 +11737,10 @@
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*<sse2_avx2>_<insn><mode>3<mask_name>"
- [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,v")
+ [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,<v_Yw>")
(sat_plusminus:VI12_AVX2_AVX512BW
- (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "<comm>0,v")
- (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,vm")))]
+ (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "<comm>0,<v_Yw>")
+ (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,<v_Yw>m")))]
"TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
&& ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
"@
@@ -11827,14 +11828,14 @@
"ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
(define_insn "*<s>mul<mode>3_highpart<mask_name>"
- [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,<v_Yw>")
(truncate:VI2_AVX2
(lshiftrt:<ssedoublemode>
(mult:<ssedoublemode>
(any_extend:<ssedoublemode>
- (match_operand:VI2_AVX2 1 "vector_operand" "%0,v"))
+ (match_operand:VI2_AVX2 1 "vector_operand" "%0,<v_Yw>"))
(any_extend:<ssedoublemode>
- (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))
+ (match_operand:VI2_AVX2 2 "vector_operand" "xBm,<v_Yw>m")))
(const_int 16))))]
"TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
&& <mask_mode512bit_condition> && <mask_avx512bw_condition>"
@@ -12128,19 +12129,19 @@
"ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
(define_insn "*avx2_pmaddwd"
- [(set (match_operand:V8SI 0 "register_operand" "=x,v")
+ [(set (match_operand:V8SI 0 "register_operand" "=Yw")
(plus:V8SI
(mult:V8SI
(sign_extend:V8SI
(vec_select:V8HI
- (match_operand:V16HI 1 "nonimmediate_operand" "%x,v")
+ (match_operand:V16HI 1 "nonimmediate_operand" "%Yw")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
(const_int 12) (const_int 14)])))
(sign_extend:V8SI
(vec_select:V8HI
- (match_operand:V16HI 2 "nonimmediate_operand" "xm,vm")
+ (match_operand:V16HI 2 "nonimmediate_operand" "Ywm")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
@@ -12161,8 +12162,7 @@
"TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseiadd")
- (set_attr "isa" "*,avx512bw")
- (set_attr "prefix" "vex,evex")
+ (set_attr "prefix" "vex")
(set_attr "mode" "OI")])
(define_expand "sse2_pmaddwd"
@@ -12192,17 +12192,17 @@
"ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
(define_insn "*sse2_pmaddwd"
- [(set (match_operand:V4SI 0 "register_operand" "=x,x,v")
+ [(set (match_operand:V4SI 0 "register_operand" "=x,Yw")
(plus:V4SI
(mult:V4SI
(sign_extend:V4SI
(vec_select:V4HI
- (match_operand:V8HI 1 "vector_operand" "%0,x,v")
+ (match_operand:V8HI 1 "vector_operand" "%0,Yw")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)])))
(sign_extend:V4SI
(vec_select:V4HI
- (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")
+ (match_operand:V8HI 2 "vector_operand" "xBm,Ywm")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)]))))
(mult:V4SI
@@ -12217,13 +12217,12 @@
"TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"@
pmaddwd\t{%2, %0|%0, %2}
- vpmaddwd\t{%2, %1, %0|%0, %1, %2}
vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,avx")
(set_attr "type" "sseiadd")
(set_attr "atom_unit" "simul")
- (set_attr "prefix_data16" "1,*,*")
- (set_attr "prefix" "orig,vex,evex")
+ (set_attr "prefix_data16" "1,*")
+ (set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
(define_insn "avx512dq_mul<mode>3<mask_name>"
@@ -12449,10 +12448,10 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "ashr<mode>3"
- [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
+ [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,<v_Yw>")
(ashiftrt:VI24_AVX2
- (match_operand:VI24_AVX2 1 "register_operand" "0,x")
- (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
+ (match_operand:VI24_AVX2 1 "register_operand" "0,<v_Yw>")
+ (match_operand:DI 2 "nonmemory_operand" "xN,YwN")))]
"TARGET_SSE2"
"@
psra<ssemodesuffix>\t{%2, %0|%0, %2}
@@ -12496,10 +12495,10 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<insn><mode>3"
- [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
+ [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,<v_Yw>")
(any_lshift:VI248_AVX2
- (match_operand:VI248_AVX2 1 "register_operand" "0,x")
- (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
+ (match_operand:VI248_AVX2 1 "register_operand" "0,<v_Yw>")
+ (match_operand:DI 2 "nonmemory_operand" "xN,YwN")))]
"TARGET_SSE2"
"@
p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
@@ -12571,9 +12570,9 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<sse2_avx2>_<insn><mode>3"
- [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
+ [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,Yw")
(any_lshift:VIMAX_AVX2
- (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
+ (match_operand:VIMAX_AVX2 1 "register_operand" "0,Yw")
(match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
"TARGET_SSE2"
{
@@ -12771,20 +12770,19 @@
(set_attr "mode" "TI")])
(define_insn "*<code>v8hi3"
- [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
+ [(set (match_operand:V8HI 0 "register_operand" "=x,Yw")
(smaxmin:V8HI
- (match_operand:V8HI 1 "vector_operand" "%0,x,v")
- (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")))]
+ (match_operand:V8HI 1 "vector_operand" "%0,Yw")
+ (match_operand:V8HI 2 "vector_operand" "xBm,Ywm")))]
"TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"@
p<maxmin_int>w\t{%2, %0|%0, %2}
- vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}
vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,avx")
(set_attr "type" "sseiadd")
- (set_attr "prefix_data16" "1,*,*")
- (set_attr "prefix_extra" "*,1,1")
- (set_attr "prefix" "orig,vex,evex")
+ (set_attr "prefix_data16" "1,*")
+ (set_attr "prefix_extra" "*,1")
+ (set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
(define_expand "<code><mode>3"
@@ -12856,20 +12854,19 @@
(set_attr "mode" "TI")])
(define_insn "*<code>v16qi3"
- [(set (match_operand:V16QI 0 "register_operand" "=x,x,v")
+ [(set (match_operand:V16QI 0 "register_operand" "=x,Yw")
(umaxmin:V16QI
- (match_operand:V16QI 1 "vector_operand" "%0,x,v")
- (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")))]
+ (match_operand:V16QI 1 "vector_operand" "%0,Yw")
+ (match_operand:V16QI 2 "vector_operand" "xBm,Ywm")))]
"TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"@
p<maxmin_int>b\t{%2, %0|%0, %2}
- vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}
vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,avx")
(set_attr "type" "sseiadd")
- (set_attr "prefix_data16" "1,*,*")
- (set_attr "prefix_extra" "*,1,1")
- (set_attr "prefix" "orig,vex,evex")
+ (set_attr "prefix_data16" "1,*")
+ (set_attr "prefix_extra" "*,1")
+ (set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -13888,57 +13885,54 @@
})
(define_insn "<sse2_avx2>_packsswb<mask_name>"
- [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
+ [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,<v_Yw>")
(vec_concat:VI1_AVX512
(ss_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
+ (match_operand:<sseunpackmode> 1 "register_operand" "0,<v_Yw>"))
(ss_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
+ (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,<v_Yw>m"))))]
"TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
"@
packsswb\t{%2, %0|%0, %2}
- vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
- [(set_attr "isa" "noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
- (set_attr "prefix_data16" "1,*,*")
- (set_attr "prefix" "orig,<mask_prefix>,evex")
+ (set_attr "prefix_data16" "1,*")
+ (set_attr "prefix" "orig,<mask_prefix>")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<sse2_avx2>_packssdw<mask_name>"
- [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,<v_Yw>")
(vec_concat:VI2_AVX2
(ss_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
+ (match_operand:<sseunpackmode> 1 "register_operand" "0,<v_Yw>"))
(ss_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
+ (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,<v_Yw>m"))))]
"TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
"@
packssdw\t{%2, %0|%0, %2}
- vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
- [(set_attr "isa" "noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
- (set_attr "prefix_data16" "1,*,*")
- (set_attr "prefix" "orig,<mask_prefix>,evex")
+ (set_attr "prefix_data16" "1,*")
+ (set_attr "prefix" "orig,<mask_prefix>")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<sse2_avx2>_packuswb<mask_name>"
- [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
+ [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,<v_Yw>")
(vec_concat:VI1_AVX512
(us_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
+ (match_operand:<sseunpackmode> 1 "register_operand" "0,<v_Yw>"))
(us_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
+ (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,<v_Yw>m"))))]
"TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
"@
packuswb\t{%2, %0|%0, %2}
- vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
- [(set_attr "isa" "noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
- (set_attr "prefix_data16" "1,*,*")
- (set_attr "prefix" "orig,<mask_prefix>,evex")
+ (set_attr "prefix_data16" "1,*")
+ (set_attr "prefix" "orig,<mask_prefix>")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx512bw_interleave_highv64qi<mask_name>"
@@ -13986,11 +13980,11 @@
(set_attr "mode" "XI")])
(define_insn "avx2_interleave_highv32qi<mask_name>"
- [(set (match_operand:V32QI 0 "register_operand" "=v")
+ [(set (match_operand:V32QI 0 "register_operand" "=Yw")
(vec_select:V32QI
(vec_concat:V64QI
- (match_operand:V32QI 1 "register_operand" "v")
- (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
+ (match_operand:V32QI 1 "register_operand" "Yw")
+ (match_operand:V32QI 2 "nonimmediate_operand" "Ywm"))
(parallel [(const_int 8) (const_int 40)
(const_int 9) (const_int 41)
(const_int 10) (const_int 42)
@@ -14007,18 +14001,18 @@
(const_int 29) (const_int 61)
(const_int 30) (const_int 62)
(const_int 31) (const_int 63)])))]
- "TARGET_AVX2 && <mask_avx512vl_condition>"
+ "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
"vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "<mask_prefix>")
(set_attr "mode" "OI")])
(define_insn "vec_interleave_highv16qi<mask_name>"
- [(set (match_operand:V16QI 0 "register_operand" "=x,v")
+ [(set (match_operand:V16QI 0 "register_operand" "=x,Yw")
(vec_select:V16QI
(vec_concat:V32QI
- (match_operand:V16QI 1 "register_operand" "0,v")
- (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
+ (match_operand:V16QI 1 "register_operand" "0,Yw")
+ (match_operand:V16QI 2 "vector_operand" "xBm,Ywm"))
(parallel [(const_int 8) (const_int 24)
(const_int 9) (const_int 25)
(const_int 10) (const_int 26)
@@ -14027,7 +14021,7 @@
(const_int 13) (const_int 29)
(const_int 14) (const_int 30)
(const_int 15) (const_int 31)])))]
- "TARGET_SSE2 && <mask_avx512vl_condition>"
+ "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
"@
punpckhbw\t{%2, %0|%0, %2}
vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
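
The punpckhbw patterns above implement the high-half byte interleave; at the intrinsic level:

#include <emmintrin.h>

__m128i
interleave_high_bytes (__m128i a, __m128i b)
{
  /* Emits punpckhbw / vpunpckhbw.  */
  return _mm_unpackhi_epi8 (a, b);
}
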
@@ -14082,11 +14076,11 @@
(set_attr "mode" "XI")])
(define_insn "avx2_interleave_lowv32qi<mask_name>"
- [(set (match_operand:V32QI 0 "register_operand" "=v")
+ [(set (match_operand:V32QI 0 "register_operand" "=Yw")
(vec_select:V32QI
(vec_concat:V64QI
- (match_operand:V32QI 1 "register_operand" "v")
- (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
+ (match_operand:V32QI 1 "register_operand" "Yw")
+ (match_operand:V32QI 2 "nonimmediate_operand" "Ywm"))
(parallel [(const_int 0) (const_int 32)
(const_int 1) (const_int 33)
(const_int 2) (const_int 34)
@@ -14110,11 +14104,11 @@
(set_attr "mode" "OI")])
(define_insn "vec_interleave_lowv16qi<mask_name>"
- [(set (match_operand:V16QI 0 "register_operand" "=x,v")
+ [(set (match_operand:V16QI 0 "register_operand" "=x,Yw")
(vec_select:V16QI
(vec_concat:V32QI
- (match_operand:V16QI 1 "register_operand" "0,v")
- (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
+ (match_operand:V16QI 1 "register_operand" "0,Yw")
+ (match_operand:V16QI 2 "vector_operand" "xBm,Ywm"))
(parallel [(const_int 0) (const_int 16)
(const_int 1) (const_int 17)
(const_int 2) (const_int 18)
@@ -14162,11 +14156,11 @@
(set_attr "mode" "XI")])
(define_insn "avx2_interleave_highv16hi<mask_name>"
- [(set (match_operand:V16HI 0 "register_operand" "=v")
+ [(set (match_operand:V16HI 0 "register_operand" "=Yw")
(vec_select:V16HI
(vec_concat:V32HI
- (match_operand:V16HI 1 "register_operand" "v")
- (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
+ (match_operand:V16HI 1 "register_operand" "Yw")
+ (match_operand:V16HI 2 "nonimmediate_operand" "Ywm"))
(parallel [(const_int 4) (const_int 20)
(const_int 5) (const_int 21)
(const_int 6) (const_int 22)
@@ -14182,11 +14176,11 @@
(set_attr "mode" "OI")])
(define_insn "vec_interleave_highv8hi<mask_name>"
- [(set (match_operand:V8HI 0 "register_operand" "=x,v")
+ [(set (match_operand:V8HI 0 "register_operand" "=x,Yw")
(vec_select:V8HI
(vec_concat:V16HI
- (match_operand:V8HI 1 "register_operand" "0,v")
- (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
+ (match_operand:V8HI 1 "register_operand" "0,Yw")
+ (match_operand:V8HI 2 "vector_operand" "xBm,Ywm"))
(parallel [(const_int 4) (const_int 12)
(const_int 5) (const_int 13)
(const_int 6) (const_int 14)
@@ -14230,11 +14224,11 @@
(set_attr "mode" "XI")])
(define_insn "avx2_interleave_lowv16hi<mask_name>"
- [(set (match_operand:V16HI 0 "register_operand" "=v")
+ [(set (match_operand:V16HI 0 "register_operand" "=Yw")
(vec_select:V16HI
(vec_concat:V32HI
- (match_operand:V16HI 1 "register_operand" "v")
- (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
+ (match_operand:V16HI 1 "register_operand" "Yw")
+ (match_operand:V16HI 2 "nonimmediate_operand" "Ywm"))
(parallel [(const_int 0) (const_int 16)
(const_int 1) (const_int 17)
(const_int 2) (const_int 18)
@@ -14250,11 +14244,11 @@
(set_attr "mode" "OI")])
(define_insn "vec_interleave_lowv8hi<mask_name>"
- [(set (match_operand:V8HI 0 "register_operand" "=x,v")
+ [(set (match_operand:V8HI 0 "register_operand" "=x,Yw")
(vec_select:V8HI
(vec_concat:V16HI
- (match_operand:V8HI 1 "register_operand" "0,v")
- (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
+ (match_operand:V8HI 1 "register_operand" "0,Yw")
+ (match_operand:V8HI 2 "vector_operand" "xBm,Ywm"))
(parallel [(const_int 0) (const_int 8)
(const_int 1) (const_int 9)
(const_int 2) (const_int 10)
@@ -15190,9 +15184,9 @@
})
(define_insn "avx2_pshuflw_1<mask_name>"
- [(set (match_operand:V16HI 0 "register_operand" "=v")
+ [(set (match_operand:V16HI 0 "register_operand" "=Yw")
(vec_select:V16HI
- (match_operand:V16HI 1 "nonimmediate_operand" "vm")
+ (match_operand:V16HI 1 "nonimmediate_operand" "Ywm")
(parallel [(match_operand 2 "const_0_to_3_operand")
(match_operand 3 "const_0_to_3_operand")
(match_operand 4 "const_0_to_3_operand")
@@ -15264,9 +15258,9 @@
})
(define_insn "sse2_pshuflw_1<mask_name>"
- [(set (match_operand:V8HI 0 "register_operand" "=v")
+ [(set (match_operand:V8HI 0 "register_operand" "=Yw")
(vec_select:V8HI
- (match_operand:V8HI 1 "vector_operand" "vBm")
+ (match_operand:V8HI 1 "vector_operand" "YwBm")
(parallel [(match_operand 2 "const_0_to_3_operand")
(match_operand 3 "const_0_to_3_operand")
(match_operand 4 "const_0_to_3_operand")
@@ -15347,9 +15341,9 @@
})
(define_insn "avx2_pshufhw_1<mask_name>"
- [(set (match_operand:V16HI 0 "register_operand" "=v")
+ [(set (match_operand:V16HI 0 "register_operand" "=Yw")
(vec_select:V16HI
- (match_operand:V16HI 1 "nonimmediate_operand" "vm")
+ (match_operand:V16HI 1 "nonimmediate_operand" "Ywm")
(parallel [(const_int 0)
(const_int 1)
(const_int 2)
@@ -15421,9 +15415,9 @@
})
(define_insn "sse2_pshufhw_1<mask_name>"
- [(set (match_operand:V8HI 0 "register_operand" "=v")
+ [(set (match_operand:V8HI 0 "register_operand" "=Yw")
(vec_select:V8HI
- (match_operand:V8HI 1 "vector_operand" "vBm")
+ (match_operand:V8HI 1 "vector_operand" "YwBm")
(parallel [(const_int 0)
(const_int 1)
(const_int 2)
@@ -15489,18 +15483,16 @@
[(V16QI "TARGET_SSE4_1") V8HI])
(define_insn "*vec_extract<mode>"
- [(set (match_operand:<ssescalarmode> 0 "register_sse4nonimm_operand" "=r,m,r,m")
+ [(set (match_operand:<ssescalarmode> 0 "register_sse4nonimm_operand" "=r,m")
(vec_select:<ssescalarmode>
- (match_operand:PEXTR_MODE12 1 "register_operand" "x,x,v,v")
+ (match_operand:PEXTR_MODE12 1 "register_operand" "YW,YW")
(parallel
[(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
"TARGET_SSE2"
"@
%vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
- %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
- vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
- vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "*,sse4,avx512bw,avx512bw")
+ %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,sse4")
(set_attr "type" "sselog1")
(set_attr "prefix_data16" "1")
(set (attr "prefix_extra")
@@ -15510,23 +15502,20 @@
(const_string "*")
(const_string "1")))
(set_attr "length_immediate" "1")
- (set_attr "prefix" "maybe_vex,maybe_vex,evex,evex")
+ (set_attr "prefix" "maybe_vex,maybe_vex")
(set_attr "mode" "TI")])
(define_insn "*vec_extract<PEXTR_MODE12:mode>_zext"
- [(set (match_operand:SWI48 0 "register_operand" "=r,r")
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
(zero_extend:SWI48
(vec_select:<PEXTR_MODE12:ssescalarmode>
- (match_operand:PEXTR_MODE12 1 "register_operand" "x,v")
+ (match_operand:PEXTR_MODE12 1 "register_operand" "YW")
(parallel
[(match_operand:SI 2
"const_0_to_<PEXTR_MODE12:ssescalarnummask>_operand")]))))]
"TARGET_SSE2"
- "@
- %vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
- vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}"
- [(set_attr "isa" "*,avx512bw")
- (set_attr "type" "sselog1")
+ "%vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}"
+ [(set_attr "type" "sselog1")
(set_attr "prefix_data16" "1")
(set (attr "prefix_extra")
(if_then_else
@@ -15538,18 +15527,15 @@
(set_attr "mode" "TI")])
(define_insn "*vec_extractv16qi_zext"
- [(set (match_operand:HI 0 "register_operand" "=r,r")
+ [(set (match_operand:HI 0 "register_operand" "=r")
(zero_extend:HI
(vec_select:QI
- (match_operand:V16QI 1 "register_operand" "x,v")
+ (match_operand:V16QI 1 "register_operand" "YW")
(parallel
[(match_operand:SI 2 "const_0_to_15_operand")]))))]
"TARGET_SSE4_1"
- "@
- %vpextrb\t{%2, %1, %k0|%k0, %1, %2}
- vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
- [(set_attr "isa" "*,avx512bw")
- (set_attr "type" "sselog1")
+ "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
+ [(set_attr "type" "sselog1")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
@@ -15656,9 +15642,9 @@
"operands[1] = gen_lowpart (SImode, operands[1]);")
(define_insn "*vec_extractv4si"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm,Yr,*x,x,Yv")
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm,Yr,*x,Yw")
(vec_select:SI
- (match_operand:V4SI 1 "register_operand" "x,v,0,0,x,v")
+ (match_operand:V4SI 1 "register_operand" " x, v, 0, 0,Yw")
(parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
"TARGET_SSE4_1"
{
@@ -15674,7 +15660,6 @@
return "psrldq\t{%2, %0|%0, %2}";
case 4:
- case 5:
operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
@@ -15682,14 +15667,14 @@
gcc_unreachable ();
}
}
- [(set_attr "isa" "*,avx512dq,noavx,noavx,avx,avx512bw")
- (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1,sseishft1,sseishft1")
+ [(set_attr "isa" "*,avx512dq,noavx,noavx,avx")
+ (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1,sseishft1")
(set (attr "prefix_extra")
(if_then_else (eq_attr "alternative" "0,1")
(const_string "1")
(const_string "*")))
(set_attr "length_immediate" "1")
- (set_attr "prefix" "maybe_vex,evex,orig,orig,vex,evex")
+ (set_attr "prefix" "maybe_vex,evex,orig,orig,maybe_vex")
(set_attr "mode" "TI")])
(define_insn "*vec_extractv4si_zext"
@@ -16213,15 +16198,15 @@
})
(define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
- [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,v")
+ [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,<v_Yw>")
(truncate:VI12_AVX2_AVX512BW
(lshiftrt:<ssedoublemode>
(plus:<ssedoublemode>
(plus:<ssedoublemode>
(zero_extend:<ssedoublemode>
- (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "%0,v"))
+ (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "%0,<v_Yw>"))
(zero_extend:<ssedoublemode>
- (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,vm")))
+ (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,<v_Yw>m")))
(match_operand:<ssedoublemode> <mask_expand_op3> "const1_operand"))
(const_int 1))))]
"TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
@@ -16238,10 +16223,10 @@
;; The correct representation for this is absolutely enormous, and
;; surely not generally useful.
(define_insn "<sse2_avx2>_psadbw"
- [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
+ [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,YW")
(unspec:VI8_AVX2_AVX512BW
- [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
- (match_operand:<ssebytemode> 2 "vector_operand" "xBm,vm")]
+ [(match_operand:<ssebytemode> 1 "register_operand" "0,YW")
+ (match_operand:<ssebytemode> 2 "vector_operand" "xBm,YWm")]
UNSPEC_PSADBW))]
"TARGET_SSE2"
"@
@@ -16815,12 +16800,12 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn "avx2_pmaddubsw256"
- [(set (match_operand:V16HI 0 "register_operand" "=x,v")
+ [(set (match_operand:V16HI 0 "register_operand" "=Yw")
(ss_plus:V16HI
(mult:V16HI
(zero_extend:V16HI
(vec_select:V16QI
- (match_operand:V32QI 1 "register_operand" "x,v")
+ (match_operand:V32QI 1 "register_operand" "Yw")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
@@ -16831,7 +16816,7 @@
(const_int 28) (const_int 30)])))
(sign_extend:V16HI
(vec_select:V16QI
- (match_operand:V32QI 2 "nonimmediate_operand" "xm,vm")
+ (match_operand:V32QI 2 "nonimmediate_operand" "Ywm")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
@@ -16863,10 +16848,9 @@
(const_int 29) (const_int 31)]))))))]
"TARGET_AVX2"
"vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "*,avx512bw")
- (set_attr "type" "sseiadd")
+ [(set_attr "type" "sseiadd")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "vex,evex")
+ (set_attr "prefix" "vex")
(set_attr "mode" "OI")])
;; The correct representation for this is absolutely enormous, and
@@ -16919,19 +16903,19 @@
(set_attr "mode" "XI")])
(define_insn "ssse3_pmaddubsw128"
- [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
+ [(set (match_operand:V8HI 0 "register_operand" "=x,Yw")
(ss_plus:V8HI
(mult:V8HI
(zero_extend:V8HI
(vec_select:V8QI
- (match_operand:V16QI 1 "register_operand" "0,x,v")
+ (match_operand:V16QI 1 "register_operand" "0,Yw")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
(const_int 12) (const_int 14)])))
(sign_extend:V8HI
(vec_select:V8QI
- (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")
+ (match_operand:V16QI 2 "vector_operand" "xBm,Ywm")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
@@ -16952,14 +16936,13 @@
"TARGET_SSSE3"
"@
pmaddubsw\t{%2, %0|%0, %2}
- vpmaddubsw\t{%2, %1, %0|%0, %1, %2}
vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,avx")
(set_attr "type" "sseiadd")
(set_attr "atom_unit" "simul")
- (set_attr "prefix_data16" "1,*,*")
+ (set_attr "prefix_data16" "1,*")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,vex,evex")
+ (set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
(define_insn "ssse3_pmaddubsw"
@@ -17065,16 +17048,16 @@
})
(define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
- [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,<v_Yw>")
(truncate:VI2_AVX2
(lshiftrt:<ssedoublemode>
(plus:<ssedoublemode>
(lshiftrt:<ssedoublemode>
(mult:<ssedoublemode>
(sign_extend:<ssedoublemode>
- (match_operand:VI2_AVX2 1 "vector_operand" "%0,x,v"))
+ (match_operand:VI2_AVX2 1 "vector_operand" "%0,<v_Yw>"))
(sign_extend:<ssedoublemode>
- (match_operand:VI2_AVX2 2 "vector_operand" "xBm,xm,vm")))
+ (match_operand:VI2_AVX2 2 "vector_operand" "xBm,<v_Yw>m")))
(const_int 14))
(match_operand:VI2_AVX2 3 "const1_operand"))
(const_int 1))))]
@@ -17082,13 +17065,12 @@
&& !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"@
pmulhrsw\t{%2, %0|%0, %2}
- vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}
vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
- [(set_attr "isa" "noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,avx")
(set_attr "type" "sseimul")
- (set_attr "prefix_data16" "1,*,*")
+ (set_attr "prefix_data16" "1,*")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,maybe_evex,evex")
+ (set_attr "prefix" "orig,maybe_evex")
(set_attr "mode" "<sseinsnmode>")])
(define_expand "smulhrsv4hi3"
@@ -17160,21 +17142,20 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
- [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
+ [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,<v_Yw>")
(unspec:VI1_AVX512
- [(match_operand:VI1_AVX512 1 "register_operand" "0,x,v")
- (match_operand:VI1_AVX512 2 "vector_operand" "xBm,xm,vm")]
+ [(match_operand:VI1_AVX512 1 "register_operand" "0,<v_Yw>")
+ (match_operand:VI1_AVX512 2 "vector_operand" "xBm,<v_Yw>m")]
UNSPEC_PSHUFB))]
"TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
"@
pshufb\t{%2, %0|%0, %2}
- vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
- [(set_attr "isa" "noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog1")
- (set_attr "prefix_data16" "1,*,*")
+ (set_attr "prefix_data16" "1,*")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,maybe_evex,evex")
+ (set_attr "prefix" "orig,maybe_evex")
(set_attr "btver2_decode" "vector")
(set_attr "mode" "<sseinsnmode>")])
@@ -17274,11 +17255,11 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<ssse3_avx2>_palignr<mode>"
- [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x,v")
+ [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,<v_Yw>")
(unspec:SSESCALARMODE
- [(match_operand:SSESCALARMODE 1 "register_operand" "0,x,v")
- (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,xm,vm")
- (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
+ [(match_operand:SSESCALARMODE 1 "register_operand" "0,<v_Yw>")
+ (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,<v_Yw>m")
+ (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
UNSPEC_PALIGNR))]
"TARGET_SSSE3"
{
@@ -17289,19 +17270,18 @@
case 0:
return "palignr\t{%3, %2, %0|%0, %2, %3}";
case 1:
- case 2:
return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
default:
gcc_unreachable ();
}
}
- [(set_attr "isa" "noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,avx")
(set_attr "type" "sseishft")
(set_attr "atom_unit" "sishuf")
- (set_attr "prefix_data16" "1,*,*")
+ (set_attr "prefix_data16" "1,*")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
- (set_attr "prefix" "orig,vex,evex")
+ (set_attr "prefix" "orig,vex")
(set_attr "mode" "<sseinsnmode>")])
(define_insn_and_split "ssse3_palignrdi"
@@ -17367,9 +17347,9 @@
(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
(define_insn "*abs<mode>2"
- [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v")
+ [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=<v_Yw>")
(abs:VI1248_AVX512VL_AVX512BW
- (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand" "vBm")))]
+ (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand" "<v_Yw>Bm")))]
"TARGET_SSSE3"
"%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
[(set_attr "type" "sselog1")
@@ -17731,22 +17711,21 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<sse4_1_avx2>_packusdw<mask_name>"
- [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,x,v")
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,<v_Yw>")
(vec_concat:VI2_AVX2
(us_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 1 "register_operand" "0,0,x,v"))
+ (match_operand:<sseunpackmode> 1 "register_operand" "0,0,<v_Yw>"))
(us_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,xm,vm"))))]
+ (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,<v_Yw>m"))))]
"TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
"@
packusdw\t{%2, %0|%0, %2}
packusdw\t{%2, %0|%0, %2}
- vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
- [(set_attr "isa" "noavx,noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,orig,<mask_prefix>,evex")
+ (set_attr "prefix" "orig,orig,<mask_prefix>")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<sse4_1_avx2>_pblendvb"
@@ -17867,9 +17846,9 @@
(set_attr "mode" "TI")])
(define_insn "avx2_<code>v16qiv16hi2<mask_name>"
- [(set (match_operand:V16HI 0 "register_operand" "=v")
+ [(set (match_operand:V16HI 0 "register_operand" "=Yw")
(any_extend:V16HI
- (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
+ (match_operand:V16QI 1 "nonimmediate_operand" "Ywm")))]
"TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
"vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
@@ -17935,10 +17914,10 @@
"TARGET_AVX512BW")
(define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
- [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
+ [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,Yw")
(any_extend:V8HI
(vec_select:V8QI
- (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
+ (match_operand:V16QI 1 "register_operand" "Yr,*x,Yw")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
@@ -17952,7 +17931,7 @@
(set_attr "mode" "TI")])
(define_insn "*sse4_1_<code>v8qiv8hi2<mask_name>_1"
- [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
+ [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,Yw")
(any_extend:V8HI
(match_operand:V8QI 1 "memory_operand" "m,m,m")))]
"TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
@@ -21611,11 +21590,11 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "*ssse3_palignr<mode>_perm"
- [(set (match_operand:V_128 0 "register_operand" "=x,x,v")
+ [(set (match_operand:V_128 0 "register_operand" "=x,Yw")
(vec_select:V_128
- (match_operand:V_128 1 "register_operand" "0,x,v")
+ (match_operand:V_128 1 "register_operand" "0,Yw")
(match_parallel 2 "palignr_operand"
- [(match_operand 3 "const_int_operand" "n,n,n")])))]
+ [(match_operand 3 "const_int_operand" "n,n")])))]
"TARGET_SSSE3"
{
operands[2] = (GEN_INT (INTVAL (operands[3])
@@ -21626,19 +21605,18 @@
case 0:
return "palignr\t{%2, %1, %0|%0, %1, %2}";
case 1:
- case 2:
return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
default:
gcc_unreachable ();
}
}
- [(set_attr "isa" "noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,avx")
(set_attr "type" "sseishft")
(set_attr "atom_unit" "sishuf")
- (set_attr "prefix_data16" "1,*,*")
+ (set_attr "prefix_data16" "1,*")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
- (set_attr "prefix" "orig,vex,evex")])
+ (set_attr "prefix" "orig,maybe_evex")])
(define_expand "avx512vl_vinsert<mode>"
[(match_operand:VI48F_256 0 "register_operand")
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index cc27c79..e655e66 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -1688,6 +1688,140 @@ struct processor_costs znver2_cost = {
"16", /* Func alignment. */
};
+struct processor_costs znver3_cost = {
+ {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+
+  /* reg-reg moves are done by renaming and thus they are even cheaper than
+     1 cycle.  Because reg-reg move cost is 2 and the following tables
+     correspond to doubles of latencies, we do not model this correctly.  It
+     does not seem to make a practical difference to bump prices up even
+     more.  */
+ 6, /* cost for loading QImode using
+ movzbl. */
+ {6, 6, 6}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {8, 8, 8}, /* cost of storing integer
+ registers. */
+ 2, /* cost of reg,reg fld/fst. */
+ {6, 6, 16}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode. */
+ {8, 8, 16}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode. */
+ 2, /* cost of moving MMX register. */
+ {6, 6}, /* cost of loading MMX registers
+ in SImode and DImode. */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode. */
+ 2, 2, 3, /* cost of moving XMM,YMM,ZMM
+ register. */
+ {6, 6, 6, 6, 12}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit. */
+ {8, 8, 8, 8, 16}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit. */
+ 6, 6, /* SSE->integer and integer->SSE
+ moves. */
+ 8, 8, /* mask->integer and integer->mask moves */
+ {6, 6, 6}, /* cost of loading mask register
+ in QImode, HImode, SImode. */
+  {8, 8, 8},				/* cost of storing mask register
+					   in QImode, HImode, SImode.  */
+ 2, /* cost of moving mask register. */
+ /* End of register allocator costs. */
+ },
+
+ COSTS_N_INSNS (1), /* cost of an add instruction. */
+ COSTS_N_INSNS (1), /* cost of a lea instruction. */
+ COSTS_N_INSNS (1), /* variable shift costs. */
+ COSTS_N_INSNS (1), /* constant shift costs. */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */
+ COSTS_N_INSNS (3), /* HI. */
+ COSTS_N_INSNS (3), /* SI. */
+ COSTS_N_INSNS (3), /* DI. */
+ COSTS_N_INSNS (3)}, /* other. */
+ 0, /* cost of multiply per each bit
+ set. */
+  /* Depending on parameters, idiv can get faster on Ryzen.  This is an
+     upper bound.  */
+ {COSTS_N_INSNS (16), /* cost of a divide/mod for QI. */
+ COSTS_N_INSNS (22), /* HI. */
+ COSTS_N_INSNS (30), /* SI. */
+ COSTS_N_INSNS (45), /* DI. */
+ COSTS_N_INSNS (45)}, /* other. */
+ COSTS_N_INSNS (1), /* cost of movsx. */
+ COSTS_N_INSNS (1), /* cost of movzx. */
+ 8, /* "large" insn. */
+ 9, /* MOVE_RATIO. */
+ 6, /* CLEAR_RATIO */
+ {6, 6, 6}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {8, 8, 8}, /* cost of storing integer
+ registers. */
+ {6, 6, 6, 6, 12}, /* cost of loading SSE registers
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {8, 8, 8, 8, 16}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {6, 6, 6, 6, 12}, /* cost of unaligned loads. */
+ {8, 8, 8, 8, 16}, /* cost of unaligned stores. */
+ 2, 2, 3, /* cost of moving XMM,YMM,ZMM
+ register. */
+ 6, /* cost of moving SSE register to integer. */
+  /* The 128-bit VGATHERDPD is 23 uops with throughput 9; the 256-bit form
+     is 35 uops, throughput 12.  Approx 9 uops do not depend on vector size
+     and every load is 7 uops.  */
+ 18, 8, /* Gather load static, per_elt. */
+ 18, 10, /* Gather store static, per_elt. */
+ 32, /* size of l1 cache. */
+ 512, /* size of l2 cache. */
+ 64, /* size of prefetch block. */
+  /* New AMD processors never drop prefetches; if they cannot be performed
+     immediately, they are queued.  We set the number of simultaneous
+     prefetches to a large constant to reflect this (it is probably not a
+     good idea to leave the number of prefetches entirely unlimited, as
+     their execution also takes some time).  */
+ 100, /* number of parallel prefetches. */
+ 3, /* Branch cost. */
+ COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (5), /* cost of FMUL instruction. */
+ /* Latency of fdiv is 8-15. */
+ COSTS_N_INSNS (15), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (1), /* cost of FABS instruction. */
+ COSTS_N_INSNS (1), /* cost of FCHS instruction. */
+ /* Latency of fsqrt is 4-10. */
+ COSTS_N_INSNS (10), /* cost of FSQRT instruction. */
+
+ COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
+ COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
+ COSTS_N_INSNS (3), /* cost of MULSS instruction. */
+ COSTS_N_INSNS (3), /* cost of MULSD instruction. */
+ COSTS_N_INSNS (5), /* cost of FMA SS instruction. */
+ COSTS_N_INSNS (5), /* cost of FMA SD instruction. */
+ COSTS_N_INSNS (10), /* cost of DIVSS instruction. */
+ /* 9-13. */
+ COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
+ COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
+ COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
+  /* Zen can execute 4 integer operations per cycle.  FP operations
+     take 3 cycles and it can execute 2 integer additions and 2
+     multiplications, thus reassociation may make sense up to a width of 6.
+     SPEC2k6 benchmarks suggest
+     that 4 works better than 6, probably due to register pressure.
+
+     Integer vector operations are handled by the FP unit, which executes 3
+     vector plus/minus operations per cycle but only one multiply.  This is
+     adjusted in ix86_reassociation_width.  */
+ 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
+ znver2_memcpy,
+ znver2_memset,
+ COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
+ COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
+ "16", /* Loop alignment. */
+ "16", /* Jump alignment. */
+ "0:0:8", /* Label alignment. */
+ "16", /* Func alignment. */
+};
+
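
The cost entries above are expressed in GCC's internal units: COSTS_N_INSNS (N) expands to N * 4 (see gcc/rtl.h), which keeps them comparable to the reg-reg move cost of 2 that the register-allocator block uses as its baseline. The following is a minimal, self-contained sketch of how one row of the table might be consumed; struct mult_costs and znver3_mult_cost are illustrative names, not part of this patch:

/* Standalone sketch: model the znver3 multiply row above.  */
#define COSTS_N_INSNS(N) ((N) * 4)	/* mirrors the definition in gcc/rtl.h */

struct mult_costs { int qi, hi, si, di, other; };

static const struct mult_costs znver3_mult = {
  COSTS_N_INSNS (3), COSTS_N_INSNS (3), COSTS_N_INSNS (3),
  COSTS_N_INSNS (3), COSTS_N_INSNS (3)
};

/* Return the modelled multiply cost for an operand width in bits.  */
static int
znver3_mult_cost (int bits)
{
  if (bits <= 8)  return znver3_mult.qi;   /* QImode */
  if (bits <= 16) return znver3_mult.hi;   /* HImode */
  if (bits <= 32) return znver3_mult.si;   /* SImode */
  if (bits <= 64) return znver3_mult.di;   /* DImode */
  return znver3_mult.other;
}
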
/* skylake_cost should produce code tuned for Skylake family of CPUs.  */
static stringop_algs skylake_memcpy[2] = {
{libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 69f3c70..859af75 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -1208,10 +1208,11 @@
(define_predicate "branch_comparison_operator"
(and (match_operand 0 "comparison_operator")
(match_test "GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_CC")
- (if_then_else (match_test "GET_MODE (XEXP (op, 0)) == CCFPmode
- && !flag_finite_math_only")
- (match_code "lt,gt,eq,unordered,unge,unle,ne,ordered")
- (match_code "lt,ltu,le,leu,gt,gtu,ge,geu,eq,ne"))
+ (if_then_else (match_test "GET_MODE (XEXP (op, 0)) == CCFPmode")
+ (if_then_else (match_test "flag_finite_math_only")
+ (match_code "lt,le,gt,ge,eq,ne,unordered,ordered")
+ (match_code "lt,gt,eq,unordered,unge,unle,ne,ordered"))
+ (match_code "lt,ltu,le,leu,gt,gtu,ge,geu,eq,ne"))
(match_test "validate_condition_mode (GET_CODE (op),
GET_MODE (XEXP (op, 0))),
1")))
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 151136b..f7b1c03 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -16714,13 +16714,21 @@ s390_shift_truncation_mask (machine_mode mode)
static bool
f_constraint_p (const char *constraint)
{
+ bool seen_f_p = false;
+ bool seen_v_p = false;
+
for (size_t i = 0, c_len = strlen (constraint); i < c_len;
i += CONSTRAINT_LEN (constraint[i], constraint + i))
{
if (constraint[i] == 'f')
- return true;
+ seen_f_p = true;
+ if (constraint[i] == 'v')
+ seen_v_p = true;
}
- return false;
+
+ /* Treat "fv" constraints as "v", because LRA will choose the widest register
+ * class. */
+ return seen_f_p && !seen_v_p;
}
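
The net effect: a string counts as an "f" constraint only if it mentions 'f' without also offering 'v'. Below is a simplified standalone model of the scan (an assumption-laden sketch: real constraints can be multi-character, which is why the code above steps by CONSTRAINT_LEN; here every constraint is a single letter, and f_constraint_p_model is an illustrative name):

#include <stdbool.h>
#include <string.h>

/* Simplified model of f_constraint_p: true iff CONSTRAINT contains an 'f'
   but no 'v'.  Assumes single-letter constraints only.  */
static bool
f_constraint_p_model (const char *constraint)
{
  bool seen_f_p = strchr (constraint, 'f') != NULL;
  bool seen_v_p = strchr (constraint, 'v') != NULL;
  return seen_f_p && !seen_v_p;
}

/* Examples: "f" -> true (gets the fixup), "fv" -> false (LRA will pick the
   wider vector class), "d" -> false.  */
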
/* Implement TARGET_MD_ASM_ADJUST hook in order to fix up "f"
diff --git a/gcc/config/sparc/constraints.md b/gcc/config/sparc/constraints.md
index 82bbba9..7ddf014 100644
--- a/gcc/config/sparc/constraints.md
+++ b/gcc/config/sparc/constraints.md
@@ -19,7 +19,7 @@
;;; Unused letters:
;;; B
-;;; a jkl uv xyz
+;;; a jkl uvwxyz
;; Register constraints
@@ -190,14 +190,7 @@
(match_test "TARGET_ARCH32")
(match_test "register_ok_for_ldd (op)")))
-;; Equivalent to 'T' but in 64-bit mode without alignment requirement
(define_memory_constraint "W"
- "Memory reference for 'e' constraint floating-point register"
- (and (match_code "mem")
- (match_test "TARGET_ARCH64")
- (match_test "memory_ok_for_ldd (op)")))
-
-(define_memory_constraint "w"
"A memory with only a base register"
(match_operand 0 "mem_noofs_operand"))
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index f150417..42ba415 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -9223,14 +9223,17 @@ register_ok_for_ldd (rtx reg)
int
memory_ok_for_ldd (rtx op)
{
- /* In 64-bit mode, we assume that the address is word-aligned. */
- if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
+ if (!mem_min_alignment (op, 8))
return 0;
- if (! can_create_pseudo_p ()
+ /* We need to perform the job of a memory constraint. */
+ if ((reload_in_progress || reload_completed)
&& !strict_memory_address_p (Pmode, XEXP (op, 0)))
return 0;
+ if (lra_in_progress && !memory_address_p (Pmode, XEXP (op, 0)))
+ return 0;
+
return 1;
}
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index 02b7c8d..c5d3696 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -1869,8 +1869,8 @@ visl")
(set_attr "lra" "*,*,disabled,disabled,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")])
(define_insn "*movdi_insn_sp64"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r, m, r,*e,?*e,?*e, W,b,b")
- (match_operand:DI 1 "input_operand" "rI,N,m,rJ,*e, r, *e, W,?*e,J,P"))]
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r, m, r,*e,?*e,?*e, m,b,b")
+ (match_operand:DI 1 "input_operand" "rI,N,m,rJ,*e, r, *e, m,?*e,J,P"))]
"TARGET_ARCH64
&& (register_operand (operands[0], DImode)
|| register_or_zero_or_all_ones_operand (operands[1], DImode))"
@@ -2498,8 +2498,8 @@ visl")
(set_attr "lra" "*,*,*,*,*,*,*,*,*,*,disabled,disabled,*,*,*,*,*")])
(define_insn "*movdf_insn_sp64"
- [(set (match_operand:DF 0 "nonimmediate_operand" "=b,b,e,*r, e, e,W, *r,*r, m,*r")
- (match_operand:DF 1 "input_operand" "G,C,e, e,*r,W#F,e,*rG, m,*rG, F"))]
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=b,b,e,*r, e, e,m, *r,*r, m,*r")
+ (match_operand:DF 1 "input_operand" "G,C,e, e,*r,m#F,e,*rG, m,*rG, F"))]
"TARGET_ARCH64
&& (register_operand (operands[0], DFmode)
|| register_or_zero_or_all_ones_operand (operands[1], DFmode))"
@@ -8467,8 +8467,8 @@ visl")
(set_attr "cpu_feature" "vis,vis,vis,*,*,*,*,*,*,vis3,vis3")])
(define_insn "*mov<VM64:mode>_insn_sp64"
- [(set (match_operand:VM64 0 "nonimmediate_operand" "=e,e,e,e,W,m,*r, m,*r, e,*r")
- (match_operand:VM64 1 "input_operand" "Y,Z,e,W,e,Y, m,*r, e,*r,*r"))]
+ [(set (match_operand:VM64 0 "nonimmediate_operand" "=e,e,e,e,m,m,*r, m,*r, e,*r")
+ (match_operand:VM64 1 "input_operand" "Y,Z,e,m,e,Y, m,*r, e,*r,*r"))]
"TARGET_VIS
&& TARGET_ARCH64
&& (register_operand (operands[0], <VM64:MODE>mode)
diff --git a/gcc/config/sparc/sync.md b/gcc/config/sparc/sync.md
index c578e95..c0a20ef 100644
--- a/gcc/config/sparc/sync.md
+++ b/gcc/config/sparc/sync.md
@@ -202,7 +202,7 @@
(define_insn "*atomic_compare_and_swap<mode>_1"
[(set (match_operand:I48MODE 0 "register_operand" "=r")
- (match_operand:I48MODE 1 "mem_noofs_operand" "+w"))
+ (match_operand:I48MODE 1 "mem_noofs_operand" "+W"))
(set (match_dup 1)
(unspec_volatile:I48MODE
[(match_operand:I48MODE 2 "register_operand" "r")
@@ -214,7 +214,7 @@
(define_insn "atomic_compare_and_swap_leon3_1"
[(set (match_operand:SI 0 "register_operand" "=r")
- (match_operand:SI 1 "mem_noofs_operand" "+w"))
+ (match_operand:SI 1 "mem_noofs_operand" "+W"))
(set (match_dup 1)
(unspec_volatile:SI
[(match_operand:SI 2 "register_operand" "r")
@@ -235,7 +235,7 @@
(define_insn "*atomic_compare_and_swapdi_v8plus"
[(set (match_operand:DI 0 "register_operand" "=h")
- (match_operand:DI 1 "mem_noofs_operand" "+w"))
+ (match_operand:DI 1 "mem_noofs_operand" "+W"))
(set (match_dup 1)
(unspec_volatile:DI
[(match_operand:DI 2 "register_operand" "h")