Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/aarch64/aarch64-c.cc      |   1
-rw-r--r--  gcc/config/aarch64/aarch64-cores.def |   2
-rw-r--r--  gcc/config/aarch64/aarch64-protos.h  |   1
-rw-r--r--  gcc/config/aarch64/aarch64.cc        |  73
-rw-r--r--  gcc/config/aarch64/aarch64.md        |  72
-rw-r--r--  gcc/config/i386/i386-expand.cc       |  50
-rw-r--r--  gcc/config/i386/i386.cc              | 146
-rw-r--r--  gcc/config/i386/i386.md              |   4
-rw-r--r--  gcc/config/i386/predicates.md        |  14
-rw-r--r--  gcc/config/i386/sse.md               |  10
-rw-r--r--  gcc/config/mips/mips.cc              |   3
-rw-r--r--  gcc/config/riscv/riscv-cores.def     |  48
-rw-r--r--  gcc/config/riscv/vector.md           |  22
-rw-r--r--  gcc/config/rs6000/rs6000.cc          |  11
14 files changed, 380 insertions(+), 77 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index d1e2ab9..98337b7 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -293,6 +293,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
aarch64_def_or_undef (TARGET_SME2, "__ARM_FEATURE_SME2", pfile);
aarch64_def_or_undef (AARCH64_HAVE_ISA (SME2p1),
"__ARM_FEATURE_SME2p1", pfile);
+ aarch64_def_or_undef (TARGET_FAMINMAX, "__ARM_FEATURE_FAMINMAX", pfile);
/* Not for ACLE, but required to keep "float.h" correct if we switch
target between implementations that do or do not support ARMv8.2-A
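The new __ARM_FEATURE_FAMINMAX macro follows the usual ACLE feature-test convention, so user code can gate FAMINMAX-specific paths at preprocessing time. A hedged sketch (the vamaxq_f32 intrinsic name and the fallback body are my illustration, not part of this patch):

    #include <arm_neon.h>

    /* famax is an absolute-maximum operation; fall back to abs + maxnm
       (roughly equivalent, NaN details aside) when the feature is absent.  */
    float32x4_t absmax (float32x4_t a, float32x4_t b)
    {
    #ifdef __ARM_FEATURE_FAMINMAX
      return vamaxq_f32 (a, b);                           /* assumed name */
    #else
      return vmaxnmq_f32 (vabsq_f32 (a), vabsq_f32 (b));  /* fallback */
    #endif
    }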
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 7f204fd..1209630 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -224,7 +224,7 @@ AARCH64_CORE("neoverse-v3ae", neoversev3ae, cortexa57, V9_2A, (SVE2_BITPERM, RNG
AARCH64_CORE("demeter", demeter, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1)
/* NVIDIA ('N') cores. */
-AARCH64_CORE("olympus", olympus, cortexa57, V9_2A, (SVE2_BITPERM, RNG, LS64, MEMTAG, PROFILE, FAMINMAX, FP8DOT2, LUT, SVE2_AES, SVE2_SHA3, SVE2_SM4), neoversev3, 0x4e, 0x10, -1)
+AARCH64_CORE("olympus", olympus, cortexa57, V9_2A, (SVE2_BITPERM, RNG, LS64, MEMTAG, PROFILE, FAMINMAX, FP8FMA, FP8DOT2, FP8DOT4, LUT, SVE2_AES, SVE2_SHA3, SVE2_SM4), neoversev3, 0x4e, 0x10, -1)
/* Generic Architecture Processors. */
AARCH64_CORE("generic", generic, cortexa53, V8A, (), generic, 0x0, 0x0, -1)
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 8f44aea..1ca86c9 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1260,6 +1260,7 @@ void aarch64_restore_za (rtx);
void aarch64_expand_crc_using_pmull (scalar_mode, scalar_mode, rtx *);
void aarch64_expand_reversed_crc_using_pmull (scalar_mode, scalar_mode, rtx *);
+void aarch64_expand_fp_spaceship (rtx, rtx, rtx, rtx);
extern bool aarch64_gcs_enabled ();
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 433ec97..38c112c 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -31294,6 +31294,79 @@ aarch64_expand_reversed_crc_using_pmull (scalar_mode crc_mode,
}
}
+/* Expand the spaceship optab for floating-point operands.
+
+ If the result is compared against (-1, 0, 1, 2), expand into
+ fcmpe + conditional branch insns.
+
+ Otherwise (the result is just stored as an integer), expand into
+ fcmpe + a sequence of conditional select/increment/invert insns. */
+void
+aarch64_expand_fp_spaceship (rtx dest, rtx op0, rtx op1, rtx hint)
+{
+ rtx cc_reg = gen_rtx_REG (CCFPEmode, CC_REGNUM);
+ emit_set_insn (cc_reg, gen_rtx_COMPARE (CCFPEmode, op0, op1));
+
+ rtx cc_gt = gen_rtx_GT (VOIDmode, cc_reg, const0_rtx);
+ rtx cc_lt = gen_rtx_LT (VOIDmode, cc_reg, const0_rtx);
+ rtx cc_un = gen_rtx_UNORDERED (VOIDmode, cc_reg, const0_rtx);
+
+ if (hint == const0_rtx)
+ {
+ rtx un_label = gen_label_rtx ();
+ rtx lt_label = gen_label_rtx ();
+ rtx gt_label = gen_label_rtx ();
+ rtx end_label = gen_label_rtx ();
+
+ rtx temp = gen_rtx_IF_THEN_ELSE (VOIDmode, cc_un,
+ gen_rtx_LABEL_REF (Pmode, un_label), pc_rtx);
+ aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, temp));
+
+ temp = gen_rtx_IF_THEN_ELSE (VOIDmode, cc_lt,
+ gen_rtx_LABEL_REF (Pmode, lt_label), pc_rtx);
+ emit_jump_insn (gen_rtx_SET (pc_rtx, temp));
+
+ temp = gen_rtx_IF_THEN_ELSE (VOIDmode, cc_gt,
+ gen_rtx_LABEL_REF (Pmode, gt_label), pc_rtx);
+ emit_jump_insn (gen_rtx_SET (pc_rtx, temp));
+
+ /* Equality. */
+ emit_move_insn (dest, const0_rtx);
+ emit_jump (end_label);
+
+ emit_label (un_label);
+ emit_move_insn (dest, const2_rtx);
+ emit_jump (end_label);
+
+ emit_label (gt_label);
+ emit_move_insn (dest, const1_rtx);
+ emit_jump (end_label);
+
+ emit_label (lt_label);
+ emit_move_insn (dest, constm1_rtx);
+
+ emit_label (end_label);
+ }
+ else
+ {
+ rtx temp0 = gen_reg_rtx (SImode);
+ rtx temp1 = gen_reg_rtx (SImode);
+ rtx cc_ungt = gen_rtx_UNGT (VOIDmode, cc_reg, const0_rtx);
+
+ /* The value of hint is stored if the operands are unordered. */
+ rtx temp_un = gen_int_mode (UINTVAL (hint) - 1, SImode);
+ if (!aarch64_reg_zero_or_m1_or_1 (temp_un, SImode))
+ temp_un = force_reg (SImode, temp_un);
+
+ emit_set_insn (temp0, gen_rtx_IF_THEN_ELSE (SImode, cc_lt,
+ constm1_rtx, const0_rtx));
+ emit_set_insn (temp1, gen_rtx_IF_THEN_ELSE (SImode, cc_un,
+ temp_un, const0_rtx));
+ emit_set_insn (dest, gen_rtx_IF_THEN_ELSE (SImode, cc_ungt,
+ gen_rtx_PLUS (SImode, temp1, const1_rtx), temp0));
+ }
+}
+
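A hedged, source-level reading of the two branches above (my examples, compiled as C++20; not part of the patch): when the three-way result only feeds comparisons against -1/0/1/2 the branchy form is used, while a materialized std::partial_ordering takes the conditional-select sequence.

    #include <compare>

    /* Result only compared: hint is 0, fcmpe + conditional branches.  */
    bool less (double a, double b) { return (a <=> b) < 0; }

    /* Result stored/returned: fcmpe + csel/csinc/csinv-style sequence.  */
    std::partial_ordering order (double a, double b) { return a <=> b; }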
/* Target-specific selftests. */
#if CHECKING_P
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 031e621..c678f7a 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -707,11 +707,12 @@
)
(define_expand "cbranch<mode>4"
- [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
- [(match_operand:GPF 1 "register_operand")
- (match_operand:GPF 2 "aarch64_fp_compare_operand")])
- (label_ref (match_operand 3 "" ""))
- (pc)))]
+ [(set (pc) (if_then_else
+ (match_operator 0 "aarch64_comparison_operator"
+ [(match_operand:GPF_F16 1 "register_operand")
+ (match_operand:GPF_F16 2 "aarch64_fp_compare_operand")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
""
"
operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1],
@@ -4337,26 +4338,28 @@
(define_insn "fcmp<mode>"
[(set (reg:CCFP CC_REGNUM)
- (compare:CCFP (match_operand:GPF 0 "register_operand")
- (match_operand:GPF 1 "aarch64_fp_compare_operand")))]
+ (compare:CCFP
+ (match_operand:GPF_F16 0 "register_operand")
+ (match_operand:GPF_F16 1 "aarch64_fp_compare_operand")))]
"TARGET_FLOAT"
{@ [ cons: 0 , 1 ]
[ w , Y ] fcmp\t%<s>0, #0.0
[ w , w ] fcmp\t%<s>0, %<s>1
}
- [(set_attr "type" "fcmp<s>")]
+ [(set_attr "type" "fcmp<stype>")]
)
(define_insn "fcmpe<mode>"
[(set (reg:CCFPE CC_REGNUM)
- (compare:CCFPE (match_operand:GPF 0 "register_operand")
- (match_operand:GPF 1 "aarch64_fp_compare_operand")))]
+ (compare:CCFPE
+ (match_operand:GPF_F16 0 "register_operand")
+ (match_operand:GPF_F16 1 "aarch64_fp_compare_operand")))]
"TARGET_FLOAT"
{@ [ cons: 0 , 1 ]
[ w , Y ] fcmpe\t%<s>0, #0.0
[ w , w ] fcmpe\t%<s>0, %<s>1
}
- [(set_attr "type" "fcmp<s>")]
+ [(set_attr "type" "fcmp<stype>")]
)
(define_insn "*cmp_swp_<shift>_reg<mode>"
@@ -4392,6 +4395,49 @@
[(set_attr "type" "alus_ext")]
)
+;; <=> operator pattern (integer)
+;; (a == b) ? 0 : (a < b) ? -1 : 1.
+(define_expand "spaceship<mode>4"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:GPI 1 "register_operand")
+ (match_operand:GPI 2 "register_operand")
+ (match_operand:SI 3 "const_int_operand")]
+ ""
+ {
+ // 1 indicates unsigned comparison, -1 indicates signed.
+ gcc_assert (operands[3] == constm1_rtx || operands[3] == const1_rtx);
+
+ rtx cc_reg = aarch64_gen_compare_reg (EQ, operands[1], operands[2]);
+ RTX_CODE code_gt = operands[3] == const1_rtx ? GTU : GT;
+ RTX_CODE code_lt = operands[3] == const1_rtx ? LTU : LT;
+
+ rtx cc_gt = gen_rtx_fmt_ee (code_gt, VOIDmode, cc_reg, const0_rtx);
+ rtx cc_lt = gen_rtx_fmt_ee (code_lt, VOIDmode, cc_reg, const0_rtx);
+
+ rtx temp = gen_reg_rtx (SImode);
+ emit_insn (gen_rtx_SET (temp, gen_rtx_IF_THEN_ELSE (SImode, cc_gt,
+ const1_rtx, const0_rtx)));
+ emit_insn (gen_rtx_SET (operands[0], gen_rtx_IF_THEN_ELSE (SImode, cc_lt,
+ constm1_rtx, temp)));
+ DONE;
+ }
+)
+
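A hedged sketch of the source shape this integer pattern serves (my example, compiled as C++20); operand 3 carries the signedness described in the comment, and the expansion is the compare plus the two conditional selects built above:

    #include <compare>

    /* Signed: operand 3 is -1.  */
    std::strong_ordering scmp (long a, long b) { return a <=> b; }

    /* Unsigned: operand 3 is 1.  */
    std::strong_ordering ucmp (unsigned long a, unsigned long b) { return a <=> b; }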
+;; <=> operator pattern (floating-point)
+;; (a == b) ? 0 : (a < b) ? -1 : (a > b) ? 1 : UNORDERED.
+(define_expand "spaceship<mode>4"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:GPF 1 "register_operand")
+ (match_operand:GPF 2 "register_operand")
+ (match_operand:SI 3 "const_int_operand")]
+ "TARGET_FLOAT"
+ {
+ aarch64_expand_fp_spaceship (operands[0], operands[1], operands[2],
+ operands[3]);
+ DONE;
+ }
+)
+
;; -------------------------------------------------------------------
;; Store-flag and conditional select insns
;; -------------------------------------------------------------------
@@ -4424,8 +4470,8 @@
(define_expand "cstore<mode>4"
[(set (match_operand:SI 0 "register_operand")
(match_operator:SI 1 "aarch64_comparison_operator_mode"
- [(match_operand:GPF 2 "register_operand")
- (match_operand:GPF 3 "aarch64_fp_compare_operand")]))]
+ [(match_operand:GPF_F16 2 "register_operand")
+ (match_operand:GPF_F16 3 "aarch64_fp_compare_operand")]))]
""
"
operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2],
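Extending these patterns from GPF to GPF_F16 lets HFmode comparisons go through cbranch/cstore/fcmp/fcmpe directly. A hedged example (assumes a compiler and target where _Float16 and half-precision FP compare are available):

    /* With +fp16 this can become a single half-precision fcmp plus cset,
       instead of first widening both operands to single precision.  */
    int le_hf (_Float16 a, _Float16 b) { return a <= b; }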
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index cdfd94d..a314800 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -4138,6 +4138,10 @@ ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
return false;
mode = GET_MODE (dest);
+ if (immediate_operand (if_false, mode))
+ if_false = force_reg (mode, if_false);
+ if (immediate_operand (if_true, mode))
+ if_true = force_reg (mode, if_true);
/* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
but MODE may be a vector mode and thus not appropriate. */
@@ -4687,6 +4691,8 @@ ix86_expand_fp_movcc (rtx operands[])
compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
}
+ operands[2] = force_reg (mode, operands[2]);
+ operands[3] = force_reg (mode, operands[3]);
emit_insn (gen_rtx_SET (operands[0],
gen_rtx_IF_THEN_ELSE (mode, compare_op,
operands[2], operands[3])));
@@ -19256,8 +19262,6 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
e1 = gen_reg_rtx (mode);
x1 = gen_reg_rtx (mode);
- /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
-
b = force_reg (mode, b);
/* x0 = rcp(b) estimate */
@@ -19270,20 +19274,42 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
UNSPEC_RCP)));
- /* e0 = x0 * b */
- emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
+ unsigned vector_size = GET_MODE_SIZE (mode);
+
+ /* (a - (rcp(b) * a * b)) * rcp(b) + rcp(b) * a
+ N-R step with 2 fma implementation. */
+ if (TARGET_FMA
+ || (TARGET_AVX512F && vector_size == 64)
+ || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
+ {
+ /* e0 = x0 * a */
+ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
+ /* e1 = e0 * b - a */
+ emit_insn (gen_rtx_SET (e1, gen_rtx_FMA (mode, e0, b,
+ gen_rtx_NEG (mode, a))));
+ /* res = - e1 * x0 + e0 */
+ emit_insn (gen_rtx_SET (res, gen_rtx_FMA (mode,
+ gen_rtx_NEG (mode, e1),
+ x0, e0)));
+ }
+ else
+ /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
+ {
+ /* e0 = x0 * b */
+ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
- /* e0 = x0 * e0 */
- emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
+ /* e1 = x0 + x0 */
+ emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
- /* e1 = x0 + x0 */
- emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
+ /* e0 = x0 * e0 */
+ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
- /* x1 = e1 - e0 */
- emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
+ /* x1 = e1 - e0 */
+ emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
- /* res = a * x1 */
- emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
+ /* res = a * x1 */
+ emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
+ }
}
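Both branches implement the same Newton-Raphson refinement of the rcp estimate x0 ~ 1/b; the FMA path only re-associates it into two fused operations (rounding differs slightly, the algebra does not). A small, hypothetical scalar check mirroring the RTL above (not part of the patch):

    /* FMA form: e0 = a*x0; e1 = e0*b - a; res = e0 - e1*x0.  */
    static float swdiv_fma (float a, float b, float x0)
    {
      float e0 = x0 * a;
      float e1 = e0 * b - a;
      return e0 - e1 * x0;
    }

    /* Non-FMA form: res = a * ((x0 + x0) - b*x0*x0).  */
    static float swdiv_plain (float a, float b, float x0)
    {
      float e0 = x0 * b;
      float e1 = x0 + x0;
      e0 = x0 * e0;
      return a * (e1 - e0);
    }
    /* Both reduce to a*x0*(2 - b*x0), i.e. one N-R step towards a/b.  */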
/* Output code to perform a Newton-Raphson approximation of a
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 28603c2..aef4145 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -25257,32 +25257,6 @@ ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
return new ix86_vector_costs (vinfo, costing_for_scalar);
}
-/* Return cost of statement doing FP conversion. */
-
-static unsigned
-fp_conversion_stmt_cost (machine_mode mode, gimple *stmt, bool scalar_p)
-{
- int outer_size
- = tree_to_uhwi
- (TYPE_SIZE
- (TREE_TYPE (gimple_assign_lhs (stmt))));
- int inner_size
- = tree_to_uhwi
- (TYPE_SIZE
- (TREE_TYPE (gimple_assign_rhs1 (stmt))));
- int stmt_cost = vec_fp_conversion_cost
- (ix86_tune_cost, GET_MODE_BITSIZE (mode));
- /* VEC_PACK_TRUNC_EXPR: If inner size is greater than outer size we will end
- up doing two conversions and packing them. */
- if (!scalar_p && inner_size > outer_size)
- {
- int n = inner_size / outer_size;
- stmt_cost = stmt_cost * n
- + (n - 1) * ix86_vec_cost (mode, ix86_cost->sse_op);
- }
- return stmt_cost;
-}
-
unsigned
ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
stmt_vec_info stmt_info, slp_tree node,
@@ -25326,7 +25300,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
else if (X87_FLOAT_MODE_P (mode))
stmt_cost = ix86_cost->fadd;
else
- stmt_cost = ix86_cost->add;
+ stmt_cost = ix86_cost->add;
}
else
stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss
@@ -25381,7 +25355,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
(subcode == RSHIFT_EXPR
&& !TYPE_UNSIGNED (TREE_TYPE (op1)))
? ASHIFTRT : LSHIFTRT, mode,
- TREE_CODE (op2) == INTEGER_CST,
+ TREE_CODE (op2) == INTEGER_CST,
cst_and_fits_in_hwi (op2)
? int_cst_value (op2) : -1,
false, false, NULL, NULL);
@@ -25390,25 +25364,102 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
case NOP_EXPR:
/* Only sign-conversions are free. */
if (tree_nop_conversion_p
- (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
+ (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
stmt_cost = 0;
else if (fp)
- stmt_cost = fp_conversion_stmt_cost (mode, stmt_info->stmt,
- scalar_p);
+ stmt_cost = vec_fp_conversion_cost
+ (ix86_tune_cost, GET_MODE_BITSIZE (mode));
+ break;
+
+ case COND_EXPR:
+ {
+ /* SSE2 conditional move sequence is:
+ pcmpgtd %xmm5, %xmm0
+ pand %xmm0, %xmm2
+ pandn %xmm1, %xmm0
+ por %xmm2, %xmm0
+ while SSE4 uses cmp + blend
+ and AVX512 masked moves. */
+
+ int ninsns = TARGET_SSE4_1 ? 2 : 4;
+
+ if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+ stmt_cost = ninsns * ix86_cost->sse_op;
+ else if (X87_FLOAT_MODE_P (mode))
+ /* x87 requires conditional branch. We don't have cost for
+ that. */
+ ;
+ else if (VECTOR_MODE_P (mode))
+ stmt_cost = ix86_vec_cost (mode, ninsns * ix86_cost->sse_op);
+ else
+ /* compare + cmov. */
+ stmt_cost = ix86_cost->add * 2;
+ }
break;
- case BIT_IOR_EXPR:
- case ABS_EXPR:
- case ABSU_EXPR:
case MIN_EXPR:
case MAX_EXPR:
+ if (fp)
+ {
+ if (X87_FLOAT_MODE_P (mode))
+ /* x87 requires conditional branch. We don't have cost for
+ that. */
+ ;
+ else
+ /* minss */
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+ }
+ else
+ {
+ if (VECTOR_MODE_P (mode))
+ {
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+ /* vpmin was introduced in SSE3.
+ SSE2 needs pcmpgtd + pand + pandn + pxor. */
+ if (!TARGET_SSSE3)
+ stmt_cost *= 4;
+ }
+ else
+ /* cmp + cmov. */
+ stmt_cost = ix86_cost->add * 2;
+ }
+ break;
+
+ case ABS_EXPR:
+ case ABSU_EXPR:
+ if (fp)
+ {
+ if (X87_FLOAT_MODE_P (mode))
+ /* fabs. */
+ stmt_cost = ix86_cost->fabs;
+ else
+ /* andss of sign bit. */
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+ }
+ else
+ {
+ if (VECTOR_MODE_P (mode))
+ {
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+ /* pabs was introduced in SSSE3.
+ SSE2 needs psrad + pxor + psub. */
+ if (!TARGET_SSSE3)
+ stmt_cost *= 3;
+ }
+ else
+ /* neg + cmov. */
+ stmt_cost = ix86_cost->add * 2;
+ }
+ break;
+
+ case BIT_IOR_EXPR:
case BIT_XOR_EXPR:
case BIT_AND_EXPR:
case BIT_NOT_EXPR:
- if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
- stmt_cost = ix86_cost->sse_op;
- else if (VECTOR_MODE_P (mode))
+ gcc_assert (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)
+ && !X87_FLOAT_MODE_P (mode));
+ if (VECTOR_MODE_P (mode))
stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
else
stmt_cost = ix86_cost->add;
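For orientation, the new COND_EXPR/MIN_EXPR/MAX_EXPR/ABS_EXPR cases above cost the idioms the vectorizer typically emits for selects, min/max and abs. A hedged example of the kind of loop they price (illustrative only, not a testcase from this patch):

    /* On plain SSE2 the select is costed as the four-instruction
       pcmpgtd/pand/pandn/por sequence, with SSE4.1 as compare + blend;
       the max and abs idioms get analogous per-ISA costs.  */
    void select_max_abs (int *r, int *s, const int *a, const int *b, int n)
    {
      for (int i = 0; i < n; i++)
        {
          r[i] = a[i] > b[i] ? a[i] : b[i];   /* MAX_EXPR / COND_EXPR */
          s[i] = a[i] < 0 ? -a[i] : a[i];     /* ABS_EXPR */
        }
    }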
@@ -25439,7 +25490,26 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
if (kind == vec_promote_demote
&& fp && FLOAT_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
- stmt_cost = fp_conversion_stmt_cost (mode, stmt_info->stmt, scalar_p);
+ {
+ int outer_size
+ = tree_to_uhwi
+ (TYPE_SIZE
+ (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt))));
+ int inner_size
+ = tree_to_uhwi
+ (TYPE_SIZE
+ (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))));
+ int stmt_cost = vec_fp_conversion_cost
+ (ix86_tune_cost, GET_MODE_BITSIZE (mode));
+ /* VEC_PACK_TRUNC_EXPR: If inner size is greater than outer size we will end
+ up doing two conversions and packing them. */
+ if (inner_size > outer_size)
+ {
+ int n = inner_size / outer_size;
+ stmt_cost = stmt_cost * n
+ + (n - 1) * ix86_vec_cost (mode, ix86_cost->sse_op);
+ }
+ }
/* If we do elementwise loads into a vector then we are bound by
latency and execution resources for the many scalar loads
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index d6b2f29..e170da3 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -26592,8 +26592,8 @@
[(set (match_operand:X87MODEF 0 "register_operand")
(if_then_else:X87MODEF
(match_operand 1 "comparison_operator")
- (match_operand:X87MODEF 2 "register_operand")
- (match_operand:X87MODEF 3 "register_operand")))]
+ (match_operand:X87MODEF 2 "nonimm_or_0_or_1s_operand")
+ (match_operand:X87MODEF 3 "nonimm_or_0_operand")))]
"(TARGET_80387 && TARGET_CMOVE)
|| (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
"if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;")
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 3d3848c..4b23e18 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1267,6 +1267,14 @@
(match_operand 0 "vector_memory_operand")
(match_code "const_vector")))
+; Return true when OP is register_operand, vector_memory_operand,
+; const_vector zero or const_vector all ones.
+(define_predicate "vector_or_0_or_1s_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "vector_memory_operand")
+ (match_operand 0 "const0_operand")
+ (match_operand 0 "int_float_vector_all_ones_operand")))
+
(define_predicate "bcst_mem_operand"
(and (match_code "vec_duplicate")
(and (match_test "TARGET_AVX512F")
@@ -1333,6 +1341,12 @@
(ior (match_operand 0 "nonimmediate_operand")
(match_operand 0 "const0_operand")))
+; Return true when OP is a nonimmediate or zero or all ones.
+(define_predicate "nonimm_or_0_or_1s_operand"
+ (ior (match_operand 0 "nonimmediate_operand")
+ (match_operand 0 "const0_operand")
+ (match_operand 0 "int_float_vector_all_ones_operand")))
+
;; Return true for RTX codes that force SImode address.
(define_predicate "SImode_address_operand"
(match_code "subreg,zero_extend,and"))
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index b280676..20b35a1 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -5142,7 +5142,7 @@
(define_expand "vcond_mask_<mode><sseintvecmodelower>"
[(set (match_operand:VI_256_AVX2 0 "register_operand")
(vec_merge:VI_256_AVX2
- (match_operand:VI_256_AVX2 1 "nonimmediate_operand")
+ (match_operand:VI_256_AVX2 1 "nonimm_or_0_or_1s_operand")
(match_operand:VI_256_AVX2 2 "nonimm_or_0_operand")
(match_operand:<sseintvecmode> 3 "register_operand")))]
"TARGET_AVX"
@@ -5155,7 +5155,7 @@
(define_expand "vcond_mask_<mode><sseintvecmodelower>"
[(set (match_operand:VI_128 0 "register_operand")
(vec_merge:VI_128
- (match_operand:VI_128 1 "vector_operand")
+ (match_operand:VI_128 1 "vector_or_0_or_1s_operand")
(match_operand:VI_128 2 "nonimm_or_0_operand")
(match_operand:<sseintvecmode> 3 "register_operand")))]
"TARGET_SSE2"
@@ -5168,7 +5168,7 @@
(define_expand "vcond_mask_v1tiv1ti"
[(set (match_operand:V1TI 0 "register_operand")
(vec_merge:V1TI
- (match_operand:V1TI 1 "vector_operand")
+ (match_operand:V1TI 1 "vector_or_0_or_1s_operand")
(match_operand:V1TI 2 "nonimm_or_0_operand")
(match_operand:V1TI 3 "register_operand")))]
"TARGET_SSE2"
@@ -5181,7 +5181,7 @@
(define_expand "vcond_mask_<mode><sseintvecmodelower>"
[(set (match_operand:VF_256 0 "register_operand")
(vec_merge:VF_256
- (match_operand:VF_256 1 "nonimmediate_operand")
+ (match_operand:VF_256 1 "nonimm_or_0_or_1s_operand")
(match_operand:VF_256 2 "nonimm_or_0_operand")
(match_operand:<sseintvecmode> 3 "register_operand")))]
"TARGET_AVX"
@@ -5194,7 +5194,7 @@
(define_expand "vcond_mask_<mode><sseintvecmodelower>"
[(set (match_operand:VF_128 0 "register_operand")
(vec_merge:VF_128
- (match_operand:VF_128 1 "vector_operand")
+ (match_operand:VF_128 1 "vector_or_0_or_1s_operand")
(match_operand:VF_128 2 "nonimm_or_0_operand")
(match_operand:<sseintvecmode> 3 "register_operand")))]
"TARGET_SSE"
diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 24a28dc..0d3d026 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -20678,6 +20678,9 @@ mips_option_override (void)
"-mcompact-branches=never");
}
+ if (is_micromips && TARGET_MSA)
+ error ("unsupported combination: %s", "-mmicromips -mmsa");
+
/* Require explicit relocs for MIPS R6 onwards. This enables simplification
of the compact branch and jump support through the backend. */
if (!TARGET_EXPLICIT_RELOCS && mips_isa_rev >= 6)
diff --git a/gcc/config/riscv/riscv-cores.def b/gcc/config/riscv/riscv-cores.def
index 2918496..e31afc3 100644
--- a/gcc/config/riscv/riscv-cores.def
+++ b/gcc/config/riscv/riscv-cores.def
@@ -41,6 +41,12 @@ RISCV_TUNE("sifive-p400-series", sifive_p400, sifive_p400_tune_info)
RISCV_TUNE("sifive-p600-series", sifive_p600, sifive_p600_tune_info)
RISCV_TUNE("tt-ascalon-d8", generic_ooo, tt_ascalon_d8_tune_info)
RISCV_TUNE("thead-c906", generic, thead_c906_tune_info)
+RISCV_TUNE("xt-c908", generic, generic_ooo_tune_info)
+RISCV_TUNE("xt-c908v", generic, generic_ooo_tune_info)
+RISCV_TUNE("xt-c910", generic, generic_ooo_tune_info)
+RISCV_TUNE("xt-c910v2", generic, generic_ooo_tune_info)
+RISCV_TUNE("xt-c920", generic, generic_ooo_tune_info)
+RISCV_TUNE("xt-c920v2", generic, generic_ooo_tune_info)
RISCV_TUNE("xiangshan-nanhu", xiangshan, xiangshan_nanhu_tune_info)
RISCV_TUNE("generic-ooo", generic_ooo, generic_ooo_tune_info)
RISCV_TUNE("size", generic, optimize_size_tune_info)
@@ -93,6 +99,48 @@ RISCV_CORE("thead-c906", "rv64imafdc_xtheadba_xtheadbb_xtheadbs_xtheadcmo_"
"xtheadmemidx_xtheadmempair_xtheadsync",
"thead-c906")
+RISCV_CORE("xt-c908", "rv64imafdc_zicbom_zicbop_zicboz_zicntr_zicsr_"
+ "zifencei_zihintpause_zihpm_zfh_zba_zbb_zbc_zbs_"
+ "sstc_svinval_svnapot_svpbmt_xtheadba_xtheadbb_"
+ "xtheadbs_xtheadcmo_xtheadcondmov_xtheadfmemidx_"
+ "xtheadmac_xtheadmemidx_xtheadmempair_xtheadsync",
+ "xt-c908")
+RISCV_CORE("xt-c908v", "rv64imafdcv_zicbom_zicbop_zicboz_zicntr_zicsr_"
+ "zifencei_zihintpause_zihpm_zfh_zba_zbb_zbc_zbs_"
+ "zvfh_sstc_svinval_svnapot_svpbmt__xtheadba_"
+ "xtheadbb_xtheadbs_xtheadcmo_xtheadcondmov_"
+ "xtheadfmemidx_xtheadmac_xtheadmemidx_"
+ "xtheadmempair_xtheadsync_xtheadvdot",
+ "xt-c908")
+RISCV_CORE("xt-c910", "rv64imafdc_zicntr_zicsr_zifencei_zihpm_zfh_"
+ "xtheadba_xtheadbb_xtheadbs_xtheadcmo_"
+ "xtheadcondmov_xtheadfmemidx_xtheadmac_"
+ "xtheadmemidx_xtheadmempair_xtheadsync",
+ "xt-c910")
+RISCV_CORE("xt-c910v2", "rv64imafdc_zicbom_zicbop_zicboz_zicntr_zicond_"
+ "zicsr_zifencei _zihintntl_zihintpause_zihpm_"
+ "zawrs_zfa_zfbfmin_zfh_zca_zcb_zcd_zba_zbb_zbc_"
+ "zbs_sscofpmf_sstc_svinval_svnapot_svpbmt_"
+ "xtheadba_xtheadbb_xtheadbs_xtheadcmo_"
+ "xtheadcondmov_xtheadfmemidx_xtheadmac_"
+ "xtheadmemidx_xtheadmempair_xtheadsync",
+ "xt-c910v2")
+RISCV_CORE("xt-c920", "rv64imafdc_zicntr_zicsr_zifencei_zihpm_zfh_"
+ "xtheadba_xtheadbb_xtheadbs_xtheadcmo_"
+ "xtheadcondmov_xtheadfmemidx_xtheadmac_"
+ "xtheadmemidx_xtheadmempair_xtheadsync_"
+ "xtheadvector",
+ "xt-c910")
+RISCV_CORE("xt-c920v2", "rv64imafdcv_zicbom_zicbop_zicboz_zicntr_zicond_"
+ "zicsr_zifencei _zihintntl_zihintpause_zihpm_"
+ "zawrs_zfa_zfbfmin_zfh_zca_zcb_zcd_zba_zbb_zbc_"
+ "zbs_zvfbfmin_zvfbfwma_zvfh_sscofpmf_sstc_"
+ "svinval_svnapot_svpbmt_xtheadba_xtheadbb_"
+ "xtheadbs_xtheadcmo_xtheadcondmov_xtheadfmemidx_"
+ "xtheadmac_xtheadmemidx_xtheadmempair_"
+ "xtheadsync_xtheadvdot",
+ "xt-c920v2")
+
RISCV_CORE("tt-ascalon-d8", "rv64imafdcv_zic64b_zicbom_zicbop_zicboz_"
"ziccamoa_ziccif_zicclsm_ziccrse_zicond_zicsr_"
"zifencei_zihintntl_zihintpause_zimop_za64rs_"
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 51eb64f..3ab4d76 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -2136,18 +2136,34 @@
(match_operand 7 "const_int_operand")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
- (vec_duplicate:V_VLS
- (match_operand:<VEL> 3 "direct_broadcast_operand"))
+ ;; (vec_duplicate:V_VLS ;; wrapper activated by wrap_vec_dup below.
+ (match_operand:<VEL> 3 "direct_broadcast_operand") ;; )
(match_operand:V_VLS 2 "vector_merge_operand")))]
"TARGET_VECTOR"
{
/* Transform vmv.v.x/vfmv.v.f (avl = 1) into vmv.s.x since vmv.s.x/vfmv.s.f
has better chances to do vsetvl fusion in vsetvl pass. */
+ bool wrap_vec_dup = true;
+ rtx vec_cst = NULL_RTX;
if (riscv_vector::splat_to_scalar_move_p (operands))
{
operands[1] = riscv_vector::gen_scalar_move_mask (<VM>mode);
operands[3] = force_reg (<VEL>mode, operands[3]);
}
+ else if (immediate_operand (operands[3], <VEL>mode)
+ && (vec_cst = gen_const_vec_duplicate (<MODE>mode, operands[3]))
+ && (/* -> pred_broadcast<mode>_zero */
+ (vector_least_significant_set_mask_operand (operands[1],
+ <VM>mode)
+ && vector_const_0_operand (vec_cst, <MODE>mode))
+ || (/* pred_broadcast<mode>_imm */
+ vector_all_trues_mask_operand (operands[1], <VM>mode)
+ && vector_const_int_or_double_0_operand (vec_cst,
+ <MODE>mode))))
+ {
+ operands[3] = vec_cst;
+ wrap_vec_dup = false;
+ }
/* Handle vmv.s.x instruction (Wb1 mask) which has memory scalar. */
else if (satisfies_constraint_Wdm (operands[3]))
{
@@ -2191,6 +2207,8 @@
;
else
operands[3] = force_reg (<VEL>mode, operands[3]);
+ if (wrap_vec_dup)
+ operands[3] = gen_rtx_VEC_DUPLICATE (<MODE>mode, operands[3]);
})
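The effect of the new branch is that an immediate splat keeps its constant-vector form, so the _zero/_imm variants of pred_broadcast can match (e.g. a single vmv.v.i) instead of first forcing the scalar into a register. A hedged example of code that ends up splatting such a constant (illustrative, not from the patch):

    /* An auto-vectorized constant fill; the expander can now pick the
       immediate form of the broadcast for the splatted 4.  */
    void fill4 (int *a, int n)
    {
      for (int i = 0; i < n; i++)
        a[i] = 4;
    }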
(define_insn_and_split "*pred_broadcast<mode>"
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 737c3d6..12dbde2 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -25765,10 +25765,13 @@ rs6000_can_inline_p (tree caller, tree callee)
}
}
- /* Ignore -mpower8-fusion and -mpower10-fusion options for inlining
- purposes. */
- callee_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
- explicit_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
+ /* Ignore -mpower8-fusion, -mpower10-fusion and -msave-toc-indirect options
+ for inlining purposes. */
+ HOST_WIDE_INT ignored_isas = (OPTION_MASK_P8_FUSION
+ | OPTION_MASK_P10_FUSION
+ | OPTION_MASK_SAVE_TOC_INDIRECT);
+ callee_isa &= ~ignored_isas;
+ explicit_isa &= ~ignored_isas;
/* The callee's options must be a subset of the caller's options, i.e.
a vsx function may inline an altivec function, but a no-vsx function