Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/aarch64/aarch64-simd.md   | 180
-rw-r--r--  gcc/config/aarch64/aarch64-sve.md    |  97
-rw-r--r--  gcc/config/aarch64/aarch64.opt.urls  |   3
-rw-r--r--  gcc/config/aarch64/iterators.md      |   3
-rw-r--r--  gcc/config/gnu.h                     |   4
-rw-r--r--  gcc/config/i386/driver-i386.cc       |  24
-rw-r--r--  gcc/config/i386/i386-c.cc            |   7
-rw-r--r--  gcc/config/i386/i386-expand.cc       |   6
-rw-r--r--  gcc/config/i386/i386-features.cc     | 141
-rw-r--r--  gcc/config/i386/i386-jit.cc          |  12
-rw-r--r--  gcc/config/i386/i386-options.cc      |   4
-rw-r--r--  gcc/config/i386/i386.h               |   2
-rw-r--r--  gcc/config/i386/sse.md               |  27
-rw-r--r--  gcc/config/rs6000/vxworks.h          |  16
-rw-r--r--  gcc/config/vxworks.h                 |  27
15 files changed, 526 insertions(+), 27 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index eaa8d57..a121a18 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3469,6 +3469,186 @@
DONE;
})
+;; AND tree reductions.
+;; Check that after a MIN pairwise reduction all the lanes are all-ones.
+;;
+;; uminp v1.4s, v1.4s, v1.4s
+;; fmov x1, d1
+;; cmn x1, #1
+;; cset w0, eq
+;;
+;; or with SVE enabled
+;;
+;; ptrue p1.b, vl16
+;; cmpeq p0.b, p1/z, z1.b, #0
+;; cset w0, none
+;;
+(define_expand "reduc_sbool_and_scal_<mode>"
+ [(set (match_operand:QI 0 "register_operand")
+ (unspec:QI [(match_operand:VALLI 1 "register_operand")]
+ UNSPEC_ANDV))]
+ "TARGET_SIMD"
+{
+ if (TARGET_SVE)
+ {
+ machine_mode full_mode = aarch64_full_sve_mode (<VEL>mode).require ();
+ rtx in = force_lowpart_subreg (full_mode, operands[1], <MODE>mode);
+ unsigned lanes
+ = exact_div (GET_MODE_BITSIZE (<MODE>mode), 8).to_constant ();
+ machine_mode pred_mode = aarch64_sve_pred_mode (full_mode);
+ rtx pred_res = gen_reg_rtx (pred_mode);
+ rtx gp = aarch64_ptrue_reg (VNx16BImode, lanes);
+ rtx cast_gp = lowpart_subreg (pred_mode, gp, VNx16BImode);
+ rtx gp_flag = gen_int_mode (SVE_MAYBE_NOT_PTRUE, SImode);
+ emit_insn (
+ gen_aarch64_pred_cmp_ptest (EQ, full_mode, pred_res, gp, in,
+ CONST0_RTX (full_mode), cast_gp,
+ gp_flag, cast_gp, gp_flag));
+ rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+ rtx cmp = gen_rtx_fmt_ee (EQ, SImode, cc_reg, const0_rtx);
+ rtx tmp2 = gen_reg_rtx (SImode);
+ emit_insn (gen_aarch64_cstoresi (tmp2, cmp, cc_reg));
+ emit_move_insn (operands[0], gen_lowpart (QImode, tmp2));
+ DONE;
+ }
+
+ rtx tmp = operands[1];
+ /* 128-bit vectors need to be compressed to 64 bits first. */
+ if (known_eq (128, GET_MODE_BITSIZE (<MODE>mode)))
+ {
+ /* Always reduce using a V4SI. */
+ rtx reduc = gen_lowpart (V4SImode, tmp);
+ rtx res = gen_reg_rtx (V4SImode);
+ emit_insn (gen_aarch64_uminpv4si (res, reduc, reduc));
+ emit_move_insn (tmp, gen_lowpart (<MODE>mode, res));
+ }
+ rtx val = gen_reg_rtx (DImode);
+ emit_move_insn (val, gen_lowpart (DImode, tmp));
+ rtx cc_reg = aarch64_gen_compare_reg (EQ, val, constm1_rtx);
+ rtx cmp = gen_rtx_fmt_ee (EQ, SImode, cc_reg, constm1_rtx);
+ rtx tmp2 = gen_reg_rtx (SImode);
+ emit_insn (gen_aarch64_cstoresi (tmp2, cmp, cc_reg));
+ emit_move_insn (operands[0], gen_lowpart (QImode, tmp2));
+ DONE;
+})
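For reference, assuming the input lanes are already 0/all-ones booleans (which is what these sbool reductions are fed), the Advanced SIMD sequence above is equivalent to this scalar sketch (hypothetical helper, not part of the patch):

    #include <stdint.h>

    /* 1 iff every lane is all-ones.  For 0/-1 lanes, AND and unsigned MIN
       agree, so the uminp reduction followed by "cmn x1, #1 / cset w0, eq"
       computes the same acc == -1 test.  */
    static uint8_t
    reduc_and_v4si_ref (const int32_t v[4])
    {
      int32_t acc = v[0] & v[1] & v[2] & v[3];
      return acc == -1;
    }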
+
+;; IOR tree reductions.
+;; Check that after a MAX pairwise reduction at least one lane is non-zero.
+;;
+;; umaxp v1.4s, v1.4s, v1.4s
+;; fmov x1, d1
+;; cmp x1, 0
+;; cset w0, ne
+;;
+;; or with SVE enabled
+;;
+;; ptrue p1.b, vl16
+;; cmpne p0.b, p1/z, z1.b, #0
+;; cset w0, any
+;;
+(define_expand "reduc_sbool_ior_scal_<mode>"
+ [(set (match_operand:QI 0 "register_operand")
+ (unspec:QI [(match_operand:VALLI 1 "register_operand")]
+ UNSPEC_IORV))]
+ "TARGET_SIMD"
+{
+ if (TARGET_SVE)
+ {
+ machine_mode full_mode = aarch64_full_sve_mode (<VEL>mode).require ();
+ rtx in = force_lowpart_subreg (full_mode, operands[1], <MODE>mode);
+ unsigned lanes
+ = exact_div (GET_MODE_BITSIZE (<MODE>mode), 8).to_constant ();
+ machine_mode pred_mode = aarch64_sve_pred_mode (full_mode);
+ rtx pred_res = gen_reg_rtx (pred_mode);
+ rtx gp = aarch64_ptrue_reg (VNx16BImode, lanes);
+ rtx cast_gp = lowpart_subreg (pred_mode, gp, VNx16BImode);
+ rtx gp_flag = gen_int_mode (SVE_MAYBE_NOT_PTRUE, SImode);
+ emit_insn (
+ gen_aarch64_pred_cmp_ptest (NE, full_mode, pred_res, gp, in,
+ CONST0_RTX (full_mode), cast_gp,
+ gp_flag, cast_gp, gp_flag));
+ rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+ rtx cmp = gen_rtx_fmt_ee (NE, SImode, cc_reg, const0_rtx);
+ rtx tmp2 = gen_reg_rtx (SImode);
+ emit_insn (gen_aarch64_cstoresi (tmp2, cmp, cc_reg));
+ emit_move_insn (operands[0], gen_lowpart (QImode, tmp2));
+ DONE;
+ }
+
+ rtx tmp = operands[1];
+ /* 128-bit vectors need to be compressed to 64 bits first. */
+ if (known_eq (128, GET_MODE_BITSIZE (<MODE>mode)))
+ {
+ /* Always reduce using a V4SI. */
+ rtx reduc = gen_lowpart (V4SImode, tmp);
+ rtx res = gen_reg_rtx (V4SImode);
+ emit_insn (gen_aarch64_umaxpv4si (res, reduc, reduc));
+ emit_move_insn (tmp, gen_lowpart (<MODE>mode, res));
+ }
+ rtx val = gen_reg_rtx (DImode);
+ emit_move_insn (val, gen_lowpart (DImode, tmp));
+ rtx cc_reg = aarch64_gen_compare_reg (NE, val, const0_rtx);
+ rtx cmp = gen_rtx_fmt_ee (NE, SImode, cc_reg, const0_rtx);
+ rtx tmp2 = gen_reg_rtx (SImode);
+ emit_insn (gen_aarch64_cstoresi (tmp2, cmp, cc_reg));
+ emit_move_insn (operands[0], gen_lowpart (QImode, tmp2));
+ DONE;
+})
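Similarly, a scalar reference for the IOR case (hypothetical helper; lanes again assumed to be 0/all-ones booleans):

    #include <stdint.h>

    /* 1 iff at least one lane is nonzero.  For 0/-1 lanes, OR and unsigned
       MAX agree, so umaxp followed by "cmp x1, 0 / cset w0, ne" is
       equivalent.  */
    static uint8_t
    reduc_ior_v4si_ref (const int32_t v[4])
    {
      int32_t acc = v[0] | v[1] | v[2] | v[3];
      return acc != 0;
    }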
+
+;; XOR tree reductions.
+;; Compute the parity of the lanes, i.e. check whether the number of nonzero
+;; lanes is odd.  We use a normal ADD reduction after masking with 0x1.
+;;
+;; movi v1.16b, 0x1
+;; and v2.16b, v2.16b, v1.16b
+;; addv b3, v2.16b
+;; fmov w1, s3
+;; and w0, w1, 1
+;;
+;; or with SVE enabled
+;;
+;; ptrue p1.b, vl16
+;; cmpne p0.b, p1/z, z1.b, #0
+;; cntp x1, p0, p0.b
+;; and w0, w1, 1
+;;
+(define_expand "reduc_sbool_xor_scal_<mode>"
+ [(set (match_operand:QI 0 "register_operand")
+ (unspec:QI [(match_operand:VALLI 1 "register_operand")]
+ UNSPEC_XORV))]
+ "TARGET_SIMD"
+{
+ if (TARGET_SVE)
+ {
+ machine_mode full_mode = aarch64_full_sve_mode (<VEL>mode).require ();
+ rtx in = force_lowpart_subreg (full_mode, operands[1], <MODE>mode);
+ unsigned lanes
+ = exact_div (GET_MODE_BITSIZE (<MODE>mode), 8).to_constant ();
+ machine_mode pred_mode = aarch64_sve_pred_mode (full_mode);
+ rtx pred_res = gen_reg_rtx (pred_mode);
+ rtx gp = aarch64_ptrue_reg (VNx16BImode, lanes);
+ rtx cast_gp = lowpart_subreg (pred_mode, gp, VNx16BImode);
+ rtx gp_flag = gen_int_mode (SVE_MAYBE_NOT_PTRUE, SImode);
+ emit_insn (
+ gen_aarch64_pred_cmp (NE, full_mode, pred_res, cast_gp, gp_flag, in,
+ CONST0_RTX (full_mode)));
+ emit_insn (gen_reduc_sbool_xor_scal (pred_mode, operands[0], pred_res));
+ DONE;
+ }
+
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ rtx one_reg = force_reg (<MODE>mode, CONST1_RTX (<MODE>mode));
+ emit_move_insn (tmp, gen_rtx_AND (<MODE>mode, operands[1], one_reg));
+ rtx tmp2 = gen_reg_rtx (<VEL>mode);
+ emit_insn (gen_reduc_plus_scal_<mode> (tmp2, tmp));
+ rtx tmp3 = gen_reg_rtx (DImode);
+ emit_move_insn (tmp3, gen_rtx_AND (DImode,
+ lowpart_subreg (DImode, tmp2, <VEL>mode),
+ const1_rtx));
+ emit_move_insn (operands[0], gen_lowpart (QImode, tmp3));
+ DONE;
+})
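A scalar reference for the XOR (parity) case, matching the movi/and/addv sequence above (hypothetical helper):

    #include <stdint.h>

    /* Mask each lane with 1, sum the lanes and keep the low bit of the sum:
       the result is 1 iff an odd number of lanes are set.  */
    static uint8_t
    reduc_xor_v4si_ref (const int32_t v[4])
    {
      int32_t sum = (v[0] & 1) + (v[1] & 1) + (v[2] & 1) + (v[3] & 1);
      return sum & 1;
    }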
+
;; SADDLV and UADDLV can be expressed as an ADDV instruction that first
;; sign or zero-extends its elements.
(define_insn "aarch64_<su>addlv<mode>"
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 550ff0a..f459f63 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -135,6 +135,7 @@
;; ---- [INT,FP] Conditional reductions
;; ---- [INT] Tree reductions
;; ---- [FP] Tree reductions
+;; ---- [Predicate] Tree reductions
;; ---- [FP] Left-to-right reductions
;;
;; == Permutes
@@ -8744,7 +8745,7 @@
;; Predicated integer comparisons in which only the flags result is
;; interesting.
-(define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest"
+(define_insn_and_rewrite "@aarch64_pred_cmp<cmp_op><mode>_ptest"
[(set (reg:CC_NZC CC_REGNUM)
(unspec:CC_NZC
[(match_operand:VNx16BI 1 "register_operand")
@@ -9888,6 +9889,100 @@
)
;; -------------------------------------------------------------------------
+;; ---- [Predicate] Tree reductions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - IORV
+;; - XORV
+;; - ANDV
+;; -------------------------------------------------------------------------
+
+;; Unpredicated predicate AND tree reductions.
+;; Invert the predicate under a full PTRUE and check that no lane of the
+;; result is active (i.e. the Z flag is set).
+;;
+;; ptrue p3.b, all
+;; nots p3.b, p3/z, p0.b
+;; cset w0, none
+;;
+(define_expand "reduc_sbool_and_scal_<mode>"
+ [(set (match_operand:QI 0 "register_operand")
+ (unspec:QI [(match_operand:PRED_ALL 1 "register_operand")]
+ UNSPEC_ANDV))]
+ "TARGET_SVE"
+ {
+ rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (<data_bytes>));
+ rtx cast_ptrue = gen_lowpart (<MODE>mode, ptrue);
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_aarch64_pred_one_cmpl_z (<MODE>mode, tmp, cast_ptrue,
+ operands[1]));
+ emit_insn (
+ gen_aarch64_ptest<mode> (ptrue, cast_ptrue,
+ gen_int_mode (SVE_KNOWN_PTRUE, SImode),
+ tmp));
+ rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+ rtx cmp = gen_rtx_fmt_ee (EQ, SImode, cc_reg, const0_rtx);
+ rtx tmp2 = gen_reg_rtx (SImode);
+ emit_insn (gen_aarch64_cstoresi (tmp2, cmp, cc_reg));
+ emit_move_insn (operands[0], gen_lowpart (QImode, tmp2));
+ DONE;
+ }
+)
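The SVE variant relies on De Morgan: all lanes of the predicate are active iff no lane of its inverse is. A scalar model of that identity (hypothetical helper; the predicate is modelled as an array of 0/1 flags):

    #include <stdint.h>

    /* ANDV(p) == !IORV(!p): the NOTS sets the flags for the inverted
       predicate and "cset w0, none" tests that nothing remained active.  */
    static uint8_t
    pred_andv_ref (const uint8_t *p, unsigned nlanes)
    {
      for (unsigned i = 0; i < nlanes; i++)
        if (!p[i])
          return 0;
      return 1;
    }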
+
+;; Unpredicated predicate IOR tree reductions.
+;; We need to make sure the result is reflected in the CC flags, so execute
+;; a PTEST on the predicate itself.
+;;
+;; ptest p0, p0.b
+;; cset w0, any
+;;
+(define_expand "reduc_sbool_ior_scal_<mode>"
+ [(set (match_operand:QI 0 "register_operand")
+ (unspec:QI [(match_operand:PRED_ALL 1 "register_operand")]
+ UNSPEC_IORV))]
+ "TARGET_SVE"
+ {
+ rtx ptrue = lowpart_subreg (VNx16BImode, operands[1], <MODE>mode);
+ emit_insn (
+ gen_aarch64_ptest<mode> (ptrue, operands[1],
+ gen_int_mode (SVE_MAYBE_NOT_PTRUE, SImode),
+ operands[1]));
+ rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+ rtx cmp = gen_rtx_fmt_ee (NE, SImode, cc_reg, const0_rtx);
+ rtx tmp = gen_reg_rtx (SImode);
+ emit_insn (gen_aarch64_cstoresi (tmp, cmp, cc_reg));
+ emit_move_insn (operands[0], gen_lowpart (QImode, tmp));
+ DONE;
+ }
+)
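A scalar model of the IOR case (hypothetical helper; same 0/1-flag representation as above). The PTEST merely re-derives the flags from the predicate, so the answer is simply "is any lane active":

    #include <stdint.h>

    static uint8_t
    pred_iorv_ref (const uint8_t *p, unsigned nlanes)
    {
      for (unsigned i = 0; i < nlanes; i++)
        if (p[i])
          return 1;
      return 0;
    }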
+
+;; Unpredicated predicate XOR tree reductions.
+;; Check whether the number of active lanes in the predicate is odd, i.e.
+;; compute the parity of the predicate.  This generates:
+;;
+;; cntp x0, p0, p0.b
+;; and w0, w0, 1
+;;
+(define_expand "@reduc_sbool_xor_scal_<mode>"
+ [(set (match_dup 2)
+ (zero_extend:DI
+ (unspec:SI [(match_dup 1)
+ (const_int SVE_MAYBE_NOT_PTRUE)
+ (match_operand:PRED_ALL 1 "register_operand")]
+ UNSPEC_CNTP)))
+ (set (match_dup 4)
+ (and:DI (match_dup 2)
+ (const_int 1)))
+ (set (match_operand:QI 0 "register_operand")
+ (subreg:QI (match_dup 4) 0))]
+ "TARGET_SVE"
+ {
+ operands[2] = gen_reg_rtx (DImode);
+ operands[4] = gen_reg_rtx (DImode);
+ }
+)
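A scalar model of the CNTP-based parity computation (hypothetical helper; 0/1-flag representation):

    #include <stdint.h>

    /* CNTP counts the active lanes; the final "and w0, w0, 1" keeps only
       the parity of that count.  */
    static uint8_t
    pred_xorv_ref (const uint8_t *p, unsigned nlanes)
    {
      unsigned count = 0;
      for (unsigned i = 0; i < nlanes; i++)
        count += (p[i] != 0);
      return count & 1;
    }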
+
+;; -------------------------------------------------------------------------
;; ---- [FP] Left-to-right reductions
;; -------------------------------------------------------------------------
;; Includes:
diff --git a/gcc/config/aarch64/aarch64.opt.urls b/gcc/config/aarch64/aarch64.opt.urls
index 7ec14a9..993e0fc 100644
--- a/gcc/config/aarch64/aarch64.opt.urls
+++ b/gcc/config/aarch64/aarch64.opt.urls
@@ -3,6 +3,9 @@
mbig-endian
UrlSuffix(gcc/AArch64-Options.html#index-mbig-endian)
+menable-sysreg-checking
+UrlSuffix(gcc/AArch64-Options.html#index-menable-sysreg-checking)
+
mgeneral-regs-only
UrlSuffix(gcc/AArch64-Options.html#index-mgeneral-regs-only)
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 3757998..517b280 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -217,6 +217,9 @@
;; All Advanced SIMD modes on which we support any arithmetic operations.
(define_mode_iterator VALL [V8QI V16QI V4HI V8HI V2SI V4SI V2DI V2SF V4SF V2DF])
+;; All Advanced SIMD integer modes
+(define_mode_iterator VALLI [VDQ_BHSI V2DI])
+
;; All Advanced SIMD modes suitable for moving, loading, and storing.
(define_mode_iterator VALL_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
V4HF V8HF V4BF V8BF V2SF V4SF V2DF])
diff --git a/gcc/config/gnu.h b/gcc/config/gnu.h
index 6b8f36b..825e743 100644
--- a/gcc/config/gnu.h
+++ b/gcc/config/gnu.h
@@ -19,6 +19,10 @@ You should have received a copy of the GNU General Public License
along with GCC. If not, see <http://www.gnu.org/licenses/>.
*/
+/* C libraries used on GNU/Hurd. */
+#define OPTION_GLIBC_P(opts) (DEFAULT_LIBC == LIBC_GLIBC)
+#define OPTION_GLIBC OPTION_GLIBC_P (&global_options)
+
#undef GNU_USER_TARGET_OS_CPP_BUILTINS
#define GNU_USER_TARGET_OS_CPP_BUILTINS() \
do { \
diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc
index fe71f55..0557df9 100644
--- a/gcc/config/i386/driver-i386.cc
+++ b/gcc/config/i386/driver-i386.cc
@@ -553,6 +553,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
processor = PROCESSOR_PENTIUM;
break;
case 6:
+ case 18:
case 19:
processor = PROCESSOR_PENTIUMPRO;
break;
@@ -639,18 +640,27 @@ const char *host_detect_local_cpu (int argc, const char **argv)
}
else if (has_feature (FEATURE_AVX))
{
- /* Assume Panther Lake. */
- if (has_feature (FEATURE_PREFETCHI))
- cpu = "pantherlake";
/* Assume Clearwater Forest. */
- else if (has_feature (FEATURE_USER_MSR))
+ if (has_feature (FEATURE_USER_MSR))
cpu = "clearwaterforest";
- /* Assume Arrow Lake S. */
+ /* Assume Nova Lake. */
+ else if (has_feature (FEATURE_PREFETCHI))
+ cpu = "novalake";
else if (has_feature (FEATURE_SM3))
- cpu = "arrowlake-s";
+ {
+ if (has_feature (FEATURE_KL))
+ /* Assume Arrow Lake S. */
+ cpu = "arrowlake-s";
+ else
+ /* Assume Panther Lake. */
+ cpu = "pantherlake";
+ }
/* Assume Sierra Forest. */
- else if (has_feature (FEATURE_AVXVNNIINT8))
+ else if (has_feature (FEATURE_CLDEMOTE))
cpu = "sierraforest";
+ /* Assume Arrow Lake. */
+ else if (has_feature (FEATURE_AVXVNNIINT8))
+ cpu = "arrowlake";
/* Assume Alder Lake. */
else if (has_feature (FEATURE_SERIALIZE))
cpu = "alderlake";
diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
index 0037465..2d92cee 100644
--- a/gcc/config/i386/i386-c.cc
+++ b/gcc/config/i386/i386-c.cc
@@ -295,6 +295,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__diamondrapids");
def_or_undef (parse_in, "__diamondrapids__");
break;
+ case PROCESSOR_NOVALAKE:
+ def_or_undef (parse_in, "__novalake");
+ def_or_undef (parse_in, "__novalake__");
+ break;
/* use PROCESSOR_max to not set/unset the arch macro. */
case PROCESSOR_max:
@@ -498,6 +502,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
case PROCESSOR_DIAMONDRAPIDS:
def_or_undef (parse_in, "__tune_diamondrapids__");
break;
+ case PROCESSOR_NOVALAKE:
+ def_or_undef (parse_in, "__tune_novalake__");
+ break;
case PROCESSOR_INTEL:
case PROCESSOR_GENERIC:
break;
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 5bcc35c..a1f1b26 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -9515,9 +9515,9 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
machine_mode move_mode = VOIDmode;
int unroll_factor = 1;
/* TODO: Once value ranges are available, fill in proper data. */
- unsigned HOST_WIDE_INT min_size = 0;
- unsigned HOST_WIDE_INT max_size = -1;
- unsigned HOST_WIDE_INT probable_max_size = -1;
+ unsigned HOST_WIDE_INT min_size = HOST_WIDE_INT_0U;
+ unsigned HOST_WIDE_INT max_size = HOST_WIDE_INT_M1U;
+ unsigned HOST_WIDE_INT probable_max_size = HOST_WIDE_INT_M1U;
bool misaligned_prologue_used = false;
addr_space_t dst_as, src_as = ADDR_SPACE_GENERIC;
diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index 9348f55..8e27784 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -449,6 +449,30 @@ scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref,
return true;
}
+/* Check whether X is a convertible *concatditi_? variant. X is known
+ to be any_or_plus:TI, i.e. PLUS:TI, IOR:TI or XOR:TI. */
+
+static bool
+timode_concatdi_p (rtx x)
+{
+ rtx op0 = XEXP (x, 0);
+ rtx op1 = XEXP (x, 1);
+
+ if (GET_CODE (op1) == ASHIFT)
+ std::swap (op0, op1);
+
+ return GET_CODE (op0) == ASHIFT
+ && GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
+ && GET_MODE (XEXP (XEXP (op0, 0), 0)) == DImode
+ && REG_P (XEXP (XEXP (op0, 0), 0))
+ && CONST_INT_P (XEXP (op0, 1))
+ && INTVAL (XEXP (op0, 1)) == 64
+ && GET_CODE (op1) == ZERO_EXTEND
+ && GET_MODE (XEXP (op1, 0)) == DImode
+ && REG_P (XEXP (op1, 0));
+}
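As a rough illustration (not part of the patch), the RTL shape this predicate matches typically comes from source that glues two 64-bit halves into a 128-bit value, where the gluing operation may be PLUS, IOR or XOR since the halves do not overlap:

    /* On a 64-bit x86 target, hi becomes the shifted zero_extend and lo the
       plain zero_extend that timode_concatdi_p looks for.  */
    unsigned __int128
    concat_u128 (unsigned long long hi, unsigned long long lo)
    {
      return ((unsigned __int128) hi << 64) | lo;
    }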
+
+
/* Add instruction into a chain. Return true if OK, false if the search
was aborted. */
@@ -477,9 +501,26 @@ scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid,
if (!analyze_register_chain (candidates, ref, disallowed))
return false;
- /* The operand(s) of VEC_SELECT don't need to be converted/convertible. */
- if (def_set && GET_CODE (SET_SRC (def_set)) == VEC_SELECT)
- return true;
+ /* The operand(s) of VEC_SELECT, ZERO_EXTEND and similar ops don't need
+ to be converted/convertible. */
+ if (def_set)
+ switch (GET_CODE (SET_SRC (def_set)))
+ {
+ case VEC_SELECT:
+ return true;
+ case ZERO_EXTEND:
+ if (GET_MODE (XEXP (SET_SRC (def_set), 0)) == DImode)
+ return true;
+ break;
+ case PLUS:
+ case IOR:
+ case XOR:
+ if (smode == TImode && timode_concatdi_p (SET_SRC (def_set)))
+ return true;
+ break;
+ default:
+ break;
+ }
for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
if (!DF_REF_REG_MEM_P (ref))
@@ -1628,14 +1669,34 @@ timode_scalar_chain::compute_convert_gain ()
break;
case AND:
+ if (!MEM_P (dst))
+ igain = COSTS_N_INSNS (1);
+ if (CONST_SCALAR_INT_P (XEXP (src, 1)))
+ igain += timode_immed_const_gain (XEXP (src, 1), bb);
+ break;
+
case XOR:
case IOR:
+ if (timode_concatdi_p (src))
+ {
+ /* vmovq;vpinsrq (11 bytes). */
+ igain = speed_p ? -2 * ix86_cost->sse_to_integer
+ : -COSTS_N_BYTES (11);
+ break;
+ }
if (!MEM_P (dst))
igain = COSTS_N_INSNS (1);
if (CONST_SCALAR_INT_P (XEXP (src, 1)))
igain += timode_immed_const_gain (XEXP (src, 1), bb);
break;
+ case PLUS:
+ if (timode_concatdi_p (src))
+ /* vmovq;vpinsrq (11 bytes). */
+ igain = speed_p ? -2 * ix86_cost->sse_to_integer
+ : -COSTS_N_BYTES (11);
+ break;
+
case ASHIFT:
case LSHIFTRT:
/* See ix86_expand_v1ti_shift. */
@@ -1794,6 +1855,13 @@ timode_scalar_chain::compute_convert_gain ()
igain = !speed_p ? -COSTS_N_BYTES (6) : -COSTS_N_INSNS (1);
break;
+ case ZERO_EXTEND:
+ if (GET_MODE (XEXP (src, 0)) == DImode)
+ /* xor (2 bytes) vs. vmovq (5 bytes). */
+ igain = speed_p ? COSTS_N_INSNS (1) - ix86_cost->sse_to_integer
+ : -COSTS_N_BYTES (3);
+ break;
+
default:
break;
}
@@ -1858,6 +1926,28 @@ timode_scalar_chain::fix_debug_reg_uses (rtx reg)
}
}
+/* Convert SRC, a *concatditi3 pattern, into a vec_concatv2di instruction.
+ Insert this before INSN, and return the result as a V1TImode subreg. */
+
+static rtx
+timode_convert_concatdi (rtx src, rtx_insn *insn)
+{
+ rtx hi, lo;
+ rtx tmp = gen_reg_rtx (V2DImode);
+ if (GET_CODE (XEXP (src, 0)) == ASHIFT)
+ {
+ hi = XEXP (XEXP (XEXP (src, 0), 0), 0);
+ lo = XEXP (XEXP (src, 1), 0);
+ }
+ else
+ {
+ hi = XEXP (XEXP (XEXP (src, 1), 0), 0);
+ lo = XEXP (XEXP (src, 0), 0);
+ }
+ emit_insn_before (gen_vec_concatv2di (tmp, lo, hi), insn);
+ return gen_rtx_SUBREG (V1TImode, tmp, 0);
+}
+
/* Convert INSN from TImode to V1T1mode. */
void
@@ -1967,10 +2057,24 @@ timode_scalar_chain::convert_insn (rtx_insn *insn)
PUT_MODE (src, V1TImode);
break;
}
- /* FALLTHRU */
+ convert_op (&XEXP (src, 0), insn);
+ convert_op (&XEXP (src, 1), insn);
+ PUT_MODE (src, V1TImode);
+ if (MEM_P (dst))
+ {
+ tmp = gen_reg_rtx (V1TImode);
+ emit_insn_before (gen_rtx_SET (tmp, src), insn);
+ src = tmp;
+ }
+ break;
case XOR:
case IOR:
+ if (timode_concatdi_p (src))
+ {
+ src = timode_convert_concatdi (src, insn);
+ break;
+ }
convert_op (&XEXP (src, 0), insn);
convert_op (&XEXP (src, 1), insn);
PUT_MODE (src, V1TImode);
@@ -2010,6 +2114,26 @@ timode_scalar_chain::convert_insn (rtx_insn *insn)
PUT_MODE (src, V1TImode);
break;
+ case ZERO_EXTEND:
+ if (GET_MODE (XEXP (src, 0)) == DImode)
+ {
+ /* Convert to *vec_concatv2di_0. */
+ rtx tmp = gen_reg_rtx (V2DImode);
+ rtx pat = gen_rtx_VEC_CONCAT (V2DImode, XEXP (src, 0), const0_rtx);
+ emit_insn_before (gen_move_insn (tmp, pat), insn);
+ src = gen_rtx_SUBREG (vmode, tmp, 0);
+ }
+ else
+ gcc_unreachable ();
+ break;
+
+ case PLUS:
+ if (timode_concatdi_p (src))
+ src = timode_convert_concatdi (src, insn);
+ else
+ gcc_unreachable ();
+ break;
+
default:
gcc_unreachable ();
}
@@ -2389,6 +2513,8 @@ timode_scalar_to_vector_candidate_p (rtx_insn *insn)
case IOR:
case XOR:
+ if (timode_concatdi_p (src))
+ return true;
return (REG_P (XEXP (src, 0))
|| timode_mem_p (XEXP (src, 0)))
&& (REG_P (XEXP (src, 1))
@@ -2408,6 +2534,13 @@ timode_scalar_to_vector_candidate_p (rtx_insn *insn)
&& CONST_INT_P (XEXP (src, 1))
&& (INTVAL (XEXP (src, 1)) & ~0x7f) == 0;
+ case PLUS:
+ return timode_concatdi_p (src);
+
+ case ZERO_EXTEND:
+ return REG_P (XEXP (src, 0))
+ && GET_MODE (XEXP (src, 0)) == DImode;
+
default:
return false;
}
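For the new ZERO_EXTEND candidate, the motivating source shape is a plain widening of a 64-bit value to 128 bits; under STV it becomes a vec_concat with zero (a single vmovq) rather than a mov/xor pair in GPRs. A minimal example (not part of the patch):

    unsigned __int128
    zext_u128 (unsigned long long x)
    {
      return x;   /* DImode -> TImode zero_extend */
    }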
diff --git a/gcc/config/i386/i386-jit.cc b/gcc/config/i386/i386-jit.cc
index c1e2929..73ca590 100644
--- a/gcc/config/i386/i386-jit.cc
+++ b/gcc/config/i386/i386-jit.cc
@@ -65,6 +65,18 @@ ix86_jit_register_target_info (void)
jit_target_add_supported_target_dependent_type (GCC_JIT_TYPE_INT128_T);
}
+ if (float16_type_node != NULL && TYPE_PRECISION (float16_type_node) == 16)
+ jit_target_add_supported_target_dependent_type (GCC_JIT_TYPE_FLOAT16);
+
+ if (float32_type_node != NULL && TYPE_PRECISION (float32_type_node) == 32)
+ jit_target_add_supported_target_dependent_type (GCC_JIT_TYPE_FLOAT32);
+
+ if (float64_type_node != NULL && TYPE_PRECISION (float64_type_node) == 64)
+ jit_target_add_supported_target_dependent_type (GCC_JIT_TYPE_FLOAT64);
+
+ if (float128_type_node != NULL && TYPE_PRECISION (float128_type_node) == 128)
+ jit_target_add_supported_target_dependent_type (GCC_JIT_TYPE_FLOAT128);
+
#define ADD_TARGET_INFO jit_add_target_info
#include "i386-rust-and-jit.inc"
#undef ADD_TARGET_INFO
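With the hunk above, a libgccjit client targeting x86 can query the fixed-width float types it now advertises. A minimal usage sketch (assuming the GCC_JIT_TYPE_FLOAT32 enumerator from this tree; error handling omitted):

    #include <libgccjit.h>

    gcc_jit_type *
    get_float32_type (gcc_jit_context *ctxt)
    {
      return gcc_jit_context_get_type (ctxt, GCC_JIT_TYPE_FLOAT32);
    }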
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index 35cba3f..dadcf76 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -132,6 +132,7 @@ along with GCC; see the file COPYING3. If not see
#define m_ARROWLAKE_S (HOST_WIDE_INT_1U<<PROCESSOR_ARROWLAKE_S)
#define m_PANTHERLAKE (HOST_WIDE_INT_1U<<PROCESSOR_PANTHERLAKE)
#define m_DIAMONDRAPIDS (HOST_WIDE_INT_1U<<PROCESSOR_DIAMONDRAPIDS)
+#define m_NOVALAKE (HOST_WIDE_INT_1U<<PROCESSOR_NOVALAKE)
#define m_CORE_AVX512 (m_SKYLAKE_AVX512 | m_CANNONLAKE \
| m_ICELAKE_CLIENT | m_ICELAKE_SERVER | m_CASCADELAKE \
| m_TIGERLAKE | m_COOPERLAKE | m_SAPPHIRERAPIDS \
@@ -140,7 +141,7 @@ along with GCC; see the file COPYING3. If not see
#define m_CORE_AVX2 (m_HASWELL | m_SKYLAKE | m_CORE_AVX512)
#define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2)
#define m_CORE_HYBRID (m_ALDERLAKE | m_ARROWLAKE | m_ARROWLAKE_S \
- | m_PANTHERLAKE)
+ | m_PANTHERLAKE | m_NOVALAKE)
#define m_GOLDMONT (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT)
#define m_GOLDMONT_PLUS (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT_PLUS)
#define m_TREMONT (HOST_WIDE_INT_1U<<PROCESSOR_TREMONT)
@@ -790,6 +791,7 @@ static const struct processor_costs *processor_cost_table[] =
&alderlake_cost, /* PROCESSOR_ARROWLAKE_S. */
&alderlake_cost, /* PROCESSOR_PANTHERLAKE. */
&icelake_cost, /* PROCESSOR_DIAMONDRAPIDS. */
+ &alderlake_cost, /* PROCESSOR_NOVALAKE. */
&alderlake_cost, /* PROCESSOR_INTEL. */
&lujiazui_cost, /* PROCESSOR_LUJIAZUI. */
&yongfeng_cost, /* PROCESSOR_YONGFENG. */
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 3a66d78..94f335f 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2356,6 +2356,7 @@ enum processor_type
PROCESSOR_ARROWLAKE_S,
PROCESSOR_PANTHERLAKE,
PROCESSOR_DIAMONDRAPIDS,
+ PROCESSOR_NOVALAKE,
PROCESSOR_INTEL,
PROCESSOR_LUJIAZUI,
PROCESSOR_YONGFENG,
@@ -2487,6 +2488,7 @@ constexpr wide_int_bitmask PTA_DIAMONDRAPIDS = PTA_GRANITERAPIDS_D
| PTA_CMPCCXADD | PTA_SHA512 | PTA_SM3 | PTA_SM4 | PTA_AVX10_2
| PTA_APX_F | PTA_AMX_AVX512 | PTA_AMX_FP8 | PTA_AMX_TF32 | PTA_MOVRS
| PTA_AMX_MOVRS;
+constexpr wide_int_bitmask PTA_NOVALAKE = PTA_PANTHERLAKE | PTA_PREFETCHI;
constexpr wide_int_bitmask PTA_BDVER1 = PTA_64BIT | PTA_MMX | PTA_SSE
| PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_POPCNT | PTA_LZCNT
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 8b28c8e..4ad17f6 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4632,6 +4632,33 @@
UNSPEC_PCMP_ITER))]
"operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);")
+(define_insn_and_split "*<avx512>_cmp<mode>3_dup_op"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VI1248_AVX512VLBW 1 "general_operand")
+ (match_operand:VI1248_AVX512VLBW 2 "general_operand")
+ (match_operand:SI 3 "<cmp_imm_predicate>")]
+ UNSPEC_PCMP_ITER))]
+ "TARGET_AVX512F && ix86_pre_reload_split ()
+ && rtx_equal_p (operands[1], operands[2])"
+ "#"
+ "&& 1"
+ [(set (match_dup 0) (match_dup 4))]
+{
+ int cmp_imm = INTVAL (operands[3]);
+ rtx res = CONST0_RTX (<avx512fmaskmode>mode);
+ /* EQ/LE/NLT. */
+ if (cmp_imm == 0 || cmp_imm == 2 || cmp_imm == 5)
+ {
+ int nelts = GET_MODE_NUNITS (<MODE>mode);
+ if (nelts >= 8)
+ res = CONSTM1_RTX (<avx512fmaskmode>mode);
+ else
+ res = gen_int_mode ((1u << nelts) - 1, QImode);
+ }
+ operands[4] = res;
+})
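The new split fires when both compare operands are the same register, in which case EQ/LE/NLT are tautologies and the compare collapses to a constant mask. A small example that exercises it (assumes -mavx512f; not part of the patch):

    #include <immintrin.h>

    __mmask16
    cmp_self (__m512i x)
    {
      return _mm512_cmpeq_epi32_mask (x, x);   /* folds to 0xffff */
    }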
+
(define_insn "*<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k,k")
(unspec:<avx512fmaskmode>
diff --git a/gcc/config/rs6000/vxworks.h b/gcc/config/rs6000/vxworks.h
index 9eb074b..13c706b 100644
--- a/gcc/config/rs6000/vxworks.h
+++ b/gcc/config/rs6000/vxworks.h
@@ -290,5 +290,21 @@ along with GCC; see the file COPYING3. If not see
trigger visible link errors (hence remain harmless) if the support isn't
really there. */
+/* Select a format to encode pointers in exception handling data. CODE
+ is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
+ true if the symbol may be affected by dynamic relocations.
+
+ This is essentially the linux64.h version with an extra guard on
+ TARGET_VXWORKS_RTP to avoid DW_EH_PE_indirect in 64-bit DKMs, as it
+ could cause references from one DKM to resolve to symbols exposed by a
+ previously loaded DKM even if the symbol is also provided by the DKM
+ where the reference takes place.  */
+#undef ASM_PREFERRED_EH_DATA_FORMAT
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
+ ((TARGET_64BIT && TARGET_VXWORKS_RTP) || flag_pic \
+ ? (((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel \
+ | (TARGET_64BIT ? DW_EH_PE_udata8 : DW_EH_PE_sdata4)) \
+ : DW_EH_PE_absptr)
+
#endif /* TARGET_VXWORKS7 */
diff --git a/gcc/config/vxworks.h b/gcc/config/vxworks.h
index 7268ace..bfad070 100644
--- a/gcc/config/vxworks.h
+++ b/gcc/config/vxworks.h
@@ -75,22 +75,27 @@ extern void vxworks_driver_init (unsigned int *, struct cl_decoded_option **);
#if TARGET_VXWORKS7
-/* We arrange not rely on fixed includes for vx7 and the headers spread over
- common kernel/rtp directories in addition to specific ones for each mode.
- Setup sysroot_headers_suffix_spec to deal with kernel/rtp distinction. */
+/* We arrange not to rely on fixed includes for vx7 and the headers spread
+ over common kernel/rtp directories in addition to specific ones for each
+ mode. Setup sysroot_headers_suffix_spec to deal with the kernel/rtp
+ distinction. */
#undef SYSROOT_HEADERS_SUFFIX_SPEC
#define SYSROOT_HEADERS_SUFFIX_SPEC "%{mrtp:/usr/h;:/krnl/h}"
+/* Now expand everything using sysroot(+suffix) relative references. The
+ absence of %getenv(VSB_DIR) allows all-gcc builds with possible self-tests
+ to succeed without having to define the variable at all. */
+
#undef VXWORKS_ADDITIONAL_CPP_SPEC
-#define VXWORKS_ADDITIONAL_CPP_SPEC \
- "%{!nostdinc:%{!fself-test=*: \
- %{isystem*} \
- -idirafter %:getenv(VSB_DIR /h) \
- -idirafter %:getenv(VSB_DIR /share/h) \
- -idirafter =/system \
- -idirafter =/public \
- }}"
+#define VXWORKS_ADDITIONAL_CPP_SPEC \
+ "%{!nostdinc: \
+ %{isystem*} \
+ -idirafter =/../../h \
+ -idirafter =/../../share/h \
+ -idirafter =/system \
+ -idirafter =/public \
+ }"
#else /* TARGET_VXWORKS7 */