Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 180
-rw-r--r-- | gcc/config/aarch64/aarch64-sve.md | 97
-rw-r--r-- | gcc/config/aarch64/aarch64.opt.urls | 3
-rw-r--r-- | gcc/config/aarch64/iterators.md | 3
-rw-r--r-- | gcc/config/gnu.h | 4
-rw-r--r-- | gcc/config/i386/driver-i386.cc | 24
-rw-r--r-- | gcc/config/i386/i386-c.cc | 7
-rw-r--r-- | gcc/config/i386/i386-expand.cc | 6
-rw-r--r-- | gcc/config/i386/i386-features.cc | 141
-rw-r--r-- | gcc/config/i386/i386-jit.cc | 12
-rw-r--r-- | gcc/config/i386/i386-options.cc | 4
-rw-r--r-- | gcc/config/i386/i386.h | 2
-rw-r--r-- | gcc/config/i386/sse.md | 27
-rw-r--r-- | gcc/config/rs6000/vxworks.h | 16
-rw-r--r-- | gcc/config/vxworks.h | 27
15 files changed, 526 insertions, 27 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index eaa8d57..a121a18 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -3469,6 +3469,186 @@ DONE; }) +;; AND tree reductions. +;; Check if after a min pairwise reduction that all the lanes are 1. +;; +;; uminp v1.4s, v1.4s, v1.4s +;; fmov x1, d1 +;; cmn x1, #1 +;; cset w0, eq +;; +;; or with SVE enabled +;; +;; ptrue p1.b, vl16 +;; cmpeq p0.b, p1/z, z1.b, #0 +;; cset w0, none +;; +(define_expand "reduc_sbool_and_scal_<mode>" + [(set (match_operand:QI 0 "register_operand") + (unspec:QI [(match_operand:VALLI 1 "register_operand")] + UNSPEC_ANDV))] + "TARGET_SIMD" +{ + if (TARGET_SVE) + { + machine_mode full_mode = aarch64_full_sve_mode (<VEL>mode).require (); + rtx in = force_lowpart_subreg (full_mode, operands[1], <MODE>mode); + unsigned lanes + = exact_div (GET_MODE_BITSIZE (<MODE>mode), 8).to_constant (); + machine_mode pred_mode = aarch64_sve_pred_mode (full_mode); + rtx pred_res = gen_reg_rtx (pred_mode); + rtx gp = aarch64_ptrue_reg (VNx16BImode, lanes); + rtx cast_gp = lowpart_subreg (pred_mode, gp, VNx16BImode); + rtx gp_flag = gen_int_mode (SVE_MAYBE_NOT_PTRUE, SImode); + emit_insn ( + gen_aarch64_pred_cmp_ptest (EQ, full_mode, pred_res, gp, in, + CONST0_RTX (full_mode), cast_gp, + gp_flag, cast_gp, gp_flag)); + rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM); + rtx cmp = gen_rtx_fmt_ee (EQ, SImode, cc_reg, const0_rtx); + rtx tmp2 = gen_reg_rtx (SImode); + emit_insn (gen_aarch64_cstoresi (tmp2, cmp, cc_reg)); + emit_move_insn (operands[0], gen_lowpart (QImode, tmp2)); + DONE; + } + + rtx tmp = operands[1]; + /* 128-bit vectors need to be compressed to 64-bits first. */ + if (known_eq (128, GET_MODE_BITSIZE (<MODE>mode))) + { + /* Always reduce using a V4SI. */ + rtx reduc = gen_lowpart (V4SImode, tmp); + rtx res = gen_reg_rtx (V4SImode); + emit_insn (gen_aarch64_uminpv4si (res, reduc, reduc)); + emit_move_insn (tmp, gen_lowpart (<MODE>mode, res)); + } + rtx val = gen_reg_rtx (DImode); + emit_move_insn (val, gen_lowpart (DImode, tmp)); + rtx cc_reg = aarch64_gen_compare_reg (EQ, val, constm1_rtx); + rtx cmp = gen_rtx_fmt_ee (EQ, SImode, cc_reg, constm1_rtx); + rtx tmp2 = gen_reg_rtx (SImode); + emit_insn (gen_aarch64_cstoresi (tmp2, cmp, cc_reg)); + emit_move_insn (operands[0], gen_lowpart (QImode, tmp2)); + DONE; +}) + +;; IOR tree reductions. 
+;; Check that after a MAX pairwise reduction any lane is not 0 +;; +;; umaxp v1.4s, v1.4s, v1.4s +;; fmov x1, d1 +;; cmp x1, 0 +;; cset w0, ne +;; +;; or with SVE enabled +;; +;; ptrue p1.b, vl16 +;; cmpne p0.b, p1/z, z1.b, #0 +;; cset w0, any +;; +(define_expand "reduc_sbool_ior_scal_<mode>" + [(set (match_operand:QI 0 "register_operand") + (unspec:QI [(match_operand:VALLI 1 "register_operand")] + UNSPEC_IORV))] + "TARGET_SIMD" +{ + if (TARGET_SVE) + { + machine_mode full_mode = aarch64_full_sve_mode (<VEL>mode).require (); + rtx in = force_lowpart_subreg (full_mode, operands[1], <MODE>mode); + unsigned lanes + = exact_div (GET_MODE_BITSIZE (<MODE>mode), 8).to_constant (); + machine_mode pred_mode = aarch64_sve_pred_mode (full_mode); + rtx pred_res = gen_reg_rtx (pred_mode); + rtx gp = aarch64_ptrue_reg (VNx16BImode, lanes); + rtx cast_gp = lowpart_subreg (pred_mode, gp, VNx16BImode); + rtx gp_flag = gen_int_mode (SVE_MAYBE_NOT_PTRUE, SImode); + emit_insn ( + gen_aarch64_pred_cmp_ptest (NE, full_mode, pred_res, gp, in, + CONST0_RTX (full_mode), cast_gp, + gp_flag, cast_gp, gp_flag)); + rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM); + rtx cmp = gen_rtx_fmt_ee (NE, SImode, cc_reg, const0_rtx); + rtx tmp2 = gen_reg_rtx (SImode); + emit_insn (gen_aarch64_cstoresi (tmp2, cmp, cc_reg)); + emit_move_insn (operands[0], gen_lowpart (QImode, tmp2)); + DONE; + } + + rtx tmp = operands[1]; + /* 128-bit vectors need to be compressed to 64-bits first. */ + if (known_eq (128, GET_MODE_BITSIZE (<MODE>mode))) + { + /* Always reduce using a V4SI. */ + rtx reduc = gen_lowpart (V4SImode, tmp); + rtx res = gen_reg_rtx (V4SImode); + emit_insn (gen_aarch64_umaxpv4si (res, reduc, reduc)); + emit_move_insn (tmp, gen_lowpart (<MODE>mode, res)); + } + rtx val = gen_reg_rtx (DImode); + emit_move_insn (val, gen_lowpart (DImode, tmp)); + rtx cc_reg = aarch64_gen_compare_reg (NE, val, const0_rtx); + rtx cmp = gen_rtx_fmt_ee (NE, SImode, cc_reg, const0_rtx); + rtx tmp2 = gen_reg_rtx (SImode); + emit_insn (gen_aarch64_cstoresi (tmp2, cmp, cc_reg)); + emit_move_insn (operands[0], gen_lowpart (QImode, tmp2)); + DONE; +}) + +;; Unpredicated predicate XOR tree reductions. +;; Check to see if the number of active lanes in the predicates is a multiple +;; of 2. We use a normal reduction after masking with 0x1. 
+;; +;; movi v1.16b, 0x1 +;; and v2.16b, v2.16b, v2.16b +;; addv b3, v2.16b +;; fmov w1, s3 +;; and w0, w1, 1 +;; +;; or with SVE enabled +;; +;; ptrue p1.b, vl16 +;; cmpne p0.b, p1/z, z1+.b, #0 +;; cntp x1, p0, p0.b +;; and w0, w1, 1 +;; +(define_expand "reduc_sbool_xor_scal_<mode>" + [(set (match_operand:QI 0 "register_operand") + (unspec:QI [(match_operand:VALLI 1 "register_operand")] + UNSPEC_XORV))] + "TARGET_SIMD" +{ + if (TARGET_SVE) + { + machine_mode full_mode = aarch64_full_sve_mode (<VEL>mode).require (); + rtx in = force_lowpart_subreg (full_mode, operands[1], <MODE>mode); + unsigned lanes + = exact_div (GET_MODE_BITSIZE (<MODE>mode), 8).to_constant (); + machine_mode pred_mode = aarch64_sve_pred_mode (full_mode); + rtx pred_res = gen_reg_rtx (pred_mode); + rtx gp = aarch64_ptrue_reg (VNx16BImode, lanes); + rtx cast_gp = lowpart_subreg (pred_mode, gp, VNx16BImode); + rtx gp_flag = gen_int_mode (SVE_MAYBE_NOT_PTRUE, SImode); + emit_insn ( + gen_aarch64_pred_cmp (NE, full_mode, pred_res, cast_gp, gp_flag, in, + CONST0_RTX (full_mode))); + emit_insn (gen_reduc_sbool_xor_scal (pred_mode, operands[0], pred_res)); + DONE; + } + + rtx tmp = gen_reg_rtx (<MODE>mode); + rtx one_reg = force_reg (<MODE>mode, CONST1_RTX (<MODE>mode)); + emit_move_insn (tmp, gen_rtx_AND (<MODE>mode, operands[1], one_reg)); + rtx tmp2 = gen_reg_rtx (<VEL>mode); + emit_insn (gen_reduc_plus_scal_<mode> (tmp2, tmp)); + rtx tmp3 = gen_reg_rtx (DImode); + emit_move_insn (tmp3, gen_rtx_AND (DImode, + lowpart_subreg (DImode, tmp2, <VEL>mode), + const1_rtx)); + emit_move_insn (operands[0], gen_lowpart (QImode, tmp2)); + DONE; +}) + ;; SADDLV and UADDLV can be expressed as an ADDV instruction that first ;; sign or zero-extends its elements. (define_insn "aarch64_<su>addlv<mode>" diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 550ff0a..f459f63 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -135,6 +135,7 @@ ;; ---- [INT,FP] Conditional reductions ;; ---- [INT] Tree reductions ;; ---- [FP] Tree reductions +;; ---- [Predicate] Tree reductions ;; ---- [FP] Left-to-right reductions ;; ;; == Permutes @@ -8744,7 +8745,7 @@ ;; Predicated integer comparisons in which only the flags result is ;; interesting. -(define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest" +(define_insn_and_rewrite "@aarch64_pred_cmp<cmp_op><mode>_ptest" [(set (reg:CC_NZC CC_REGNUM) (unspec:CC_NZC [(match_operand:VNx16BI 1 "register_operand") @@ -9888,6 +9889,100 @@ ) ;; ------------------------------------------------------------------------- +;; ---- [Predicate] Tree reductions +;; ------------------------------------------------------------------------- +;; Includes: +;; - IORV +;; - XORV +;; - ANDV +;; ------------------------------------------------------------------------- + +;; Unpredicated predicate AND tree reductions. +;; Invert the predicate and check across all lanes +;; that the Zero flag is set. 
+;; +;; ptrue p3.b, all +;; nots p3.b, p3/z, p0.b +;; cset w0, none +;; +(define_expand "reduc_sbool_and_scal_<mode>" + [(set (match_operand:QI 0 "register_operand") + (unspec:QI [(match_operand:PRED_ALL 1 "register_operand")] + UNSPEC_ANDV))] + "TARGET_SVE" + { + rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (<data_bytes>)); + rtx cast_ptrue = gen_lowpart (<MODE>mode, ptrue); + rtx tmp = gen_reg_rtx (<MODE>mode); + emit_insn (gen_aarch64_pred_one_cmpl_z (<MODE>mode, tmp, cast_ptrue, + operands[1])); + emit_insn ( + gen_aarch64_ptest<mode> (ptrue, cast_ptrue, + gen_int_mode (SVE_KNOWN_PTRUE, SImode), + tmp)); + rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM); + rtx cmp = gen_rtx_fmt_ee (EQ, SImode, cc_reg, const0_rtx); + rtx tmp2 = gen_reg_rtx (SImode); + emit_insn (gen_aarch64_cstoresi (tmp2, cmp, cc_reg)); + emit_move_insn (operands[0], gen_lowpart (QImode, tmp2)); + DONE; + } +) + +;; Unpredicated predicate IOR tree reductions. +;; We need to make sure the results are in the CC flags, so execute a ptest +;; on the same predicate. +;; +;; ptest p0, p0.b +;; cset w0, any +;; +(define_expand "reduc_sbool_ior_scal_<mode>" + [(set (match_operand:QI 0 "register_operand") + (unspec:QI [(match_operand:PRED_ALL 1 "register_operand")] + UNSPEC_IORV))] + "TARGET_SVE" + { + rtx ptrue = lowpart_subreg (VNx16BImode, operands[1], <MODE>mode); + emit_insn ( + gen_aarch64_ptest<mode> (ptrue, operands[1], + gen_int_mode (SVE_MAYBE_NOT_PTRUE, SImode), + operands[1])); + rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM); + rtx cmp = gen_rtx_fmt_ee (NE, SImode, cc_reg, const0_rtx); + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_aarch64_cstoresi (tmp, cmp, cc_reg)); + emit_move_insn (operands[0], gen_lowpart (QImode, tmp)); + DONE; + } +) + +;; Unpredicated predicate XOR tree reductions. +;; Check to see if the number of active lanes in the predicates is a multiple +;; of 2. This generates: +;; +;; cntp x0, p0, p0.b +;; and w0, w0, 1 +;; +(define_expand "@reduc_sbool_xor_scal_<mode>" + [(set (match_dup 2) + (zero_extend:DI + (unspec:SI [(match_dup 1) + (const_int SVE_MAYBE_NOT_PTRUE) + (match_operand:PRED_ALL 1 "register_operand")] + UNSPEC_CNTP))) + (set (match_dup 4) + (and:DI (match_dup 2) + (const_int 1))) + (set (match_operand:QI 0 "register_operand") + (subreg:QI (match_dup 4) 0))] + "TARGET_SVE" + { + operands[2] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); + } +) + +;; ------------------------------------------------------------------------- ;; ---- [FP] Left-to-right reductions ;; ------------------------------------------------------------------------- ;; Includes: diff --git a/gcc/config/aarch64/aarch64.opt.urls b/gcc/config/aarch64/aarch64.opt.urls index 7ec14a9..993e0fc 100644 --- a/gcc/config/aarch64/aarch64.opt.urls +++ b/gcc/config/aarch64/aarch64.opt.urls @@ -3,6 +3,9 @@ mbig-endian UrlSuffix(gcc/AArch64-Options.html#index-mbig-endian) +menable-sysreg-checking +UrlSuffix(gcc/AArch64-Options.html#index-menable-sysreg-checking) + mgeneral-regs-only UrlSuffix(gcc/AArch64-Options.html#index-mgeneral-regs-only) diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 3757998..517b280 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -217,6 +217,9 @@ ;; All Advanced SIMD modes on which we support any arithmetic operations. 
(define_mode_iterator VALL [V8QI V16QI V4HI V8HI V2SI V4SI V2DI V2SF V4SF V2DF]) +;; All Advanced SIMD integer modes +(define_mode_iterator VALLI [VDQ_BHSI V2DI]) + ;; All Advanced SIMD modes suitable for moving, loading, and storing. (define_mode_iterator VALL_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI V4HF V8HF V4BF V8BF V2SF V4SF V2DF]) diff --git a/gcc/config/gnu.h b/gcc/config/gnu.h index 6b8f36b..825e743 100644 --- a/gcc/config/gnu.h +++ b/gcc/config/gnu.h @@ -19,6 +19,10 @@ You should have received a copy of the GNU General Public License along with GCC. If not, see <http://www.gnu.org/licenses/>. */ +/* C libraries used on GNU/Hurd. */ +#define OPTION_GLIBC_P(opts) (DEFAULT_LIBC == LIBC_GLIBC) +#define OPTION_GLIBC OPTION_GLIBC_P (&global_options) + #undef GNU_USER_TARGET_OS_CPP_BUILTINS #define GNU_USER_TARGET_OS_CPP_BUILTINS() \ do { \ diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc index fe71f55..0557df9 100644 --- a/gcc/config/i386/driver-i386.cc +++ b/gcc/config/i386/driver-i386.cc @@ -553,6 +553,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) processor = PROCESSOR_PENTIUM; break; case 6: + case 18: case 19: processor = PROCESSOR_PENTIUMPRO; break; @@ -639,18 +640,27 @@ const char *host_detect_local_cpu (int argc, const char **argv) } else if (has_feature (FEATURE_AVX)) { - /* Assume Panther Lake. */ - if (has_feature (FEATURE_PREFETCHI)) - cpu = "pantherlake"; /* Assume Clearwater Forest. */ - else if (has_feature (FEATURE_USER_MSR)) + if (has_feature (FEATURE_USER_MSR)) cpu = "clearwaterforest"; - /* Assume Arrow Lake S. */ + /* Assume Nova Lake. */ + else if (has_feature (FEATURE_PREFETCHI)) + cpu = "novalake"; else if (has_feature (FEATURE_SM3)) - cpu = "arrowlake-s"; + { + if (has_feature (FEATURE_KL)) + /* Assume Arrow Lake S. */ + cpu = "arrowlake-s"; + else + /* Assume Panther Lake. */ + cpu = "pantherlake"; + } /* Assume Sierra Forest. */ - else if (has_feature (FEATURE_AVXVNNIINT8)) + else if (has_feature (FEATURE_CLDEMOTE)) cpu = "sierraforest"; + /* Assume Arrow Lake. */ + else if (has_feature (FEATURE_AVXVNNIINT8)) + cpu = "arrowlake"; /* Assume Alder Lake. */ else if (has_feature (FEATURE_SERIALIZE)) cpu = "alderlake"; diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc index 0037465..2d92cee 100644 --- a/gcc/config/i386/i386-c.cc +++ b/gcc/config/i386/i386-c.cc @@ -295,6 +295,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__diamondrapids"); def_or_undef (parse_in, "__diamondrapids__"); break; + case PROCESSOR_NOVALAKE: + def_or_undef (parse_in, "__novalake"); + def_or_undef (parse_in, "__novalake__"); + break; /* use PROCESSOR_max to not set/unset the arch macro. */ case PROCESSOR_max: @@ -498,6 +502,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, case PROCESSOR_DIAMONDRAPIDS: def_or_undef (parse_in, "__tune_diamondrapids__"); break; + case PROCESSOR_NOVALAKE: + def_or_undef (parse_in, "__tune_novalake__"); + break; case PROCESSOR_INTEL: case PROCESSOR_GENERIC: break; diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 5bcc35c..a1f1b26 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -9515,9 +9515,9 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp, machine_mode move_mode = VOIDmode; int unroll_factor = 1; /* TODO: Once value ranges are available, fill in proper data. 
*/ - unsigned HOST_WIDE_INT min_size = 0; - unsigned HOST_WIDE_INT max_size = -1; - unsigned HOST_WIDE_INT probable_max_size = -1; + unsigned HOST_WIDE_INT min_size = HOST_WIDE_INT_0U; + unsigned HOST_WIDE_INT max_size = HOST_WIDE_INT_M1U; + unsigned HOST_WIDE_INT probable_max_size = HOST_WIDE_INT_M1U; bool misaligned_prologue_used = false; addr_space_t dst_as, src_as = ADDR_SPACE_GENERIC; diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc index 9348f55..8e27784 100644 --- a/gcc/config/i386/i386-features.cc +++ b/gcc/config/i386/i386-features.cc @@ -449,6 +449,30 @@ scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref, return true; } +/* Check whether X is a convertible *concatditi_? variant. X is known + to be any_or_plus:TI, i.e. PLUS:TI, IOR:TI or XOR:TI. */ + +static bool +timode_concatdi_p (rtx x) +{ + rtx op0 = XEXP (x, 0); + rtx op1 = XEXP (x, 1); + + if (GET_CODE (op1) == ASHIFT) + std::swap (op0, op1); + + return GET_CODE (op0) == ASHIFT + && GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND + && GET_MODE (XEXP (XEXP (op0, 0), 0)) == DImode + && REG_P (XEXP (XEXP (op0, 0), 0)) + && CONST_INT_P (XEXP (op0, 1)) + && INTVAL (XEXP (op0, 1)) == 64 + && GET_CODE (op1) == ZERO_EXTEND + && GET_MODE (XEXP (op1, 0)) == DImode + && REG_P (XEXP (op1, 0)); +} + + /* Add instruction into a chain. Return true if OK, false if the search was aborted. */ @@ -477,9 +501,26 @@ scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid, if (!analyze_register_chain (candidates, ref, disallowed)) return false; - /* The operand(s) of VEC_SELECT don't need to be converted/convertible. */ - if (def_set && GET_CODE (SET_SRC (def_set)) == VEC_SELECT) - return true; + /* The operand(s) of VEC_SELECT, ZERO_EXTEND and similar ops don't need + to be converted/convertible. */ + if (def_set) + switch (GET_CODE (SET_SRC (def_set))) + { + case VEC_SELECT: + return true; + case ZERO_EXTEND: + if (GET_MODE (XEXP (SET_SRC (def_set), 0)) == DImode) + return true; + break; + case PLUS: + case IOR: + case XOR: + if (smode == TImode && timode_concatdi_p (SET_SRC (def_set))) + return true; + break; + default: + break; + } for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref)) if (!DF_REF_REG_MEM_P (ref)) @@ -1628,14 +1669,34 @@ timode_scalar_chain::compute_convert_gain () break; case AND: + if (!MEM_P (dst)) + igain = COSTS_N_INSNS (1); + if (CONST_SCALAR_INT_P (XEXP (src, 1))) + igain += timode_immed_const_gain (XEXP (src, 1), bb); + break; + case XOR: case IOR: + if (timode_concatdi_p (src)) + { + /* vmovq;vpinsrq (11 bytes). */ + igain = speed_p ? -2 * ix86_cost->sse_to_integer + : -COSTS_N_BYTES (11); + break; + } if (!MEM_P (dst)) igain = COSTS_N_INSNS (1); if (CONST_SCALAR_INT_P (XEXP (src, 1))) igain += timode_immed_const_gain (XEXP (src, 1), bb); break; + case PLUS: + if (timode_concatdi_p (src)) + /* vmovq;vpinsrq (11 bytes). */ + igain = speed_p ? -2 * ix86_cost->sse_to_integer + : -COSTS_N_BYTES (11); + break; + case ASHIFT: case LSHIFTRT: /* See ix86_expand_v1ti_shift. */ @@ -1794,6 +1855,13 @@ timode_scalar_chain::compute_convert_gain () igain = !speed_p ? -COSTS_N_BYTES (6) : -COSTS_N_INSNS (1); break; + case ZERO_EXTEND: + if (GET_MODE (XEXP (src, 0)) == DImode) + /* xor (2 bytes) vs. vmovq (5 bytes). */ + igain = speed_p ? 
COSTS_N_INSNS (1) - ix86_cost->sse_to_integer + : -COSTS_N_BYTES (3); + break; + default: break; } @@ -1858,6 +1926,28 @@ timode_scalar_chain::fix_debug_reg_uses (rtx reg) } } +/* Convert SRC, a *concatditi3 pattern, into a vec_concatv2di instruction. + Insert this before INSN, and return the result as a V1TImode subreg. */ + +static rtx +timode_convert_concatdi (rtx src, rtx_insn *insn) +{ + rtx hi, lo; + rtx tmp = gen_reg_rtx (V2DImode); + if (GET_CODE (XEXP (src, 0)) == ASHIFT) + { + hi = XEXP (XEXP (XEXP (src, 0), 0), 0); + lo = XEXP (XEXP (src, 1), 0); + } + else + { + hi = XEXP (XEXP (XEXP (src, 1), 0), 0); + lo = XEXP (XEXP (src, 0), 0); + } + emit_insn_before (gen_vec_concatv2di (tmp, lo, hi), insn); + return gen_rtx_SUBREG (V1TImode, tmp, 0); +} + /* Convert INSN from TImode to V1T1mode. */ void @@ -1967,10 +2057,24 @@ timode_scalar_chain::convert_insn (rtx_insn *insn) PUT_MODE (src, V1TImode); break; } - /* FALLTHRU */ + convert_op (&XEXP (src, 0), insn); + convert_op (&XEXP (src, 1), insn); + PUT_MODE (src, V1TImode); + if (MEM_P (dst)) + { + tmp = gen_reg_rtx (V1TImode); + emit_insn_before (gen_rtx_SET (tmp, src), insn); + src = tmp; + } + break; case XOR: case IOR: + if (timode_concatdi_p (src)) + { + src = timode_convert_concatdi (src, insn); + break; + } convert_op (&XEXP (src, 0), insn); convert_op (&XEXP (src, 1), insn); PUT_MODE (src, V1TImode); @@ -2010,6 +2114,26 @@ timode_scalar_chain::convert_insn (rtx_insn *insn) PUT_MODE (src, V1TImode); break; + case ZERO_EXTEND: + if (GET_MODE (XEXP (src, 0)) == DImode) + { + /* Convert to *vec_concatv2di_0. */ + rtx tmp = gen_reg_rtx (V2DImode); + rtx pat = gen_rtx_VEC_CONCAT (V2DImode, XEXP (src, 0), const0_rtx); + emit_insn_before (gen_move_insn (tmp, pat), insn); + src = gen_rtx_SUBREG (vmode, tmp, 0); + } + else + gcc_unreachable (); + break; + + case PLUS: + if (timode_concatdi_p (src)) + src = timode_convert_concatdi (src, insn); + else + gcc_unreachable (); + break; + default: gcc_unreachable (); } @@ -2389,6 +2513,8 @@ timode_scalar_to_vector_candidate_p (rtx_insn *insn) case IOR: case XOR: + if (timode_concatdi_p (src)) + return true; return (REG_P (XEXP (src, 0)) || timode_mem_p (XEXP (src, 0))) && (REG_P (XEXP (src, 1)) @@ -2408,6 +2534,13 @@ timode_scalar_to_vector_candidate_p (rtx_insn *insn) && CONST_INT_P (XEXP (src, 1)) && (INTVAL (XEXP (src, 1)) & ~0x7f) == 0; + case PLUS: + return timode_concatdi_p (src); + + case ZERO_EXTEND: + return REG_P (XEXP (src, 0)) + && GET_MODE (XEXP (src, 0)) == DImode; + default: return false; } diff --git a/gcc/config/i386/i386-jit.cc b/gcc/config/i386/i386-jit.cc index c1e2929..73ca590 100644 --- a/gcc/config/i386/i386-jit.cc +++ b/gcc/config/i386/i386-jit.cc @@ -65,6 +65,18 @@ ix86_jit_register_target_info (void) jit_target_add_supported_target_dependent_type (GCC_JIT_TYPE_INT128_T); } + if (float16_type_node != NULL && TYPE_PRECISION (float16_type_node) == 16) + jit_target_add_supported_target_dependent_type (GCC_JIT_TYPE_FLOAT16); + + if (float32_type_node != NULL && TYPE_PRECISION (float32_type_node) == 32) + jit_target_add_supported_target_dependent_type (GCC_JIT_TYPE_FLOAT32); + + if (float64_type_node != NULL && TYPE_PRECISION (float64_type_node) == 64) + jit_target_add_supported_target_dependent_type (GCC_JIT_TYPE_FLOAT64); + + if (float128_type_node != NULL && TYPE_PRECISION (float128_type_node) == 128) + jit_target_add_supported_target_dependent_type (GCC_JIT_TYPE_FLOAT128); + #define ADD_TARGET_INFO jit_add_target_info #include "i386-rust-and-jit.inc" #undef 
ADD_TARGET_INFO diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index 35cba3f..dadcf76 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -132,6 +132,7 @@ along with GCC; see the file COPYING3. If not see #define m_ARROWLAKE_S (HOST_WIDE_INT_1U<<PROCESSOR_ARROWLAKE_S) #define m_PANTHERLAKE (HOST_WIDE_INT_1U<<PROCESSOR_PANTHERLAKE) #define m_DIAMONDRAPIDS (HOST_WIDE_INT_1U<<PROCESSOR_DIAMONDRAPIDS) +#define m_NOVALAKE (HOST_WIDE_INT_1U<<PROCESSOR_NOVALAKE) #define m_CORE_AVX512 (m_SKYLAKE_AVX512 | m_CANNONLAKE \ | m_ICELAKE_CLIENT | m_ICELAKE_SERVER | m_CASCADELAKE \ | m_TIGERLAKE | m_COOPERLAKE | m_SAPPHIRERAPIDS \ @@ -140,7 +141,7 @@ along with GCC; see the file COPYING3. If not see #define m_CORE_AVX2 (m_HASWELL | m_SKYLAKE | m_CORE_AVX512) #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2) #define m_CORE_HYBRID (m_ALDERLAKE | m_ARROWLAKE | m_ARROWLAKE_S \ - | m_PANTHERLAKE) + | m_PANTHERLAKE | m_NOVALAKE) #define m_GOLDMONT (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT) #define m_GOLDMONT_PLUS (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT_PLUS) #define m_TREMONT (HOST_WIDE_INT_1U<<PROCESSOR_TREMONT) @@ -790,6 +791,7 @@ static const struct processor_costs *processor_cost_table[] = &alderlake_cost, /* PROCESSOR_ARROWLAKE_S. */ &alderlake_cost, /* PROCESSOR_PANTHERLAKE. */ &icelake_cost, /* PROCESSOR_DIAMONDRAPIDS. */ + &alderlake_cost, /* PROCESSOR_NOVALAKE. */ &alderlake_cost, /* PROCESSOR_INTEL. */ &lujiazui_cost, /* PROCESSOR_LUJIAZUI. */ &yongfeng_cost, /* PROCESSOR_YONGFENG. */ diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 3a66d78..94f335f 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2356,6 +2356,7 @@ enum processor_type PROCESSOR_ARROWLAKE_S, PROCESSOR_PANTHERLAKE, PROCESSOR_DIAMONDRAPIDS, + PROCESSOR_NOVALAKE, PROCESSOR_INTEL, PROCESSOR_LUJIAZUI, PROCESSOR_YONGFENG, @@ -2487,6 +2488,7 @@ constexpr wide_int_bitmask PTA_DIAMONDRAPIDS = PTA_GRANITERAPIDS_D | PTA_CMPCCXADD | PTA_SHA512 | PTA_SM3 | PTA_SM4 | PTA_AVX10_2 | PTA_APX_F | PTA_AMX_AVX512 | PTA_AMX_FP8 | PTA_AMX_TF32 | PTA_MOVRS | PTA_AMX_MOVRS; +constexpr wide_int_bitmask PTA_NOVALAKE = PTA_PANTHERLAKE | PTA_PREFETCHI; constexpr wide_int_bitmask PTA_BDVER1 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_POPCNT | PTA_LZCNT diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 8b28c8e..4ad17f6 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -4632,6 +4632,33 @@ UNSPEC_PCMP_ITER))] "operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);") +(define_insn_and_split "*<avx512>_cmp<mode>3_dup_op" + [(set (match_operand:<avx512fmaskmode> 0 "register_operand") + (unspec:<avx512fmaskmode> + [(match_operand:VI1248_AVX512VLBW 1 "general_operand") + (match_operand:VI1248_AVX512VLBW 2 "general_operand") + (match_operand:SI 3 "<cmp_imm_predicate>")] + UNSPEC_PCMP_ITER))] + "TARGET_AVX512F && ix86_pre_reload_split () + && rtx_equal_p (operands[1], operands[2])" + "#" + "&& 1" + [(set (match_dup 0) (match_dup 4))] +{ + int cmp_imm = INTVAL (operands[3]); + rtx res = CONST0_RTX (<avx512fmaskmode>mode); + /* EQ/LE/NLT. 
*/ + if (cmp_imm == 0 || cmp_imm == 2 || cmp_imm == 5) + { + int nelts = GET_MODE_NUNITS (<MODE>mode); + if (nelts >= 8) + res = CONSTM1_RTX (<avx512fmaskmode>mode); + else + res = gen_int_mode ((1u << nelts) - 1, QImode); + } + operands[4] = res; +}) + (define_insn "*<avx512>_eq<mode>3<mask_scalar_merge_name>_1" [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k,k") (unspec:<avx512fmaskmode> diff --git a/gcc/config/rs6000/vxworks.h b/gcc/config/rs6000/vxworks.h index 9eb074b..13c706b 100644 --- a/gcc/config/rs6000/vxworks.h +++ b/gcc/config/rs6000/vxworks.h @@ -290,5 +290,21 @@ along with GCC; see the file COPYING3. If not see trigger visible link errors (hence remain harmless) if the support isn't really there. */ +/* Select a format to encode pointers in exception handling data. CODE + is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is + true if the symbol may be affected by dynamic relocations. + + This is essentially the linux64.h version with an extra guard on + TARGET_VXWORKS_RTP to avoid DW_EH_PE_indirect in 64bit DKMs as they + could result in references from one DKM to resolve to symbols exposed + by a previsouly loaded DKM even if the symbol is also provided by the + DKM where the reference takes place. */ +#undef ASM_PREFERRED_EH_DATA_FORMAT +#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \ + ((TARGET_64BIT && TARGET_VXWORKS_RTP) || flag_pic \ + ? (((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel \ + | (TARGET_64BIT ? DW_EH_PE_udata8 : DW_EH_PE_sdata4)) \ + : DW_EH_PE_absptr) + #endif /* TARGET_VXWORKS7 */ diff --git a/gcc/config/vxworks.h b/gcc/config/vxworks.h index 7268ace..bfad070 100644 --- a/gcc/config/vxworks.h +++ b/gcc/config/vxworks.h @@ -75,22 +75,27 @@ extern void vxworks_driver_init (unsigned int *, struct cl_decoded_option **); #if TARGET_VXWORKS7 -/* We arrange not rely on fixed includes for vx7 and the headers spread over - common kernel/rtp directories in addition to specific ones for each mode. - Setup sysroot_headers_suffix_spec to deal with kernel/rtp distinction. */ +/* We arrange not to rely on fixed includes for vx7 and the headers spread + over common kernel/rtp directories in addition to specific ones for each + mode. Setup sysroot_headers_suffix_spec to deal with the kernel/rtp + distinction. */ #undef SYSROOT_HEADERS_SUFFIX_SPEC #define SYSROOT_HEADERS_SUFFIX_SPEC "%{mrtp:/usr/h;:/krnl/h}" +/* Now expand everything using sysroot(+suffix) relative references. The + absence of %getenv(VSB_DIR) allows all-gcc builds with possible self-tests + to succeed without having to define the variable at all. */ + #undef VXWORKS_ADDITIONAL_CPP_SPEC -#define VXWORKS_ADDITIONAL_CPP_SPEC \ - "%{!nostdinc:%{!fself-test=*: \ - %{isystem*} \ - -idirafter %:getenv(VSB_DIR /h) \ - -idirafter %:getenv(VSB_DIR /share/h) \ - -idirafter =/system \ - -idirafter =/public \ - }}" +#define VXWORKS_ADDITIONAL_CPP_SPEC \ + "%{!nostdinc: \ + %{isystem*} \ + -idirafter =/../../h \ + -idirafter =/../../share/h \ + -idirafter =/system \ + -idirafter =/public \ + }" #else /* TARGET_VXWORKS7 */ |
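For context on the new reduc_sbool_{and,ior,xor}_scal_<mode> expanders added to aarch64-simd.md and aarch64-sve.md above: they reduce a vector of comparison results to a single scalar boolean using the UMINP/UMAXP/ADDV or PTEST/CNTP sequences quoted in the pattern comments. The sketch below is illustrative only and is not part of the patch; the function names are hypothetical, and whether a given loop is actually vectorized this way depends on the rest of the vectorizer, but these are the kinds of mask reductions the expanders are meant to serve.

/* Illustrative loops whose boolean reductions map onto the new
   reduc_sbool_*_scal expanders (names are hypothetical).  */

int
all_nonzero (const int *x, int n)    /* AND reduction: uminp/cmn/cset or SVE cmpeq + cset none.  */
{
  int all = 1;
  for (int i = 0; i < n; i++)
    all &= (x[i] != 0);
  return all;
}

int
any_nonzero (const int *x, int n)    /* IOR reduction: umaxp/cmp/cset or SVE ptest + cset any.  */
{
  int any = 0;
  for (int i = 0; i < n; i++)
    any |= (x[i] != 0);
  return any;
}

int
odd_count_nonzero (const int *x, int n) /* XOR reduction: addv then "and w0, w1, 1", or SVE cntp.  */
{
  int parity = 0;
  for (int i = 0; i < n; i++)
    parity ^= (x[i] != 0);
  return parity;
}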
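Similarly, the timode_concatdi_p / timode_convert_concatdi changes in i386-features.cc recognize TImode RTL of the form (zero_extend hi) << 64 combined with (zero_extend lo) by IOR, XOR or PLUS, and rewrite it as a vec_concatv2di (the vmovq;vpinsrq sequence costed in compute_convert_gain). A minimal sketch of the source-level idiom that produces that RTL shape; the function and variable names here are illustrative, not taken from the patch.

/* Illustrative only: building a 128-bit value from two 64-bit halves,
   the idiom whose TImode RTL timode_concatdi_p matches.  */
unsigned __int128
concat_u64 (unsigned long long hi, unsigned long long lo)
{
  return ((unsigned __int128) hi << 64) | lo;
}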