Diffstat (limited to 'gcc/config')
48 files changed, 1213 insertions, 180 deletions
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc index d1e2ab9..98337b7 100644 --- a/gcc/config/aarch64/aarch64-c.cc +++ b/gcc/config/aarch64/aarch64-c.cc @@ -293,6 +293,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) aarch64_def_or_undef (TARGET_SME2, "__ARM_FEATURE_SME2", pfile); aarch64_def_or_undef (AARCH64_HAVE_ISA (SME2p1), "__ARM_FEATURE_SME2p1", pfile); + aarch64_def_or_undef (TARGET_FAMINMAX, "__ARM_FEATURE_FAMINMAX", pfile); /* Not for ACLE, but required to keep "float.h" correct if we switch target between implementations that do or do not support ARMv8.2-A diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index 7f204fd..1209630 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -224,7 +224,7 @@ AARCH64_CORE("neoverse-v3ae", neoversev3ae, cortexa57, V9_2A, (SVE2_BITPERM, RNG AARCH64_CORE("demeter", demeter, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1) /* NVIDIA ('N') cores. */ -AARCH64_CORE("olympus", olympus, cortexa57, V9_2A, (SVE2_BITPERM, RNG, LS64, MEMTAG, PROFILE, FAMINMAX, FP8DOT2, LUT, SVE2_AES, SVE2_SHA3, SVE2_SM4), neoversev3, 0x4e, 0x10, -1) +AARCH64_CORE("olympus", olympus, cortexa57, V9_2A, (SVE2_BITPERM, RNG, LS64, MEMTAG, PROFILE, FAMINMAX, FP8FMA, FP8DOT2, FP8DOT4, LUT, SVE2_AES, SVE2_SHA3, SVE2_SM4), neoversev3, 0x4e, 0x10, -1) /* Generic Architecture Processors. */ AARCH64_CORE("generic", generic, cortexa53, V8A, (), generic, 0x0, 0x0, -1) diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 8f44aea..1ca86c9 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -1260,6 +1260,7 @@ void aarch64_restore_za (rtx); void aarch64_expand_crc_using_pmull (scalar_mode, scalar_mode, rtx *); void aarch64_expand_reversed_crc_using_pmull (scalar_mode, scalar_mode, rtx *); +void aarch64_expand_fp_spaceship (rtx, rtx, rtx, rtx); extern bool aarch64_gcs_enabled (); diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 3dbd659..d4af370 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -3133,9 +3133,9 @@ "TARGET_SVE" { rtx tmp = gen_reg_rtx (<MODE>mode); - emit_insn (gen_vcond_mask_<mode><vpred> (tmp, operands[1], - CONST1_RTX (<MODE>mode), - CONST0_RTX (<MODE>mode))); + emit_insn (gen_vcond_mask_<mode><vpred> (tmp, CONST1_RTX (<MODE>mode), + CONST0_RTX (<MODE>mode), + operands[1])); emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2])); DONE; } diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 4e80114..38c112c 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -31073,8 +31073,6 @@ aarch64_valid_sysreg_name_p (const char *regname) const sysreg_t *sysreg = aarch64_lookup_sysreg_map (regname); if (sysreg == NULL) return aarch64_is_implem_def_reg (regname); - if (sysreg->arch_reqs) - return bool (aarch64_isa_flags & sysreg->arch_reqs); return true; } @@ -31098,8 +31096,6 @@ aarch64_retrieve_sysreg (const char *regname, bool write_p, bool is128op) if ((write_p && (sysreg->properties & F_REG_READ)) || (!write_p && (sysreg->properties & F_REG_WRITE))) return NULL; - if ((~aarch64_isa_flags & sysreg->arch_reqs) != 0) - return NULL; return sysreg->encoding; } @@ -31298,6 +31294,79 @@ aarch64_expand_reversed_crc_using_pmull (scalar_mode crc_mode, } } +/* Expand the spaceship optab for 
floating-point operands. + + If the result is compared against (-1, 0, 1 , 2), expand into + fcmpe + conditional branch insns. + + Otherwise (the result is just stored as an integer), expand into + fcmpe + a sequence of conditional select/increment/invert insns. */ +void +aarch64_expand_fp_spaceship (rtx dest, rtx op0, rtx op1, rtx hint) +{ + rtx cc_reg = gen_rtx_REG (CCFPEmode, CC_REGNUM); + emit_set_insn (cc_reg, gen_rtx_COMPARE (CCFPEmode, op0, op1)); + + rtx cc_gt = gen_rtx_GT (VOIDmode, cc_reg, const0_rtx); + rtx cc_lt = gen_rtx_LT (VOIDmode, cc_reg, const0_rtx); + rtx cc_un = gen_rtx_UNORDERED (VOIDmode, cc_reg, const0_rtx); + + if (hint == const0_rtx) + { + rtx un_label = gen_label_rtx (); + rtx lt_label = gen_label_rtx (); + rtx gt_label = gen_label_rtx (); + rtx end_label = gen_label_rtx (); + + rtx temp = gen_rtx_IF_THEN_ELSE (VOIDmode, cc_un, + gen_rtx_LABEL_REF (Pmode, un_label), pc_rtx); + aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, temp)); + + temp = gen_rtx_IF_THEN_ELSE (VOIDmode, cc_lt, + gen_rtx_LABEL_REF (Pmode, lt_label), pc_rtx); + emit_jump_insn (gen_rtx_SET (pc_rtx, temp)); + + temp = gen_rtx_IF_THEN_ELSE (VOIDmode, cc_gt, + gen_rtx_LABEL_REF (Pmode, gt_label), pc_rtx); + emit_jump_insn (gen_rtx_SET (pc_rtx, temp)); + + /* Equality. */ + emit_move_insn (dest, const0_rtx); + emit_jump (end_label); + + emit_label (un_label); + emit_move_insn (dest, const2_rtx); + emit_jump (end_label); + + emit_label (gt_label); + emit_move_insn (dest, const1_rtx); + emit_jump (end_label); + + emit_label (lt_label); + emit_move_insn (dest, constm1_rtx); + + emit_label (end_label); + } + else + { + rtx temp0 = gen_reg_rtx (SImode); + rtx temp1 = gen_reg_rtx (SImode); + rtx cc_ungt = gen_rtx_UNGT (VOIDmode, cc_reg, const0_rtx); + + /* The value of hint is stored if the operands are unordered. */ + rtx temp_un = gen_int_mode (UINTVAL (hint) - 1, SImode); + if (!aarch64_reg_zero_or_m1_or_1 (temp_un, SImode)) + temp_un = force_reg (SImode, temp_un); + + emit_set_insn (temp0, gen_rtx_IF_THEN_ELSE (SImode, cc_lt, + constm1_rtx, const0_rtx)); + emit_set_insn (temp1, gen_rtx_IF_THEN_ELSE (SImode, cc_un, + temp_un, const0_rtx)); + emit_set_insn (dest, gen_rtx_IF_THEN_ELSE (SImode, cc_ungt, + gen_rtx_PLUS (SImode, temp1, const1_rtx), temp0)); + } +} + /* Target-specific selftests. 
*/ #if CHECKING_P diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 031e621..c678f7a 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -707,11 +707,12 @@ ) (define_expand "cbranch<mode>4" - [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" - [(match_operand:GPF 1 "register_operand") - (match_operand:GPF 2 "aarch64_fp_compare_operand")]) - (label_ref (match_operand 3 "" "")) - (pc)))] + [(set (pc) (if_then_else + (match_operator 0 "aarch64_comparison_operator" + [(match_operand:GPF_F16 1 "register_operand") + (match_operand:GPF_F16 2 "aarch64_fp_compare_operand")]) + (label_ref (match_operand 3 "" "")) + (pc)))] "" " operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1], @@ -4337,26 +4338,28 @@ (define_insn "fcmp<mode>" [(set (reg:CCFP CC_REGNUM) - (compare:CCFP (match_operand:GPF 0 "register_operand") - (match_operand:GPF 1 "aarch64_fp_compare_operand")))] + (compare:CCFP + (match_operand:GPF_F16 0 "register_operand") + (match_operand:GPF_F16 1 "aarch64_fp_compare_operand")))] "TARGET_FLOAT" {@ [ cons: 0 , 1 ] [ w , Y ] fcmp\t%<s>0, #0.0 [ w , w ] fcmp\t%<s>0, %<s>1 } - [(set_attr "type" "fcmp<s>")] + [(set_attr "type" "fcmp<stype>")] ) (define_insn "fcmpe<mode>" [(set (reg:CCFPE CC_REGNUM) - (compare:CCFPE (match_operand:GPF 0 "register_operand") - (match_operand:GPF 1 "aarch64_fp_compare_operand")))] + (compare:CCFPE + (match_operand:GPF_F16 0 "register_operand") + (match_operand:GPF_F16 1 "aarch64_fp_compare_operand")))] "TARGET_FLOAT" {@ [ cons: 0 , 1 ] [ w , Y ] fcmpe\t%<s>0, #0.0 [ w , w ] fcmpe\t%<s>0, %<s>1 } - [(set_attr "type" "fcmp<s>")] + [(set_attr "type" "fcmp<stype>")] ) (define_insn "*cmp_swp_<shift>_reg<mode>" @@ -4392,6 +4395,49 @@ [(set_attr "type" "alus_ext")] ) +;; <=> operator pattern (integer) +;; (a == b) ? 0 : (a < b) ? -1 : 1. +(define_expand "spaceship<mode>4" + [(match_operand:SI 0 "register_operand") + (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "register_operand") + (match_operand:SI 3 "const_int_operand")] + "" + { + // 1 indicates unsigned comparison, -1 indicates signed. + gcc_assert (operands[3] == constm1_rtx || operands[3] == const1_rtx); + + rtx cc_reg = aarch64_gen_compare_reg (EQ, operands[1], operands[2]); + RTX_CODE code_gt = operands[3] == const1_rtx ? GTU : GT; + RTX_CODE code_lt = operands[3] == const1_rtx ? LTU : LT; + + rtx cc_gt = gen_rtx_fmt_ee (code_gt, VOIDmode, cc_reg, const0_rtx); + rtx cc_lt = gen_rtx_fmt_ee (code_lt, VOIDmode, cc_reg, const0_rtx); + + rtx temp = gen_reg_rtx (SImode); + emit_insn (gen_rtx_SET (temp, gen_rtx_IF_THEN_ELSE (SImode, cc_gt, + const1_rtx, const0_rtx))); + emit_insn (gen_rtx_SET (operands[0], gen_rtx_IF_THEN_ELSE (SImode, cc_lt, + constm1_rtx, temp))); + DONE; + } +) + +;; <=> operator pattern (floating-point) +;; (a == b) ? 0 : (a < b) ? -1 : (a > b) ? 1 : UNORDERED. 
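For illustration, when the result of the floating-point <=> is consumed as a plain integer (operand 3, the "hint", is non-zero), the sequence emitted by aarch64_expand_fp_spaceship above is roughly equivalent to the following C sketch; all three conditions are read back from a single FCMPE, and the sketch only shows the value computed, not the exact instructions:

    /* r = (a <=> b), with `hint' returned for unordered operands
       (2 for the usual libstdc++ partial_ordering encoding).  */
    static int
    fp_spaceship_sketch (double a, double b, int hint)
    {
      int lt = (a < b) ? -1 : 0;                               /* csel  */
      int un = __builtin_isunordered (a, b) ? hint - 1 : 0;    /* csel  */
      /* UNGT: greater than, or unordered.  */
      return (__builtin_isgreater (a, b) || __builtin_isunordered (a, b))
             ? un + 1 : lt;                                    /* csinc */
    }

Ordered operands therefore yield -1, 0 or 1 and unordered operands yield the hint value; the hint == 0 case (result compared against -1/0/1/2) instead uses the fcmpe + conditional-branch form described in the comment.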
+(define_expand "spaceship<mode>4" + [(match_operand:SI 0 "register_operand") + (match_operand:GPF 1 "register_operand") + (match_operand:GPF 2 "register_operand") + (match_operand:SI 3 "const_int_operand")] + "TARGET_FLOAT" + { + aarch64_expand_fp_spaceship (operands[0], operands[1], operands[2], + operands[3]); + DONE; + } +) + ;; ------------------------------------------------------------------- ;; Store-flag and conditional select insns ;; ------------------------------------------------------------------- @@ -4424,8 +4470,8 @@ (define_expand "cstore<mode>4" [(set (match_operand:SI 0 "register_operand") (match_operator:SI 1 "aarch64_comparison_operator_mode" - [(match_operand:GPF 2 "register_operand") - (match_operand:GPF 3 "aarch64_fp_compare_operand")]))] + [(match_operand:GPF_F16 2 "register_operand") + (match_operand:GPF_F16 3 "aarch64_fp_compare_operand")]))] "" " operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2], diff --git a/gcc/config/alpha/alpha.cc b/gcc/config/alpha/alpha.cc index ba470d9..14e7da5 100644 --- a/gcc/config/alpha/alpha.cc +++ b/gcc/config/alpha/alpha.cc @@ -4291,14 +4291,10 @@ alpha_get_mem_rtx_alignment_and_offset (rtx expr, int &a, HOST_WIDE_INT &o) tree mem = MEM_EXPR (expr); if (mem != NULL_TREE) - switch (TREE_CODE (mem)) - { - case MEM_REF: - tree_offset = mem_ref_offset (mem).force_shwi (); - tree_align = get_object_alignment (get_base_address (mem)); - break; + { + HOST_WIDE_INT comp_offset = 0; - case COMPONENT_REF: + for (; TREE_CODE (mem) == COMPONENT_REF; mem = TREE_OPERAND (mem, 0)) { tree byte_offset = component_ref_field_offset (mem); tree bit_offset = DECL_FIELD_BIT_OFFSET (TREE_OPERAND (mem, 1)); @@ -4307,14 +4303,15 @@ alpha_get_mem_rtx_alignment_and_offset (rtx expr, int &a, HOST_WIDE_INT &o) || !poly_int_tree_p (byte_offset, &offset) || !tree_fits_shwi_p (bit_offset)) break; - tree_offset = offset + tree_to_shwi (bit_offset) / BITS_PER_UNIT; + comp_offset += offset + tree_to_shwi (bit_offset) / BITS_PER_UNIT; } - tree_align = get_object_alignment (get_base_address (mem)); - break; - default: - break; - } + if (TREE_CODE (mem) == MEM_REF) + { + tree_offset = comp_offset + mem_ref_offset (mem).force_shwi (); + tree_align = get_object_alignment (get_base_address (mem)); + } + } if (reg_align > mem_align) { diff --git a/gcc/config/c6x/c6x.h b/gcc/config/c6x/c6x.h index e7da250..50bad27 100644 --- a/gcc/config/c6x/c6x.h +++ b/gcc/config/c6x/c6x.h @@ -444,11 +444,9 @@ struct GTY(()) machine_function #define TARG_VEC_PERMUTE_COST 1 #endif -/* ttype entries (the only interesting data references used) are - sb-relative got-indirect (aka .ehtype). */ +/* .ehtype ttype entries are sb-relative. */ #define ASM_PREFERRED_EH_DATA_FORMAT(code, data) \ - (((code) == 0 && (data) == 1) ? (DW_EH_PE_datarel | DW_EH_PE_indirect) \ - : DW_EH_PE_absptr) + (((code) == 0 && (data) == 1) ? DW_EH_PE_datarel : DW_EH_PE_absptr) /* This should be the same as the definition in elfos.h, plus the call to output special unwinding directives. 
*/ diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h index 8c164fd..9b9a3fe 100644 --- a/gcc/config/darwin.h +++ b/gcc/config/darwin.h @@ -504,6 +504,7 @@ extern GTY(()) int darwin_ms_struct; %{static|static-libgcc|static-libgfortran:%:replace-outfile(-lgfortran libgfortran.a%s)}\ %{static|static-libgcc|static-libquadmath:%:replace-outfile(-lquadmath libquadmath.a%s)}\ %{static|static-libgcc|static-libphobos:%:replace-outfile(-lgphobos libgphobos.a%s)}\ + %{static|static-libgcc|static-libgcobol:%:replace-outfile(-lgcobol libgcobol.a%s)}\ %{static|static-libgcc|static-libstdc++|static-libgfortran:%:replace-outfile(-lgomp libgomp.a%s)}\ %{static|static-libgcc|static-libstdc++:%:replace-outfile(-lstdc++ libstdc++.a%s)}\ %{static|static-libgm2:%:replace-outfile(-lm2pim libm2pim.a%s)}\ diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md index 695656f..e0fb735 100644 --- a/gcc/config/gcn/gcn.md +++ b/gcc/config/gcn/gcn.md @@ -1018,7 +1018,9 @@ [(const_int 0)] "" { - sorry ("exception handling not supported"); + if (!fake_exceptions) + sorry ("exception handling not supported"); + DONE; }) ;; }}} diff --git a/gcc/config/gcn/gcn.opt b/gcc/config/gcn/gcn.opt index 142b439..99d6aeb 100644 --- a/gcc/config/gcn/gcn.opt +++ b/gcc/config/gcn/gcn.opt @@ -101,3 +101,11 @@ Enum(gcn_preferred_vectorization_factor) String(32) Value(32) EnumValue Enum(gcn_preferred_vectorization_factor) String(64) Value(64) + +mfake-exceptions +Target Var(fake_exceptions) Init(0) Undocumented +; With '-mfake-exceptions' enabled, the user-visible behavior in presence of +; exception handling constructs changes such that the compile-time +; 'sorry, unimplemented: exception handling not supported' is skipped, code +; generation proceeds, and instead, exception handling constructs 'abort' at +; run time. (..., or don't, if they're in dead code.) diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc index f5b89c9..b284ff4 100644 --- a/gcc/config/gcn/mkoffload.cc +++ b/gcc/config/gcn/mkoffload.cc @@ -1160,6 +1160,9 @@ main (int argc, char **argv) obstack_ptr_grow (&cc_argv_obstack, "-xlto"); if (fopenmp) obstack_ptr_grow (&cc_argv_obstack, "-mgomp"); + /* The host code may contain exception handling constructs. + Handle these as good as we can. */ + obstack_ptr_grow (&cc_argv_obstack, "-mfake-exceptions"); for (int ix = 1; ix != argc; ix++) { diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index cdfd94d..a314800 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -4138,6 +4138,10 @@ ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0, return false; mode = GET_MODE (dest); + if (immediate_operand (if_false, mode)) + if_false = force_reg (mode, if_false); + if (immediate_operand (if_true, mode)) + if_true = force_reg (mode, if_true); /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here, but MODE may be a vector mode and thus not appropriate. 
*/ @@ -4687,6 +4691,8 @@ ix86_expand_fp_movcc (rtx operands[]) compare_op = ix86_expand_compare (NE, tmp, const0_rtx); } + operands[2] = force_reg (mode, operands[2]); + operands[3] = force_reg (mode, operands[3]); emit_insn (gen_rtx_SET (operands[0], gen_rtx_IF_THEN_ELSE (mode, compare_op, operands[2], operands[3]))); @@ -19256,8 +19262,6 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode) e1 = gen_reg_rtx (mode); x1 = gen_reg_rtx (mode); - /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */ - b = force_reg (mode, b); /* x0 = rcp(b) estimate */ @@ -19270,20 +19274,42 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode) emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b), UNSPEC_RCP))); - /* e0 = x0 * b */ - emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b))); + unsigned vector_size = GET_MODE_SIZE (mode); + + /* (a - (rcp(b) * a * b)) * rcp(b) + rcp(b) * a + N-R step with 2 fma implementation. */ + if (TARGET_FMA + || (TARGET_AVX512F && vector_size == 64) + || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))) + { + /* e0 = x0 * a */ + emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a))); + /* e1 = e0 * b - a */ + emit_insn (gen_rtx_SET (e1, gen_rtx_FMA (mode, e0, b, + gen_rtx_NEG (mode, a)))); + /* res = - e1 * x0 + e0 */ + emit_insn (gen_rtx_SET (res, gen_rtx_FMA (mode, + gen_rtx_NEG (mode, e1), + x0, e0))); + } + else + /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */ + { + /* e0 = x0 * b */ + emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b))); - /* e0 = x0 * e0 */ - emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0))); + /* e1 = x0 + x0 */ + emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0))); - /* e1 = x0 + x0 */ - emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0))); + /* e0 = x0 * e0 */ + emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0))); - /* x1 = e1 - e0 */ - emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0))); + /* x1 = e1 - e0 */ + emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0))); - /* res = a * x1 */ - emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1))); + /* res = a * x1 */ + emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1))); + } } /* Output code to perform a Newton-Rhapson approximation of a diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index a9fac01..964449f 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -2828,8 +2828,8 @@ ix86_option_override_internal (bool main_args_p, if (flag_nop_mcount) error ("%<-mnop-mcount%> is not compatible with this target"); #endif - if (flag_nop_mcount && flag_pic) - error ("%<-mnop-mcount%> is not implemented for %<-fPIC%>"); + if (flag_nop_mcount && flag_pic && !flag_plt) + error ("%<-mnop-mcount%> is not implemented for %<-fno-plt%>"); /* Accept -msseregparm only if at least SSE support is enabled. */ if (TARGET_SSEREGPARM_P (opts->x_target_flags) diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 4f8380c4..aef4145 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -100,6 +100,7 @@ along with GCC; see the file COPYING3. If not see #include "i386-features.h" #include "function-abi.h" #include "rtl-error.h" +#include "gimple-pretty-print.h" /* This file should be included last. */ #include "target-def.h" @@ -458,6 +459,9 @@ int ix86_arch_specified; indirect thunk pushes the return address onto stack, destroying red-zone. 
+ NB: Don't use red-zone for functions with no_caller_saved_registers + and 32 GPRs since 128-byte red-zone is too small for 31 GPRs. + TODO: If we can reserve the first 2 WORDs, for PUSH and, another for CALL, in red-zone, we can allow local indirect jumps with indirect thunk. */ @@ -467,6 +471,9 @@ ix86_using_red_zone (void) { return (TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI + && (!TARGET_APX_EGPR + || (cfun->machine->call_saved_registers + != TYPE_NO_CALLER_SAVED_REGISTERS)) && (!cfun->machine->has_local_indirect_jump || cfun->machine->indirect_branch_type == indirect_branch_keep)); } @@ -21810,6 +21817,25 @@ ix86_insn_cost (rtx_insn *insn, bool speed) return insn_cost + pattern_cost (PATTERN (insn), speed); } +/* Return cost of SSE/AVX FP->FP conversion (extensions and truncates). */ + +static int +vec_fp_conversion_cost (const struct processor_costs *cost, int size) +{ + if (size < 128) + return cost->cvtss2sd; + else if (size < 256) + { + if (TARGET_SSE_SPLIT_REGS) + return cost->cvtss2sd * size / 64; + return cost->cvtss2sd; + } + if (size < 512) + return cost->vcvtps2pd256; + else + return cost->vcvtps2pd512; +} + /* Compute a (partial) cost for rtx X. Return true if the complete cost has been computed, and false if subexpressions should be scanned. In either case, *TOTAL contains the cost result. */ @@ -22473,17 +22499,18 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, return false; case FLOAT_EXTEND: + /* x87 represents all values extended to 80bit. */ if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) *total = 0; else - *total = ix86_vec_cost (mode, cost->addss); + *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode)); return false; case FLOAT_TRUNCATE: if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) *total = cost->fadd; else - *total = ix86_vec_cost (mode, cost->addss); + *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode)); return false; case ABS: @@ -23158,6 +23185,12 @@ x86_print_call_or_nop (FILE *file, const char *target) if (flag_nop_mcount || !strcmp (target, "nop")) /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */ fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n"); + else if (!TARGET_PECOFF && flag_pic) + { + gcc_assert (flag_plt); + + fprintf (file, "1:\tcall\t%s@PLT\n", target); + } else fprintf (file, "1:\tcall\t%s\n", target); } @@ -23321,7 +23354,7 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) break; case CM_SMALL_PIC: case CM_MEDIUM_PIC: - if (!ix86_direct_extern_access) + if (!flag_plt) { if (ASSEMBLER_DIALECT == ASM_INTEL) fprintf (file, "1:\tcall\t[QWORD PTR %s@GOTPCREL[rip]]\n", @@ -23352,7 +23385,9 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) "\tleal\t%sP%d@GOTOFF(%%ebx), %%" PROFILE_COUNT_REGISTER "\n", LPREFIX, labelno); #endif - if (ASSEMBLER_DIALECT == ASM_INTEL) + if (flag_plt) + x86_print_call_or_nop (file, mcount_name); + else if (ASSEMBLER_DIALECT == ASM_INTEL) fprintf (file, "1:\tcall\t[DWORD PTR %s@GOT[ebx]]\n", mcount_name); else fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name); @@ -24669,7 +24704,7 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, switch (type_of_cost) { case scalar_stmt: - return fp ? ix86_cost->addss : COSTS_N_INSNS (1); + return fp ? ix86_cost->addss : COSTS_N_INSNS (1); case scalar_load: /* load/store costs are relative to register move which is 2. 
Recompute @@ -24740,7 +24775,11 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, return ix86_cost->cond_not_taken_branch_cost; case vec_perm: + return ix86_vec_cost (mode, ix86_cost->sse_op); + case vec_promote_demote: + if (fp) + return vec_fp_conversion_cost (ix86_tune_cost, mode); return ix86_vec_cost (mode, ix86_cost->sse_op); case vec_construct: @@ -25261,7 +25300,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, else if (X87_FLOAT_MODE_P (mode)) stmt_cost = ix86_cost->fadd; else - stmt_cost = ix86_cost->add; + stmt_cost = ix86_cost->add; } else stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss @@ -25316,7 +25355,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, (subcode == RSHIFT_EXPR && !TYPE_UNSIGNED (TREE_TYPE (op1))) ? ASHIFTRT : LSHIFTRT, mode, - TREE_CODE (op2) == INTEGER_CST, + TREE_CODE (op2) == INTEGER_CST, cst_and_fits_in_hwi (op2) ? int_cst_value (op2) : -1, false, false, NULL, NULL); @@ -25325,22 +25364,102 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, case NOP_EXPR: /* Only sign-conversions are free. */ if (tree_nop_conversion_p - (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)), + (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)), TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)))) stmt_cost = 0; + else if (fp) + stmt_cost = vec_fp_conversion_cost + (ix86_tune_cost, GET_MODE_BITSIZE (mode)); + break; + + case COND_EXPR: + { + /* SSE2 conditinal move sequence is: + pcmpgtd %xmm5, %xmm0 + pand %xmm0, %xmm2 + pandn %xmm1, %xmm0 + por %xmm2, %xmm0 + while SSE4 uses cmp + blend + and AVX512 masked moves. */ + + int ninsns = TARGET_SSE4_1 ? 2 : 4; + + if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) + stmt_cost = ninsns * ix86_cost->sse_op; + else if (X87_FLOAT_MODE_P (mode)) + /* x87 requires conditional branch. We don't have cost for + that. */ + ; + else if (VECTOR_MODE_P (mode)) + stmt_cost = ix86_vec_cost (mode, ninsns * ix86_cost->sse_op); + else + /* compare + cmov. */ + stmt_cost = ix86_cost->add * 2; + } break; - case BIT_IOR_EXPR: - case ABS_EXPR: - case ABSU_EXPR: case MIN_EXPR: case MAX_EXPR: + if (fp) + { + if (X87_FLOAT_MODE_P (mode)) + /* x87 requires conditional branch. We don't have cost for + that. */ + ; + else + /* minss */ + stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); + } + else + { + if (VECTOR_MODE_P (mode)) + { + stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); + /* vpmin was introduced in SSE3. + SSE2 needs pcmpgtd + pand + pandn + pxor. */ + if (!TARGET_SSSE3) + stmt_cost *= 4; + } + else + /* cmp + cmov. */ + stmt_cost = ix86_cost->add * 2; + } + break; + + case ABS_EXPR: + case ABSU_EXPR: + if (fp) + { + if (X87_FLOAT_MODE_P (mode)) + /* fabs. */ + stmt_cost = ix86_cost->fabs; + else + /* andss of sign bit. */ + stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); + } + else + { + if (VECTOR_MODE_P (mode)) + { + stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); + /* vabs was introduced in SSE3. + SSE3 uses psrat + pxor + psub. */ + if (!TARGET_SSSE3) + stmt_cost *= 3; + } + else + /* neg + cmov. 
*/ + stmt_cost = ix86_cost->add * 2; + } + break; + + case BIT_IOR_EXPR: case BIT_XOR_EXPR: case BIT_AND_EXPR: case BIT_NOT_EXPR: - if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) - stmt_cost = ix86_cost->sse_op; - else if (VECTOR_MODE_P (mode)) + gcc_assert (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode) + && !X87_FLOAT_MODE_P (mode)); + if (VECTOR_MODE_P (mode)) stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); else stmt_cost = ix86_cost->add; @@ -25369,6 +25488,29 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, break; } + if (kind == vec_promote_demote + && fp && FLOAT_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)))) + { + int outer_size + = tree_to_uhwi + (TYPE_SIZE + (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)))); + int inner_size + = tree_to_uhwi + (TYPE_SIZE + (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)))); + int stmt_cost = vec_fp_conversion_cost + (ix86_tune_cost, GET_MODE_BITSIZE (mode)); + /* VEC_PACK_TRUNC_EXPR: If inner size is greater than outer size we will end + up doing two conversions and packing them. */ + if (inner_size > outer_size) + { + int n = inner_size / outer_size; + stmt_cost = stmt_cost * n + + (n - 1) * ix86_vec_cost (mode, ix86_cost->sse_op); + } + } + /* If we do elementwise loads into a vector then we are bound by latency and execution resources for the many scalar loads (AGU and load ports). Try to account for this by scaling the diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 8507243..18aa42d 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -207,6 +207,12 @@ struct processor_costs { const int divsd; /* cost of DIVSD instructions. */ const int sqrtss; /* cost of SQRTSS instructions. */ const int sqrtsd; /* cost of SQRTSD instructions. */ + const int cvtss2sd; /* cost SSE FP conversions, + such as CVTSS2SD. */ + const int vcvtps2pd256; /* cost 256bit packed FP conversions, + such as VCVTPD2PS with larger reg in ymm. */ + const int vcvtps2pd512; /* cost 512bit packed FP conversions, + such as VCVTPD2PS with larger reg in zmm. */ const int reassoc_int, reassoc_fp, reassoc_vec_int, reassoc_vec_fp; /* Specify reassociation width for integer, fp, vector integer and vector fp diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index d6b2f29..e170da3 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -26592,8 +26592,8 @@ [(set (match_operand:X87MODEF 0 "register_operand") (if_then_else:X87MODEF (match_operand 1 "comparison_operator") - (match_operand:X87MODEF 2 "register_operand") - (match_operand:X87MODEF 3 "register_operand")))] + (match_operand:X87MODEF 2 "nonimm_or_0_or_1s_operand") + (match_operand:X87MODEF 3 "nonimm_or_0_operand")))] "(TARGET_80387 && TARGET_CMOVE) || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;") diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 3d3848c..4b23e18 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -1267,6 +1267,14 @@ (match_operand 0 "vector_memory_operand") (match_code "const_vector"))) +; Return true when OP is register_operand, vector_memory_operand, +; const_vector zero or const_vector all ones. 
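For context on why the new nonimm_or_0_or_1s_operand / vector_or_0_or_1s_operand predicates single out constant zero and constant all-ones: with a mask of the usual SSE/AVX shape (all-ones in the lanes where the comparison holds, zero elsewhere), selecting one of those two constants collapses to a single bitwise operation. A minimal sketch of the identities, using GCC vector extensions purely for illustration (not code from the patch):

    typedef int v4si __attribute__ ((vector_size (16)));

    /* mask is -1 per lane where the comparison is true, 0 elsewhere.  */
    v4si sel_all_ones (v4si mask, v4si b) { return mask | b; }   /* cmp ? -1 : b  */
    v4si sel_zero     (v4si mask, v4si b) { return ~mask & b; }  /* cmp ?  0 : b  */

Accepting such constants directly in the vcond_mask_* expanders (below and in the sse.md hunks) presumably keeps them visible during expansion instead of forcing them into a register up front.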
+(define_predicate "vector_or_0_or_1s_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "vector_memory_operand") + (match_operand 0 "const0_operand") + (match_operand 0 "int_float_vector_all_ones_operand"))) + (define_predicate "bcst_mem_operand" (and (match_code "vec_duplicate") (and (match_test "TARGET_AVX512F") @@ -1333,6 +1341,12 @@ (ior (match_operand 0 "nonimmediate_operand") (match_operand 0 "const0_operand"))) +; Return true when OP is a nonimmediate or zero or all ones. +(define_predicate "nonimm_or_0_or_1s_operand" + (ior (match_operand 0 "nonimmediate_operand") + (match_operand 0 "const0_operand") + (match_operand 0 "int_float_vector_all_ones_operand"))) + ;; Return true for RTX codes that force SImode address. (define_predicate "SImode_address_operand" (match_code "subreg,zero_extend,and")) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index b280676..20b35a1 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -5142,7 +5142,7 @@ (define_expand "vcond_mask_<mode><sseintvecmodelower>" [(set (match_operand:VI_256_AVX2 0 "register_operand") (vec_merge:VI_256_AVX2 - (match_operand:VI_256_AVX2 1 "nonimmediate_operand") + (match_operand:VI_256_AVX2 1 "nonimm_or_0_or_1s_operand") (match_operand:VI_256_AVX2 2 "nonimm_or_0_operand") (match_operand:<sseintvecmode> 3 "register_operand")))] "TARGET_AVX" @@ -5155,7 +5155,7 @@ (define_expand "vcond_mask_<mode><sseintvecmodelower>" [(set (match_operand:VI_128 0 "register_operand") (vec_merge:VI_128 - (match_operand:VI_128 1 "vector_operand") + (match_operand:VI_128 1 "vector_or_0_or_1s_operand") (match_operand:VI_128 2 "nonimm_or_0_operand") (match_operand:<sseintvecmode> 3 "register_operand")))] "TARGET_SSE2" @@ -5168,7 +5168,7 @@ (define_expand "vcond_mask_v1tiv1ti" [(set (match_operand:V1TI 0 "register_operand") (vec_merge:V1TI - (match_operand:V1TI 1 "vector_operand") + (match_operand:V1TI 1 "vector_or_0_or_1s_operand") (match_operand:V1TI 2 "nonimm_or_0_operand") (match_operand:V1TI 3 "register_operand")))] "TARGET_SSE2" @@ -5181,7 +5181,7 @@ (define_expand "vcond_mask_<mode><sseintvecmodelower>" [(set (match_operand:VF_256 0 "register_operand") (vec_merge:VF_256 - (match_operand:VF_256 1 "nonimmediate_operand") + (match_operand:VF_256 1 "nonimm_or_0_or_1s_operand") (match_operand:VF_256 2 "nonimm_or_0_operand") (match_operand:<sseintvecmode> 3 "register_operand")))] "TARGET_AVX" @@ -5194,7 +5194,7 @@ (define_expand "vcond_mask_<mode><sseintvecmodelower>" [(set (match_operand:VF_128 0 "register_operand") (vec_merge:VF_128 - (match_operand:VF_128 1 "vector_operand") + (match_operand:VF_128 1 "vector_or_0_or_1s_operand") (match_operand:VF_128 2 "nonimm_or_0_operand") (match_operand:<sseintvecmode> 3 "register_operand")))] "TARGET_SSE" diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index 7c8cb73..cddcf61 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -121,16 +121,19 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */ COSTS_N_BYTES (2), /* cost of FCHS instruction. */ COSTS_N_BYTES (2), /* cost of FSQRT instruction. */ - COSTS_N_BYTES (2), /* cost of cheap SSE instruction. */ - COSTS_N_BYTES (2), /* cost of ADDSS/SD SUBSS/SD insns. */ - COSTS_N_BYTES (2), /* cost of MULSS instruction. */ - COSTS_N_BYTES (2), /* cost of MULSD instruction. */ - COSTS_N_BYTES (2), /* cost of FMA SS instruction. */ - COSTS_N_BYTES (2), /* cost of FMA SD instruction. 
*/ - COSTS_N_BYTES (2), /* cost of DIVSS instruction. */ - COSTS_N_BYTES (2), /* cost of DIVSD instruction. */ - COSTS_N_BYTES (2), /* cost of SQRTSS instruction. */ - COSTS_N_BYTES (2), /* cost of SQRTSD instruction. */ + COSTS_N_BYTES (4), /* cost of cheap SSE instruction. */ + COSTS_N_BYTES (4), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_BYTES (4), /* cost of MULSS instruction. */ + COSTS_N_BYTES (4), /* cost of MULSD instruction. */ + COSTS_N_BYTES (4), /* cost of FMA SS instruction. */ + COSTS_N_BYTES (4), /* cost of FMA SD instruction. */ + COSTS_N_BYTES (4), /* cost of DIVSS instruction. */ + COSTS_N_BYTES (4), /* cost of DIVSD instruction. */ + COSTS_N_BYTES (4), /* cost of SQRTSS instruction. */ + COSTS_N_BYTES (4), /* cost of SQRTSD instruction. */ + COSTS_N_BYTES (4), /* cost of CVTSS2SD etc. */ + COSTS_N_BYTES (4), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_BYTES (6), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ ix86_size_memcpy, ix86_size_memset, @@ -243,6 +246,9 @@ struct processor_costs i386_cost = { /* 386 specific costs */ COSTS_N_INSNS (88), /* cost of DIVSD instruction. */ COSTS_N_INSNS (122), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (122), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (27), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (54), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (108), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ i386_memcpy, i386_memset, @@ -356,6 +362,9 @@ struct processor_costs i486_cost = { /* 486 specific costs */ COSTS_N_INSNS (74), /* cost of DIVSD instruction. */ COSTS_N_INSNS (83), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (83), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (8), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (16), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (32), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ i486_memcpy, i486_memset, @@ -467,6 +476,9 @@ struct processor_costs pentium_cost = { COSTS_N_INSNS (39), /* cost of DIVSD instruction. */ COSTS_N_INSNS (70), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (70), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentium_memcpy, pentium_memset, @@ -571,6 +583,9 @@ struct processor_costs lakemont_cost = { COSTS_N_INSNS (60), /* cost of DIVSD instruction. */ COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (5), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (10), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (20), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentium_memcpy, pentium_memset, @@ -690,6 +705,9 @@ struct processor_costs pentiumpro_cost = { COSTS_N_INSNS (18), /* cost of DIVSD instruction. */ COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (31), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentiumpro_memcpy, pentiumpro_memset, @@ -800,6 +818,9 @@ struct processor_costs geode_cost = { COSTS_N_INSNS (47), /* cost of DIVSD instruction. 
*/ COSTS_N_INSNS (54), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (54), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ geode_memcpy, geode_memset, @@ -913,6 +934,9 @@ struct processor_costs k6_cost = { COSTS_N_INSNS (56), /* cost of DIVSD instruction. */ COSTS_N_INSNS (56), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (56), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (8), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ k6_memcpy, k6_memset, @@ -1027,6 +1051,9 @@ struct processor_costs athlon_cost = { COSTS_N_INSNS (24), /* cost of DIVSD instruction. */ COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (19), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ athlon_memcpy, athlon_memset, @@ -1150,6 +1177,9 @@ struct processor_costs k8_cost = { COSTS_N_INSNS (20), /* cost of DIVSD instruction. */ COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ k8_memcpy, k8_memset, @@ -1281,6 +1311,9 @@ struct processor_costs amdfam10_cost = { COSTS_N_INSNS (20), /* cost of DIVSD instruction. */ COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ amdfam10_memcpy, amdfam10_memset, @@ -1405,6 +1438,9 @@ const struct processor_costs bdver_cost = { COSTS_N_INSNS (27), /* cost of DIVSD instruction. */ COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (26), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ bdver_memcpy, bdver_memset, @@ -1553,6 +1589,10 @@ struct processor_costs znver1_cost = { COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + /* Real latency is 4, but for split regs multiply cost of half op by 2. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. SPEC2k6 bencharks suggests @@ -1712,6 +1752,9 @@ struct processor_costs znver2_cost = { COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ COSTS_N_INSNS (10), /* cost of SQRTSS instruction. 
*/ COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. @@ -1847,6 +1890,9 @@ struct processor_costs znver3_cost = { COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. @@ -1984,6 +2030,10 @@ struct processor_costs znver4_cost = { COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (21), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ + /* Real latency is 6, but for split regs multiply cost of half op by 2. */ + COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. @@ -2120,7 +2170,7 @@ struct processor_costs znver5_cost = { COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ /* ADDSS has throughput 2 and latency 2 (in some cases when source is another addition). */ - COSTS_N_INSNS (2), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ /* MULSS has throughput 2 and latency 3. */ COSTS_N_INSNS (3), /* cost of MULSS instruction. */ COSTS_N_INSNS (3), /* cost of MULSD instruction. */ @@ -2135,6 +2185,9 @@ struct processor_costs znver5_cost = { COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ /* DIVSD has throughtput 0.13 and latency 20. */ COSTS_N_INSNS (20), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */ /* Zen5 can execute: - integer ops: 6 per cycle, at most 3 multiplications. latency 1 for additions, 3 for multiplications (pipelined) @@ -2274,6 +2327,9 @@ struct processor_costs skylake_cost = { COSTS_N_INSNS (14), /* cost of DIVSD instruction. */ COSTS_N_INSNS (12), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (4), /* cost of 512bit VCVTPS2PD etc. */ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ skylake_memcpy, skylake_memset, @@ -2403,6 +2459,9 @@ struct processor_costs icelake_cost = { COSTS_N_INSNS (14), /* cost of DIVSD instruction. */ COSTS_N_INSNS (12), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. 
*/ icelake_memcpy, icelake_memset, @@ -2526,6 +2585,9 @@ struct processor_costs alderlake_cost = { COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ alderlake_memcpy, alderlake_memset, @@ -2642,6 +2704,9 @@ const struct processor_costs btver1_cost = { COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (48), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ btver1_memcpy, btver1_memset, @@ -2755,6 +2820,9 @@ const struct processor_costs btver2_cost = { COSTS_N_INSNS (19), /* cost of DIVSD instruction. */ COSTS_N_INSNS (16), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (21), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ btver2_memcpy, btver2_memset, @@ -2867,6 +2935,9 @@ struct processor_costs pentium4_cost = { COSTS_N_INSNS (38), /* cost of DIVSD instruction. */ COSTS_N_INSNS (23), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (38), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentium4_memcpy, pentium4_memset, @@ -2982,6 +3053,9 @@ struct processor_costs nocona_cost = { COSTS_N_INSNS (40), /* cost of DIVSD instruction. */ COSTS_N_INSNS (32), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (41), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ nocona_memcpy, nocona_memset, @@ -3095,6 +3169,9 @@ struct processor_costs atom_cost = { COSTS_N_INSNS (60), /* cost of DIVSD instruction. */ COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */ 2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ atom_memcpy, atom_memset, @@ -3208,6 +3285,9 @@ struct processor_costs slm_cost = { COSTS_N_INSNS (69), /* cost of DIVSD instruction. */ COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ slm_memcpy, slm_memset, @@ -3335,6 +3415,9 @@ struct processor_costs tremont_cost = { COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. 
*/ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ tremont_memcpy, tremont_memset, @@ -3448,6 +3531,9 @@ struct processor_costs intel_cost = { COSTS_N_INSNS (20), /* cost of DIVSD instruction. */ COSTS_N_INSNS (40), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (40), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (8), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (16), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (32), /* cost of 512bit VCVTPS2PD etc. */ 1, 4, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ intel_memcpy, intel_memset, @@ -3566,6 +3652,9 @@ struct processor_costs lujiazui_cost = { COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ COSTS_N_INSNS (32), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (60), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ lujiazui_memcpy, lujiazui_memset, @@ -3682,6 +3771,9 @@ struct processor_costs yongfeng_cost = { COSTS_N_INSNS (14), /* cost of DIVSD instruction. */ COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ 4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */ yongfeng_memcpy, yongfeng_memset, @@ -3798,6 +3890,9 @@ struct processor_costs shijidadao_cost = { COSTS_N_INSNS (14), /* cost of DIVSD instruction. */ COSTS_N_INSNS (11), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ 4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */ shijidadao_memcpy, shijidadao_memset, @@ -3922,6 +4017,9 @@ struct processor_costs generic_cost = { COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ generic_memcpy, generic_memset, @@ -4051,6 +4149,9 @@ struct processor_costs core_cost = { COSTS_N_INSNS (32), /* cost of DIVSD instruction. */ COSTS_N_INSNS (30), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (58), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. 
*/ core_memcpy, core_memset, diff --git a/gcc/config/i386/x86-tune-sched.cc b/gcc/config/i386/x86-tune-sched.cc index 685a83c..15d3d91 100644 --- a/gcc/config/i386/x86-tune-sched.cc +++ b/gcc/config/i386/x86-tune-sched.cc @@ -81,6 +81,14 @@ ix86_issue_rate (void) case PROCESSOR_YONGFENG: case PROCESSOR_SHIJIDADAO: case PROCESSOR_GENERIC: + /* For znver5 decoder can handle 4 or 8 instructions per cycle, + op cache 12 instruction/cycle, dispatch 8 instructions + integer rename 8 instructions and Fp 6 instructions. + + The scheduler, without understanding out of order nature of the CPU + is not going to be able to use more than 4 instructions since that + is limits of the decoders. */ + case PROCESSOR_ZNVER5: return 4; case PROCESSOR_ICELAKE_CLIENT: @@ -91,13 +99,6 @@ ix86_issue_rate (void) return 5; case PROCESSOR_SAPPHIRERAPIDS: - /* For znver5 decoder can handle 4 or 8 instructions per cycle, - op cache 12 instruction/cycle, dispatch 8 instructions - integer rename 8 instructions and Fp 6 instructions. - - The scheduler, without understanding out of order nature of the CPU - is unlikely going to be able to fill all of these. */ - case PROCESSOR_ZNVER5: return 6; default: diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc index 24a28dc..0d3d026 100644 --- a/gcc/config/mips/mips.cc +++ b/gcc/config/mips/mips.cc @@ -20678,6 +20678,9 @@ mips_option_override (void) "-mcompact-branches=never"); } + if (is_micromips && TARGET_MSA) + error ("unsupported combination: %s", "-mmicromips -mmsa"); + /* Require explicit relocs for MIPS R6 onwards. This enables simplification of the compact branch and jump support through the backend. */ if (!TARGET_EXPLICIT_RELOCS && mips_isa_rev >= 6) diff --git a/gcc/config/nvptx/mkoffload.cc b/gcc/config/nvptx/mkoffload.cc index bdfe7f5..e7ec0ef 100644 --- a/gcc/config/nvptx/mkoffload.cc +++ b/gcc/config/nvptx/mkoffload.cc @@ -778,6 +778,9 @@ main (int argc, char **argv) } if (fopenmp) obstack_ptr_grow (&argv_obstack, "-mgomp"); + /* The host code may contain exception handling constructs. + Handle these as good as we can. */ + obstack_ptr_grow (&argv_obstack, "-mfake-exceptions"); for (int ix = 1; ix != argc; ix++) { diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc index 87364bf..f893971 100644 --- a/gcc/config/nvptx/nvptx.cc +++ b/gcc/config/nvptx/nvptx.cc @@ -2359,7 +2359,25 @@ nvptx_assemble_integer (rtx x, unsigned int size, int ARG_UNUSED (aligned_p)) { gcc_checking_assert (!init_frag.active); /* Just use the default machinery; it's not getting used, anyway. */ - return default_assemble_integer (x, size, aligned_p); + bool ok = default_assemble_integer (x, size, aligned_p); + /* ..., but a few cases need special handling. */ + switch (GET_CODE (x)) + { + case SYMBOL_REF: + /* The default machinery won't work: we don't define the necessary + operations; don't use them outside of this. */ + gcc_checking_assert (!ok); + { + /* Just emit something; it's not getting used, anyway. */ + const char *op = "\t.symbol_ref\t"; + ok = (assemble_integer_with_op (op, x), true); + } + break; + + default: + break; + } + return ok; } gcc_checking_assert (init_frag.active); @@ -7771,6 +7789,18 @@ nvptx_asm_output_def_from_decls (FILE *stream, tree name, #endif cgraph_node *cnode = cgraph_node::get (name); +#ifdef ACCEL_COMPILER + /* For nvptx offloading, make sure to emit C++ constructor, destructor aliases [PR97106] + + For some reason (yet to be analyzed), they're not 'cnode->referred_to_p ()'. 
+ (..., or that's not the right approach at all; + <https://inbox.sourceware.org/87v7rx8lbx.fsf@euler.schwinge.ddns.net> + "Re: [committed][nvptx] Use .alias directive for mptx >= 6.3"). */ + if (DECL_CXX_CONSTRUCTOR_P (name) + || DECL_CXX_DESTRUCTOR_P (name)) + ; + else +#endif if (!cnode->referred_to_p ()) /* Prevent "Internal error: reference to deleted section". */ return; @@ -7875,8 +7905,6 @@ nvptx_asm_output_def_from_decls (FILE *stream, tree name, #define TARGET_ASM_DECLARE_CONSTANT_NAME nvptx_asm_declare_constant_name #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true -#undef TARGET_ASM_NEED_VAR_DECL_BEFORE_USE -#define TARGET_ASM_NEED_VAR_DECL_BEFORE_USE true #undef TARGET_MACHINE_DEPENDENT_REORG #define TARGET_MACHINE_DEPENDENT_REORG nvptx_reorg diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md index 3201247..7c3bd69 100644 --- a/gcc/config/nvptx/nvptx.md +++ b/gcc/config/nvptx/nvptx.md @@ -1644,7 +1644,9 @@ [(const_int 0)] "" { - sorry ("exception handling not supported"); + if (!fake_exceptions) + sorry ("exception handling not supported"); + DONE; }) (define_expand "nonlocal_goto" diff --git a/gcc/config/nvptx/nvptx.opt b/gcc/config/nvptx/nvptx.opt index 9be81ae..ce9fbc7 100644 --- a/gcc/config/nvptx/nvptx.opt +++ b/gcc/config/nvptx/nvptx.opt @@ -168,6 +168,14 @@ Target Var(nvptx_alias) Init(0) Undocumented mexperimental Target Var(nvptx_experimental) Init(0) Undocumented +mfake-exceptions +Target Var(fake_exceptions) Init(0) Undocumented +; With '-mfake-exceptions' enabled, the user-visible behavior in presence of +; exception handling constructs changes such that the compile-time +; 'sorry, unimplemented: exception handling not supported' is skipped, code +; generation proceeds, and instead, exception handling constructs 'abort' at +; run time. (..., or don't, if they're in dead code.) + mfake-ptx-alloca Target Var(nvptx_fake_ptx_alloca) Init(0) Undocumented ; With '-mfake-ptx-alloca' enabled, the user-visible behavior changes only diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md index 5ed5e18..d0919ec 100644 --- a/gcc/config/riscv/bitmanip.md +++ b/gcc/config/riscv/bitmanip.md @@ -908,6 +908,24 @@ "bext\t%0,%1,%2" [(set_attr "type" "bitmanip")]) +;; We do not define SHIFT_COUNT_TRUNCATED, so we have to have variants +;; that mask/extend the count if we want to eliminate those ops +;; +;; We could (in theory) use GPR for the various modes, but I haven't +;; seen those cases appear in practice. Without a testcase I've +;; elected to keep the modes X which is easy to reason about. +(define_insn "*bext<mode>_mask_pos" + [(set (match_operand:X 0 "register_operand" "=r") + (zero_extract:X (match_operand:X 1 "register_operand" "r") + (const_int 1) + (and:X + (match_operand:X 2 "register_operand" "r") + (match_operand 3 "const_int_operand"))))] + "(TARGET_ZBS + && INTVAL (operands[3]) + 1 == GET_MODE_BITSIZE (<MODE>mode))" + "bext\t%0,%1,%2" + [(set_attr "type" "bitmanip")]) + ;; This is a bext followed by a seqz. Normally this would be a 3->2 split ;; But the and-not pattern with a constant operand is a define_insn_and_split, ;; so this looks like a 2->2 split, which combine rejects. So implement it @@ -1245,3 +1263,41 @@ expand_crc_using_clmul (<SUBX:MODE>mode, <SUBX1:MODE>mode, operands); DONE; }) + +;; If we have an XOR/IOR with a constant operand (C) and the we can +;; synthesize ~C more efficiently than C, then synthesize ~C and use +;; xnor/orn instead. 
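As an aside on this split: it relies on the identities a ^ C == ~(a ^ ~C) and a | C == a | ~(~C), which map onto xnor and orn once ~C has been put in a register. A hand-worked sketch (the concrete constant and instruction counts are illustrative assumptions, not taken from the patch): on rv64 with Zbs, C = 0xfffffffffffff7ff takes two instructions to synthesize, while ~C = 0x800 is a single bseti from x0, so the split applies.

    #include <stdint.h>

    /* d holds ~C, the cheaper-to-synthesize inverted constant.  */
    uint64_t xor_c (uint64_t a, uint64_t d) { return ~(a ^ d); }  /* xnor rd, rs, t  */
    uint64_t ior_c (uint64_t a, uint64_t d) { return a | ~d; }    /* orn  rd, rs, t  */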
+;; +;; The same can be done for AND, but mvconst_internal's issues get in +;; the way. That's future work. +(define_split + [(set (match_operand:X 0 "register_operand") + (any_or:X (match_operand:X 1 "register_operand") + (match_operand:X 2 "const_int_operand"))) + (clobber (match_operand:X 3 "register_operand"))] + "TARGET_ZBB + && (riscv_const_insns (operands[2], true) + > riscv_const_insns (GEN_INT (~INTVAL (operands[2])), true))" + [(const_int 0)] +{ + /* Get the inverted constant into the temporary register. */ + riscv_emit_move (operands[3], GEN_INT (~INTVAL (operands[2]))); + + /* For xnor, the NOT operation is in a different position. So + we have to customize the split code we generate a bit. + + It is expected that AND will be handled like IOR in the future. */ + if (<CODE> == XOR) + { + rtx x = gen_rtx_XOR (<X:MODE>mode, operands[1], operands[3]); + x = gen_rtx_NOT (<X:MODE>mode, x); + emit_insn (gen_rtx_SET (operands[0], x)); + } + else + { + rtx x = gen_rtx_NOT (<X:MODE>mode, operands[3]); + x = gen_rtx_IOR (<X:MODE>mode, x, operands[1]); + emit_insn (gen_rtx_SET (operands[0], x)); + } + DONE; +}) diff --git a/gcc/config/riscv/gnu.h b/gcc/config/riscv/gnu.h new file mode 100644 index 0000000..047399b --- /dev/null +++ b/gcc/config/riscv/gnu.h @@ -0,0 +1,59 @@ +/* Definitions for RISC-V GNU/Hurd systems with ELF format. + Copyright (C) 1998-2025 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#define TARGET_OS_CPP_BUILTINS() \ + do { \ + GNU_USER_TARGET_OS_CPP_BUILTINS(); \ + } while (0) + +#define GNU_USER_DYNAMIC_LINKER "/lib/ld-riscv" XLEN_SPEC "-" ABI_SPEC ".so.1" + +#define ICACHE_FLUSH_FUNC "__riscv_flush_icache" + +#define CPP_SPEC "%{pthread:-D_REENTRANT}" + +#define LD_EMUL_SUFFIX \ + "%{mabi=lp64d:}" \ + "%{mabi=lp64f:_lp64f}" \ + "%{mabi=lp64:_lp64}" \ + "%{mabi=ilp32d:}" \ + "%{mabi=ilp32f:_ilp32f}" \ + "%{mabi=ilp32:_ilp32}" + +#define LINK_SPEC "\ +-melf" XLEN_SPEC DEFAULT_ENDIAN_SPEC "riscv" LD_EMUL_SUFFIX " \ +%{mno-relax:--no-relax} \ +-X \ +%{mbig-endian:-EB} \ +%{mlittle-endian:-EL} \ +%{shared} \ + %{!shared: \ + %{!static: \ + %{!static-pie: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker " GNU_USER_DYNAMIC_LINKER "}} \ + %{static:-static} %{static-pie:-static -pie --no-dynamic-linker -z text}}" + +#define STARTFILE_PREFIX_SPEC \ + "/lib" XLEN_SPEC "/" ABI_SPEC "/ " \ + "/usr/lib" XLEN_SPEC "/" ABI_SPEC "/ " \ + "/lib/ " \ + "/usr/lib/ " + +#define RISCV_USE_CUSTOMISED_MULTI_LIB select_by_abi diff --git a/gcc/config/riscv/multilib-generator b/gcc/config/riscv/multilib-generator index 4828016..6ad1cf0 100755 --- a/gcc/config/riscv/multilib-generator +++ b/gcc/config/riscv/multilib-generator @@ -159,8 +159,8 @@ for cmodel in cmodels: "e.g. rv32imafd-ilp32--" % cfg) sys.exit(1) - # Compact code model only support rv64. - if cmodel == "compact" and arch.startswith("rv32"): + # Large code model only support rv64. 
+ if cmodel == "large" and arch.startswith("rv32"): continue arch = arch_canonicalize (arch, args.misa_spec) diff --git a/gcc/config/riscv/riscv-cores.def b/gcc/config/riscv/riscv-cores.def index 2918496..e31afc3 100644 --- a/gcc/config/riscv/riscv-cores.def +++ b/gcc/config/riscv/riscv-cores.def @@ -41,6 +41,12 @@ RISCV_TUNE("sifive-p400-series", sifive_p400, sifive_p400_tune_info) RISCV_TUNE("sifive-p600-series", sifive_p600, sifive_p600_tune_info) RISCV_TUNE("tt-ascalon-d8", generic_ooo, tt_ascalon_d8_tune_info) RISCV_TUNE("thead-c906", generic, thead_c906_tune_info) +RISCV_TUNE("xt-c908", generic, generic_ooo_tune_info) +RISCV_TUNE("xt-c908v", generic, generic_ooo_tune_info) +RISCV_TUNE("xt-c910", generic, generic_ooo_tune_info) +RISCV_TUNE("xt-c910v2", generic, generic_ooo_tune_info) +RISCV_TUNE("xt-c920", generic, generic_ooo_tune_info) +RISCV_TUNE("xt-c920v2", generic, generic_ooo_tune_info) RISCV_TUNE("xiangshan-nanhu", xiangshan, xiangshan_nanhu_tune_info) RISCV_TUNE("generic-ooo", generic_ooo, generic_ooo_tune_info) RISCV_TUNE("size", generic, optimize_size_tune_info) @@ -93,6 +99,48 @@ RISCV_CORE("thead-c906", "rv64imafdc_xtheadba_xtheadbb_xtheadbs_xtheadcmo_" "xtheadmemidx_xtheadmempair_xtheadsync", "thead-c906") +RISCV_CORE("xt-c908", "rv64imafdc_zicbom_zicbop_zicboz_zicntr_zicsr_" + "zifencei_zihintpause_zihpm_zfh_zba_zbb_zbc_zbs_" + "sstc_svinval_svnapot_svpbmt_xtheadba_xtheadbb_" + "xtheadbs_xtheadcmo_xtheadcondmov_xtheadfmemidx_" + "xtheadmac_xtheadmemidx_xtheadmempair_xtheadsync", + "xt-c908") +RISCV_CORE("xt-c908v", "rv64imafdcv_zicbom_zicbop_zicboz_zicntr_zicsr_" + "zifencei_zihintpause_zihpm_zfh_zba_zbb_zbc_zbs_" + "zvfh_sstc_svinval_svnapot_svpbmt__xtheadba_" + "xtheadbb_xtheadbs_xtheadcmo_xtheadcondmov_" + "xtheadfmemidx_xtheadmac_xtheadmemidx_" + "xtheadmempair_xtheadsync_xtheadvdot", + "xt-c908") +RISCV_CORE("xt-c910", "rv64imafdc_zicntr_zicsr_zifencei_zihpm_zfh_" + "xtheadba_xtheadbb_xtheadbs_xtheadcmo_" + "xtheadcondmov_xtheadfmemidx_xtheadmac_" + "xtheadmemidx_xtheadmempair_xtheadsync", + "xt-c910") +RISCV_CORE("xt-c910v2", "rv64imafdc_zicbom_zicbop_zicboz_zicntr_zicond_" + "zicsr_zifencei _zihintntl_zihintpause_zihpm_" + "zawrs_zfa_zfbfmin_zfh_zca_zcb_zcd_zba_zbb_zbc_" + "zbs_sscofpmf_sstc_svinval_svnapot_svpbmt_" + "xtheadba_xtheadbb_xtheadbs_xtheadcmo_" + "xtheadcondmov_xtheadfmemidx_xtheadmac_" + "xtheadmemidx_xtheadmempair_xtheadsync", + "xt-c910v2") +RISCV_CORE("xt-c920", "rv64imafdc_zicntr_zicsr_zifencei_zihpm_zfh_" + "xtheadba_xtheadbb_xtheadbs_xtheadcmo_" + "xtheadcondmov_xtheadfmemidx_xtheadmac_" + "xtheadmemidx_xtheadmempair_xtheadsync_" + "xtheadvector", + "xt-c910") +RISCV_CORE("xt-c920v2", "rv64imafdcv_zicbom_zicbop_zicboz_zicntr_zicond_" + "zicsr_zifencei _zihintntl_zihintpause_zihpm_" + "zawrs_zfa_zfbfmin_zfh_zca_zcb_zcd_zba_zbb_zbc_" + "zbs_zvfbfmin_zvfbfwma_zvfh_sscofpmf_sstc_" + "svinval_svnapot_svpbmt_xtheadba_xtheadbb_" + "xtheadbs_xtheadcmo_xtheadcondmov_xtheadfmemidx_" + "xtheadmac_xtheadmemidx_xtheadmempair_" + "xtheadsync_xtheadvdot", + "xt-c920v2") + RISCV_CORE("tt-ascalon-d8", "rv64imafdcv_zic64b_zicbom_zicbop_zicboz_" "ziccamoa_ziccif_zicclsm_ziccrse_zicond_zicsr_" "zifencei_zihintntl_zihintpause_zimop_za64rs_" diff --git a/gcc/config/riscv/riscv-target-attr.cc b/gcc/config/riscv/riscv-target-attr.cc index 1d96865..8ad3025 100644 --- a/gcc/config/riscv/riscv-target-attr.cc +++ b/gcc/config/riscv/riscv-target-attr.cc @@ -257,11 +257,7 @@ riscv_target_attr_parser::update_settings (struct gcc_options *opts) const { std::string local_arch 
= m_subset_list->to_string (true); const char* local_arch_str = local_arch.c_str (); - struct cl_target_option *default_opts - = TREE_TARGET_OPTION (target_option_default_node); - if (opts->x_riscv_arch_string != default_opts->x_riscv_arch_string) - free (CONST_CAST (void *, (const void *) opts->x_riscv_arch_string)); - opts->x_riscv_arch_string = xstrdup (local_arch_str); + opts->x_riscv_arch_string = ggc_strdup (local_arch_str); riscv_set_arch_by_subset_list (m_subset_list, opts); } diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc index 0ac2538..a8c9256 100644 --- a/gcc/config/riscv/riscv-vsetvl.cc +++ b/gcc/config/riscv/riscv-vsetvl.cc @@ -685,7 +685,7 @@ invalid_opt_bb_p (basic_block cfg_bb) /* We only do LCM optimizations on blocks that are post dominated by EXIT block, that is, we don't do LCM optimizations on infinite loop. */ FOR_EACH_EDGE (e, ei, cfg_bb->succs) - if (e->flags & EDGE_FAKE) + if ((e->flags & EDGE_FAKE) || (e->flags & EDGE_ABNORMAL)) return true; return false; @@ -2698,6 +2698,7 @@ pre_vsetvl::compute_lcm_local_properties () m_avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); bitmap_vector_clear (m_avloc, last_basic_block_for_fn (cfun)); + bitmap_vector_clear (m_kill, last_basic_block_for_fn (cfun)); bitmap_vector_clear (m_antloc, last_basic_block_for_fn (cfun)); bitmap_vector_ones (m_transp, last_basic_block_for_fn (cfun)); @@ -2749,6 +2750,10 @@ pre_vsetvl::compute_lcm_local_properties () if (invalid_opt_bb_p (bb->cfg_bb ())) { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\n --- skipping bb %u due to weird edge", + bb->index ()); + bitmap_clear (m_antloc[bb_index]); bitmap_clear (m_transp[bb_index]); } @@ -3022,6 +3027,18 @@ pre_vsetvl::earliest_fuse_vsetvl_info (int iter) continue; } + /* We cannot lift a vsetvl into the source block if the block is + not transparent WRT to it. + This is too restrictive for blocks where a register's use only + feeds into vsetvls and no regular insns. One example is the + test rvv/vsetvl/avl_single-68.c which is currently XFAILed for + that reason. + In order to support this case we'd need to check the vsetvl's + AVL operand's uses in the source block and make sure they are + only used in other vsetvls. */ + if (!bitmap_bit_p (m_transp[eg->src->index], expr_index)) + continue; + if (dump_file && (dump_flags & TDF_DETAILS)) { fprintf (dump_file, diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 38f3ae7..bad59e2 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -10382,7 +10382,7 @@ riscv_file_end () fprintf (asm_out_file, "1:\n"); /* pr_type. */ - fprintf (asm_out_file, "\t.p2align\t3\n"); + fprintf (asm_out_file, "\t.p2align\t%u\n", p2align); fprintf (asm_out_file, "2:\n"); fprintf (asm_out_file, "\t.long\t0xc0000000\n"); /* pr_datasz. 
*/ @@ -13136,9 +13136,6 @@ parse_features_for_version (tree decl, DECL_SOURCE_LOCATION (decl)); gcc_assert (parse_res); - if (arch_string != default_opts->x_riscv_arch_string) - free (CONST_CAST (void *, (const void *) arch_string)); - cl_target_option_restore (&global_options, &global_options_set, &cur_target); } diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h index 2bcabd0..2759a4c 100644 --- a/gcc/config/riscv/riscv.h +++ b/gcc/config/riscv/riscv.h @@ -888,7 +888,7 @@ extern enum riscv_cc get_riscv_cc (const rtx use); #define ASM_OUTPUT_OPCODE(STREAM, PTR) \ (PTR) = riscv_asm_output_opcode(STREAM, PTR) -#define JUMP_TABLES_IN_TEXT_SECTION 0 +#define JUMP_TABLES_IN_TEXT_SECTION (riscv_cmodel == CM_LARGE) #define CASE_VECTOR_MODE SImode #define CASE_VECTOR_PC_RELATIVE (riscv_cmodel != CM_MEDLOW) diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 26a247c..eec9687 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -789,7 +789,7 @@ rtx t5 = gen_reg_rtx (DImode); rtx t6 = gen_reg_rtx (DImode); - riscv_emit_binary (PLUS, operands[0], operands[1], operands[2]); + emit_insn (gen_addsi3_extended (t6, operands[1], operands[2])); if (GET_CODE (operands[1]) != CONST_INT) emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0)); else @@ -799,7 +799,10 @@ else t5 = operands[2]; emit_insn (gen_adddi3 (t3, t4, t5)); - emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0)); + rtx t7 = gen_lowpart (SImode, t6); + SUBREG_PROMOTED_VAR_P (t7) = 1; + SUBREG_PROMOTED_SET (t7, SRP_SIGNED); + emit_move_insn (operands[0], t7); riscv_expand_conditional_branch (operands[3], NE, t6, t3); } @@ -835,8 +838,11 @@ emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0)); else t3 = operands[1]; - riscv_emit_binary (PLUS, operands[0], operands[1], operands[2]); - emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0)); + emit_insn (gen_addsi3_extended (t4, operands[1], operands[2])); + rtx t5 = gen_lowpart (SImode, t4); + SUBREG_PROMOTED_VAR_P (t5) = 1; + SUBREG_PROMOTED_SET (t5, SRP_SIGNED); + emit_move_insn (operands[0], t5); riscv_expand_conditional_branch (operands[3], LTU, t4, t3); } @@ -966,7 +972,7 @@ rtx t5 = gen_reg_rtx (DImode); rtx t6 = gen_reg_rtx (DImode); - riscv_emit_binary (MINUS, operands[0], operands[1], operands[2]); + emit_insn (gen_subsi3_extended (t6, operands[1], operands[2])); if (GET_CODE (operands[1]) != CONST_INT) emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0)); else @@ -976,7 +982,10 @@ else t5 = operands[2]; emit_insn (gen_subdi3 (t3, t4, t5)); - emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0)); + rtx t7 = gen_lowpart (SImode, t6); + SUBREG_PROMOTED_VAR_P (t7) = 1; + SUBREG_PROMOTED_SET (t7, SRP_SIGNED); + emit_move_insn (operands[0], t7); riscv_expand_conditional_branch (operands[3], NE, t6, t3); } @@ -1015,8 +1024,11 @@ emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0)); else t3 = operands[1]; - riscv_emit_binary (MINUS, operands[0], operands[1], operands[2]); - emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0)); + emit_insn (gen_subsi3_extended (t4, operands[1], operands[2])); + rtx t5 = gen_lowpart (SImode, t4); + SUBREG_PROMOTED_VAR_P (t5) = 1; + SUBREG_PROMOTED_SET (t5, SRP_SIGNED); + emit_move_insn (operands[0], t5); riscv_expand_conditional_branch (operands[3], LTU, t3, t4); } diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 51eb64f..3ab4d76 100644 --- a/gcc/config/riscv/vector.md +++ 
b/gcc/config/riscv/vector.md @@ -2136,18 +2136,34 @@ (match_operand 7 "const_int_operand") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) - (vec_duplicate:V_VLS - (match_operand:<VEL> 3 "direct_broadcast_operand")) + ;; (vec_duplicate:V_VLS ;; wrapper activated by wrap_vec_dup below. + (match_operand:<VEL> 3 "direct_broadcast_operand") ;; ) (match_operand:V_VLS 2 "vector_merge_operand")))] "TARGET_VECTOR" { /* Transform vmv.v.x/vfmv.v.f (avl = 1) into vmv.s.x since vmv.s.x/vfmv.s.f has better chances to do vsetvl fusion in vsetvl pass. */ + bool wrap_vec_dup = true; + rtx vec_cst = NULL_RTX; if (riscv_vector::splat_to_scalar_move_p (operands)) { operands[1] = riscv_vector::gen_scalar_move_mask (<VM>mode); operands[3] = force_reg (<VEL>mode, operands[3]); } + else if (immediate_operand (operands[3], <VEL>mode) + && (vec_cst = gen_const_vec_duplicate (<MODE>mode, operands[3])) + && (/* -> pred_broadcast<mode>_zero */ + (vector_least_significant_set_mask_operand (operands[1], + <VM>mode) + && vector_const_0_operand (vec_cst, <MODE>mode)) + || (/* pred_broadcast<mode>_imm */ + vector_all_trues_mask_operand (operands[1], <VM>mode) + && vector_const_int_or_double_0_operand (vec_cst, + <MODE>mode)))) + { + operands[3] = vec_cst; + wrap_vec_dup = false; + } /* Handle vmv.s.x instruction (Wb1 mask) which has memory scalar. */ else if (satisfies_constraint_Wdm (operands[3])) { @@ -2191,6 +2207,8 @@ ; else operands[3] = force_reg (<VEL>mode, operands[3]); + if (wrap_vec_dup) + operands[3] = gen_rtx_VEC_DUPLICATE (<MODE>mode, operands[3]); }) (define_insn_and_split "*pred_broadcast<mode>" diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 737c3d6..12dbde2 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -25765,10 +25765,13 @@ rs6000_can_inline_p (tree caller, tree callee) } } - /* Ignore -mpower8-fusion and -mpower10-fusion options for inlining - purposes. */ - callee_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION); - explicit_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION); + /* Ignore -mpower8-fusion, -mpower10-fusion and -msave-toc-indirect options + for inlining purposes. */ + HOST_WIDE_INT ignored_isas = (OPTION_MASK_P8_FUSION + | OPTION_MASK_P10_FUSION + | OPTION_MASK_SAVE_TOC_INDIRECT); + callee_isa &= ~ignored_isas; + explicit_isa &= ~ignored_isas; /* The callee's options must be a subset of the caller's options, i.e. 
a vsx function may inline an altivec function, but a no-vsx function diff --git a/gcc/config/rx/rx.md b/gcc/config/rx/rx.md index edb2c96..a3d966e 100644 --- a/gcc/config/rx/rx.md +++ b/gcc/config/rx/rx.md @@ -2541,10 +2541,17 @@ (unspec_volatile:SI [(match_operand:BLK 1 "memory_operand") ;; String1 (match_operand:BLK 2 "memory_operand")] ;; String2 UNSPEC_CMPSTRN)) - (use (match_operand:SI 3 "register_operand")) ;; Max Length + (use (match_operand:SI 3 "nonmemory_operand")) ;; Max Length (match_operand:SI 4 "immediate_operand")] ;; Known Align "rx_allow_string_insns" { + bool const_len = CONST_INT_P (operands[3]); + if (const_len && operands[3] == CONST0_RTX (SImode)) + { + emit_move_insn (operands[0], CONST0_RTX (SImode)); + DONE; + } + rtx str1 = gen_rtx_REG (SImode, 1); rtx str2 = gen_rtx_REG (SImode, 2); rtx len = gen_rtx_REG (SImode, 3); @@ -2553,6 +2560,13 @@ emit_move_insn (str2, force_operand (XEXP (operands[2], 0), NULL_RTX)); emit_move_insn (len, operands[3]); + /* Set flags in case len is zero */ + if (!const_len) + { + emit_insn (gen_setpsw (GEN_INT ('C'))); + emit_insn (gen_setpsw (GEN_INT ('Z'))); + } + emit_insn (gen_rx_cmpstrn (operands[0], operands[1], operands[2])); DONE; } @@ -2590,9 +2604,7 @@ (clobber (reg:SI 3)) (clobber (reg:CC CC_REG))] "rx_allow_string_insns" - "setpsw z ; Set flags in case len is zero - setpsw c - scmpu ; Perform the string comparison + "scmpu ; Perform the string comparison mov #-1, %0 ; Set up -1 result (which cannot be created ; by the SC insn) bnc ?+ ; If Carry is not set skip over diff --git a/gcc/config/s390/9175.md b/gcc/config/s390/9175.md new file mode 100644 index 0000000..d0ac0e1 --- /dev/null +++ b/gcc/config/s390/9175.md @@ -0,0 +1,316 @@ +;; Scheduling description for z17. +;; Copyright (C) 2025 Free Software Foundation, Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3, or (at your option) any later +;; version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +;; WARRANTY; without even the implied warranty of MERCHANTABILITY or +;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +;; for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. 
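;; In brief, the attributes below tag each mnemonic with the issue unit it
;; occupies (z17_unit_fpd, _fxa, _fxb, _fxd, _lsu, _vfu) and with its
;; grouping behavior (z17_cracked, z17_expanded, z17_groupalone,
;; z17_endgroup, z17_groupoftwo); the z17_0 ... z17_20 insn reservations
;; then model approximate latencies.  s390.cc queries these through the
;; generated get_attr_z17_* accessors (see s390_get_sched_attrmask and
;; s390_get_unit_mask).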
+ +(define_attr "z17_unit_fpd" "" + (cond [(eq_attr "mnemonic" "ddb,ddbr,deb,debr,dxbr,sqdb,sqdbr,sqeb,sqebr,\ +sqxbr,vdf,vdg,vdlf,vdlg,vdlq,vdq,vfddb,vfdsb,vfsqdb,vfsqsb,vrf,vrg,vrlf,vrlg,\ +vrlq,vrq,wfddb,wfdsb,wfdxb,wfsqdb,wfsqxb") + (const_int 1)] (const_int 0))) + +(define_attr "z17_unit_fxa" "" + (cond [(eq_attr "mnemonic" "a,afi,ag,agf,agfi,agfr,agh,aghi,aghik,agr,agrk,ah,\ +ahi,ahik,ahy,al,alc,alcg,alcgr,alcr,alfi,alg,algf,algfi,algfr,alghsik,algr,\ +algrk,alhsik,alr,alrk,aly,ar,ark,ay,bdepg,bextg,clzg,ctzg,etnd,flogr,ic,icm,\ +icmh,icmy,icy,iihf,iilf,ipm,la,larl,lay,lb,lbr,lcgfr,lcgr,lcr,lgb,lgbr,lgf,\ +lgfi,lgfr,lgfrl,lgh,lghi,lghr,lghrl,lgr,lh,lhi,lhr,lhrl,lhy,llcr,llgcr,llgfr,\ +llghr,llgtr,llhr,llihf,llihh,llihl,llilf,llilh,llill,llxab,llxaf,llxag,llxah,\ +llxaq,lngfr,lngr,lnr,loc,locg,locghi,locgr,lochi,locr,lpgfr,lpgr,lpr,lr,lrv,\ +lrvg,lrvgr,lrvh,lrvr,lt,ltg,ltgf,ltgfr,ltgr,ltr,lxab,lxaf,lxag,lxah,lxaq,m,mfy,\ +mg,mgh,mghi,mgrk,mh,mhi,mhy,ml,mlg,mlgr,mlr,mr,ms,msc,msfi,msg,msgc,msgf,msgfi,\ +msgfr,msgr,msgrkc,msr,msrkc,msy,n,ncgrk,ncrk,ng,ngr,ngrk,nihf,nihh,nihl,nilf,\ +nilh,nill,nngrk,nnrk,nogrk,nork,nr,nrk,nxgrk,nxrk,ny,o,ocgrk,ocrk,og,ogr,ogrk,\ +oihf,oihh,oihl,oilf,oilh,oill,or,ork,oy,pfpo,popcnt,risbg,risbgn,rll,rllg,\ +rnsbg,rosbg,rxsbg,s,selgr,selr,sg,sgf,sgfr,sgh,sgr,sgrk,sh,shy,sl,slb,slbg,\ +slbgr,slbr,sldl,slfi,slg,slgf,slgfi,slgfr,slgr,slgrk,sll,sllg,sllk,slr,slrk,\ +sly,sr,sra,srag,srak,srda,srdl,srk,srl,srlg,srlk,sy,x,xg,xgr,xgrk,xihf,xilf,xr,\ +xrk,xy") + (const_int 1)] (const_int 0))) + +(define_attr "z17_unit_fxb" "" + (cond [(eq_attr "mnemonic" "agsi,algsi,alsi,asi,b,bc,bcr,bi,br,c,cfi,cg,cgf,\ +cgfi,cgfr,cgfrl,cgh,cghi,cghrl,cghsi,cgit,cgr,cgrl,cgrt,ch,chi,chrl,chsi,chy,\ +cit,cl,clfhsi,clfi,clfit,clg,clgf,clgfi,clgfr,clgfrl,clghrl,clghsi,clgit,clgr,\ +clgrl,clgrt,clgt,clhhsi,clhrl,cli,cliy,clm,clmy,clr,clrl,clrt,clt,cly,cr,crl,\ +crt,cy,laa,laag,lan,lang,lao,laog,lat,lax,laxg,lcdfr,ldgr,ldr,lgat,lgdr,lndfr,\ +lpdfr,lxr,lzdr,lzer,lzxr,mvghi,mvhhi,mvhi,mvi,mviy,ni,niy,nop,nopr,ntstg,oi,\ +oiy,ppa,st,stc,stcy,std,stdy,ste,stey,stg,stgrl,sth,sthrl,sthy,stoc,stocg,strl,\ +strv,strvg,strvh,sty,tend,tm,tmh,tmhh,tmhl,tml,tmlh,tmll,tmy,vlgvb,vlgvf,vlgvg,\ +vlgvh,vlr,vlvgb,vlvgf,vlvgg,vlvgh,vlvgp,vscef,vsceg,vst,vstbrf,vstbrg,vstbrh,\ +vstbrq,vstebrf,vstebrg,vstef,vsteg,vsterf,vsterg,vsterh,vstl,vstrl,vstrlr,xi,\ +xiy") + (const_int 1)] (const_int 0))) + +(define_attr "z17_unit_fxd" "" + (cond [(eq_attr "mnemonic" "dlgr,dlr,dr,dsgfr,dsgr") + (const_int 1)] (const_int 0))) + +(define_attr "z17_unit_lsu" "" + (cond [(eq_attr "mnemonic" "clc,ear,l,lam,lcbb,ld,lde,ldy,lg,lgrl,llc,llgc,\ +llgf,llgfrl,llgh,llghrl,llgt,llh,llhrl,lm,lmg,lmy,lpq,lrl,ly,mvcrl,sar,sfpc,\ +tabort,vl,vlbb,vlbrf,vlbrg,vlbrh,vlbrq,vlbrrepf,vlbrrepg,vlbrreph,vlerf,vlerg,\ +vlerh,vll,vllebrzf,vllebrzg,vllebrzh,vllezb,vllezf,vllezg,vllezh,vllezlf,\ +vlrepb,vlrepf,vlrepg,vlreph,vlrl,vlrlr") + (const_int 1)] (const_int 0))) + +(define_attr "z17_unit_vfu" "" + (cond [(eq_attr "mnemonic" "adb,adbr,adtr,aeb,aebr,axbr,axtr,brcl,cdb,cdbr,\ +cdtr,ceb,cebr,cpsdr,cxbr,cxtr,ddtr,dxtr,fidbr,fidbra,fidtr,fiebr,fiebra,fixbr,\ +fixbra,fixtr,j,jg,kdb,kdbr,kdtr,keb,kebr,kxbr,kxtr,lcdbr,lcebr,lcxbr,ldeb,\ +ldebr,ldetr,le,ledbr,ledtr,ler,ley,lndbr,lnebr,lnxbr,lpdbr,lpebr,lpxbr,ltdbr,\ +ltdtr,ltebr,ltxbr,ltxtr,lxdb,lxdbr,lxdtr,lxeb,lxebr,madb,madbr,maeb,maebr,mdb,\ +mdbr,mdtr,meeb,meebr,msdb,msdbr,mseb,msebr,mxbr,mxtr,sdb,sdbr,sdtr,seb,sebr,\ 
+sxbr,sxtr,tcdb,tceb,tcxb,tdcdt,tdcet,tdcxt,vab,vaccb,vacccq,vaccf,vaccg,vacch,\ +vaccq,vacq,vaf,vag,vah,vaq,vavgb,vavgf,vavgg,vavgh,vavglb,vavglf,vavglg,vavglh,\ +vavglq,vavgq,vblendb,vblendf,vblendg,vblendh,vblendq,vbperm,vcdgb,vcdlgb,vcefb,\ +vcelfb,vceqb,vceqbs,vceqf,vceqfs,vceqg,vceqgs,vceqh,vceqhs,vceqq,vceqqs,vcfeb,\ +vcfn,vcgdb,vchb,vchbs,vchf,vchfs,vchg,vchgs,vchh,vchhs,vchlb,vchlbs,vchlf,\ +vchlfs,vchlg,vchlgs,vchlh,vchlhs,vchlq,vchlqs,vchq,vchqs,vcksm,vclfeb,vclfnh,\ +vclfnl,vclgdb,vclzb,vclzf,vclzg,vclzh,vclzq,vcnf,vcrnf,vctzb,vctzf,vctzg,vctzh,\ +vctzq,verimb,verimf,verimg,verimh,verllb,verllf,verllg,verllh,verllvb,verllvf,\ +verllvg,verllvh,veslb,veslf,veslg,veslh,veslvb,veslvf,veslvg,veslvh,vesrab,\ +vesraf,vesrag,vesrah,vesravb,vesravf,vesravg,vesravh,vesrlb,vesrlf,vesrlg,\ +vesrlh,vesrlvb,vesrlvf,vesrlvg,vesrlvh,veval,vfadb,vfasb,vfcedb,vfcedbs,vfcesb,\ +vfcesbs,vfchdb,vfchdbs,vfchedb,vfchedbs,vfchesb,vfchesbs,vfchsb,vfchsbs,vfeeb,\ +vfeef,vfeeh,vfeezbs,vfeezfs,vfeezhs,vfeneb,vfenef,vfeneh,vfenezb,vfenezf,\ +vfenezh,vfidb,vfisb,vfkedb,vfkesb,vfkhdb,vfkhedb,vfkhesb,vfkhsb,vflcdb,vflcsb,\ +vflndb,vflnsb,vflpdb,vflpsb,vfmadb,vfmasb,vfmaxdb,vfmaxsb,vfmdb,vfmindb,\ +vfminsb,vfmsb,vfmsdb,vfmssb,vfnmadb,vfnmasb,vfnmsdb,vfnmssb,vfsdb,vfssb,\ +vftcidb,vftcisb,vgbm,vgemb,vgemf,vgemg,vgemh,vgemq,vgfmab,vgfmaf,vgfmag,vgfmah,\ +vgfmb,vgfmf,vgfmg,vgfmh,vgm,vistrb,vistrbs,vistrf,vistrfs,vistrh,vistrhs,vlcb,\ +vlcf,vlcg,vlch,vldeb,vleb,vlebrf,vlebrg,vlebrh,vledb,vlef,vleg,vleh,vleib,\ +vleif,vleig,vleih,vlpb,vlpf,vlpg,vlph,vlpq,vmaeb,vmaef,vmaeg,vmaeh,vmahb,vmahf,\ +vmahg,vmahh,vmahq,vmalb,vmaleb,vmalef,vmaleg,vmaleh,vmalf,vmalg,vmalhb,vmalhf,\ +vmalhg,vmalhh,vmalhq,vmalhw,vmalob,vmalof,vmalog,vmaloh,vmalq,vmaob,vmaof,\ +vmaog,vmaoh,vmeb,vmef,vmeg,vmeh,vmhb,vmhf,vmhg,vmhh,vmhq,vmlb,vmleb,vmlef,\ +vmleg,vmleh,vmlf,vmlg,vmlhb,vmlhf,vmlhg,vmlhh,vmlhq,vmlhw,vmlob,vmlof,vmlog,\ +vmloh,vmlq,vmnb,vmnf,vmng,vmnh,vmnlb,vmnlf,vmnlg,vmnlh,vmnlq,vmnq,vmob,vmof,\ +vmog,vmoh,vmrhb,vmrhf,vmrhg,vmrhh,vmrlb,vmrlf,vmrlg,vmrlh,vmslg,vmxb,vmxf,vmxg,\ +vmxh,vmxlb,vmxlf,vmxlg,vmxlh,vmxlq,vmxq,vn,vnc,vnn,vno,vnot,vnx,vo,voc,vone,\ +vpdi,vperm,vpkf,vpkg,vpkh,vpklsf,vpklsfs,vpklsg,vpklsgs,vpklsh,vpklshs,vpksf,\ +vpksfs,vpksg,vpksgs,vpksh,vpkshs,vpopct,vpopctb,vpopctf,vpopctg,vpopcth,vrepb,\ +vrepf,vrepg,vreph,vrepi,vrepib,vrepif,vrepig,vrepih,vsb,vsbcbiq,vsbiq,vscbib,\ +vscbif,vscbig,vscbih,vscbiq,vsegb,vsegf,vsegh,vsel,vsf,vsg,vsh,vsl,vslb,vsld,\ +vsldb,vsq,vsra,vsrab,vsrd,vsrl,vsrlb,vsumb,vsumgf,vsumgh,vsumh,vsumqf,vsumqg,\ +vtm,vuphb,vuphf,vuphg,vuphh,vuplb,vuplf,vuplg,vuplhb,vuplhf,vuplhg,vuplhh,\ +vuplhw,vupllb,vupllf,vupllg,vupllh,vx,vzero,wcdgb,wcdlgb,wcefb,wcelfb,wcfeb,\ +wcgdb,wclfeb,wclgdb,wfadb,wfasb,wfaxb,wfcdb,wfcedb,wfcesb,wfcexb,wfcexbs,\ +wfchdb,wfchedb,wfchesb,wfchexb,wfchexbs,wfchsb,wfchxb,wfchxbs,wfcsb,wfcxb,\ +wfidb,wfisb,wfixb,wfkdb,wfkedb,wfkesb,wfkexb,wfkhdb,wfkhedb,wfkhesb,wfkhexb,\ +wfkhsb,wfkhxb,wfksb,wfkxb,wflcdb,wflcsb,wflcxb,wflld,wflndb,wflnsb,wflnxb,\ +wflpdb,wflpsb,wflpxb,wflrx,wfmadb,wfmasb,wfmaxb,wfmaxxb,wfmdb,wfminxb,wfmsb,\ +wfmsdb,wfmssb,wfmsxb,wfmxb,wfnmaxb,wfnmsxb,wfsdb,wfssb,wfsxb,wftcixb,wldeb,\ +wledb") + (const_int 1)] (const_int 0))) + +(define_attr "z17_cracked" "" + (cond [(eq_attr "mnemonic" "bas,basr,bras,brasl,cdfbr,cdftr,cdgbr,cdgtr,\ +cdlfbr,cdlftr,cdlgbr,cdlgtr,cefbr,cegbr,celfbr,celgbr,cfdbr,cfebr,cfxbr,cgdbr,\ +cgdtr,cgebr,cgxbr,cgxtr,chhsi,clfdbr,clfdtr,clfebr,clfxbr,clfxtr,clgdbr,clgdtr,\ 
+clgebr,clgxbr,clgxtr,cs,csg,csy,efpc,ex,exrl,lcgfr,lngfr,lpgfr,lpq,lxr,lzxr,\ +mvc,nc,oc,rnsbg,rosbg,rxsbg,stpq,vgef,vgeg,vscef,vsceg,vsteb,vstebrh,vsteh,xc") + (const_int 1)] (const_int 0))) + +(define_attr "z17_expanded" "" + (cond [(eq_attr "mnemonic" "cds,cdsg,cdsy,cxfbr,cxftr,cxgbr,cxgtr,cxlfbr,\ +cxlftr,cxlgbr,cxlgtr,d,dl,dlg,dsg,dsgf,lam,lm,lmg,lmy,sldl,srda,srdl,stam,stm,\ +stmg,stmy,tbegin,tbeginc") + (const_int 1)] (const_int 0))) + +(define_attr "z17_groupalone" "" + (cond [(eq_attr "mnemonic" "alc,alcg,alcgr,alcr,axbr,axtr,clc,cxbr,cxfbr,\ +cxftr,cxgbr,cxgtr,cxlfbr,cxlftr,cxlgbr,cxlgtr,cxtr,d,dl,dlg,dlgr,dlr,dr,dsg,\ +dsgf,dsgfr,dsgr,dxbr,dxtr,ex,exrl,fixbr,fixbra,fixtr,flogr,kxbr,kxtr,lcxbr,\ +lnxbr,lpxbr,ltxbr,ltxtr,lxdb,lxdbr,lxdtr,lxeb,lxebr,m,madb,maeb,maebr,mfy,mg,\ +mgrk,ml,mlg,mlgr,mlr,mr,msdb,mseb,msebr,mvc,mvcrl,mxbr,mxtr,nc,oc,ppa,sfpc,slb,\ +slbg,slbgr,slbr,sqxbr,sxbr,sxtr,tabort,tbegin,tbeginc,tcxb,tdcxt,tend,xc") + (const_int 1)] (const_int 0))) + +(define_attr "z17_endgroup" "" + (cond [(eq_attr "mnemonic" "bas,basr,bcr,br,bras,brasl,cdsg,clfebr,cs,csg,csy,\ +efpc,ex,exrl,ipm,lam,lpq,lxr,nopr,sldl,srda,srdl,stam,stm,stmg,stmy,tbegin,\ +tbeginc") + (const_int 1)] (const_int 0))) + +(define_attr "z17_groupoftwo" "" + (cond [(eq_attr "mnemonic" "cdfbr,cdftr,cdgbr,cdgtr,cdlfbr,cdlftr,cdlgbr,\ +cdlgtr,cefbr,cegbr,celfbr,celgbr,cfdbr,cfebr,cfxbr,cgdbr,cgdtr,cgebr,cgxbr,\ +cgxtr,chhsi,clfdbr,clfdtr,clfxbr,clfxtr,clgdbr,clgdtr,clgebr,clgxbr,clgxtr,\ +lcgfr,lngfr,lpgfr,lzxr,vacccq,vacq,vblendb,vblendf,vblendg,vblendh,vblendq,\ +veval,vfmadb,vfmasb,vfmsdb,vfmssb,vfnmadb,vfnmasb,vfnmsdb,vfnmssb,vgef,vgeg,\ +vgfmab,vgfmaf,vgfmag,vgfmah,vmaeb,vmaef,vmaeg,vmaeh,vmahb,vmahf,vmahg,vmahh,\ +vmahq,vmalb,vmaleb,vmalef,vmaleg,vmaleh,vmalf,vmalg,vmalhb,vmalhf,vmalhg,\ +vmalhh,vmalhq,vmalhw,vmalob,vmalof,vmalog,vmaloh,vmalq,vmaob,vmaof,vmaog,vmaoh,\ +vmslg,vperm,vsbcbiq,vsbiq,vscef,vsceg,vsel,vsteb,vstebrh,vsteh,wfmadb,wfmasb,\ +wfmaxb,wfmsdb,wfmssb,wfmsxb,wfnmaxb,wfnmsxb") + (const_int 1)] (const_int 0))) + +(define_insn_reservation "z17_0" 0 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "a,afi,ag,agfi,aghi,aghik,agr,agrk,ahi,ahik,al,alfi,alg,\ +algf,algfi,algfr,alghsik,algr,algrk,alhsik,alr,alrk,aly,ar,ark,ay,b,bc,bcr,bi,\ +br,brcl,c,cfi,cg,cgfi,cghi,cghsi,cgit,cgr,cgrl,cgrt,chi,chsi,cit,cl,clfhsi,\ +clfi,clfit,clg,clgf,clgfi,clgfr,clgfrl,clghrl,clghsi,clgit,clgr,clgrl,clgrt,\ +clgt,clhhsi,clhrl,cli,cliy,clr,clrl,clrt,clt,cly,cr,crl,crt,cy,etnd,ic,icm,\ +icmh,icmy,icy,iihf,iilf,j,jg,la,larl,lat,lay,lb,lbr,lcdfr,lcgr,lcr,ldgr,ldr,\ +lgat,lgb,lgbr,lgf,lgfi,lgfr,lgfrl,lgh,lghi,lghr,lghrl,lgr,lh,lhi,lhr,lhrl,lhy,\ +llcr,llgcr,llgfr,llghr,llgtr,llhr,llihf,llihh,llihl,llilf,llilh,llill,lndfr,\ +lngr,lnr,lpdfr,lpgr,lpr,lr,lrv,lrvg,lrvgr,lrvh,lrvr,lt,ltg,ltgf,ltgfr,ltgr,ltr,\ +lzdr,lzer,n,ncgrk,ncrk,ng,ngr,ngrk,nihf,nihh,nihl,nilf,nilh,nill,nngrk,nnrk,\ +nogrk,nop,nopr,nork,nr,nrk,nxgrk,nxrk,ny,o,ocgrk,ocrk,og,ogr,ogrk,oihf,oihh,\ +oihl,oilf,oilh,oill,or,ork,oy,pfpo,risbg,risbgn,rll,rllg,s,sg,sgr,sgrk,sl,sldl,\ +slfi,slg,slgf,slgfi,slgfr,slgr,slgrk,sll,sllg,sllk,slr,slrk,sly,sr,sra,srag,\ +srak,srda,srdl,srk,srl,srlg,srlk,sy,tm,tmh,tmhh,tmhl,tml,tmlh,tmll,tmy,vlr,\ +vlvgb,vlvgf,vlvgg,vlvgh,x,xg,xgr,xgrk,xihf,xilf,xr,xrk,xy")) "nothing") + +(define_insn_reservation "z17_1" 1 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "agf,agfr,agh,agsi,ah,ahy,algsi,alsi,asi,cgf,cgfr,cgfrl,\ +cgh,cghrl,ch,chrl,chy,clm,clmy,cpsdr,laa,laag,lan,lang,lao,laog,lax,laxg,le,\ 
+ler,ley,llxab,llxaf,llxag,llxah,llxaq,loc,locg,locghi,locgr,lochi,locr,lxab,\ +lxaf,lxag,lxah,lxaq,mvghi,mvhhi,mvhi,mvi,mviy,ni,niy,ntstg,oi,oiy,selgr,selr,\ +sgf,sgfr,sgh,sh,shy,st,stc,stcy,stg,stgrl,sth,sthrl,sthy,stoc,stocg,strl,strv,\ +strvg,strvh,sty,vab,vaccb,vacccq,vaccf,vaccg,vacch,vaccq,vacq,vaf,vag,vah,vaq,\ +vavgb,vavgf,vavgg,vavgh,vavglb,vavglf,vavglg,vavglh,vavglq,vavgq,vblendb,\ +vblendf,vblendg,vblendh,vblendq,vbperm,vceqb,vceqbs,vceqf,vceqfs,vceqg,vceqgs,\ +vceqh,vceqhs,vceqq,vceqqs,vcfn,vchb,vchbs,vchf,vchfs,vchg,vchgs,vchh,vchhs,\ +vchlb,vchlbs,vchlf,vchlfs,vchlg,vchlgs,vchlh,vchlhs,vchlq,vchlqs,vchq,vchqs,\ +vclfnh,vclfnl,vclzb,vclzf,vclzg,vclzh,vclzq,vcnf,vcrnf,vctzb,vctzf,vctzg,vctzh,\ +vctzq,verimb,verimf,verimg,verimh,verllb,verllf,verllg,verllh,verllvb,verllvf,\ +verllvg,verllvh,veslb,veslf,veslg,veslh,veslvb,veslvf,veslvg,veslvh,vesrab,\ +vesraf,vesrag,vesrah,vesravb,vesravf,vesravg,vesravh,vesrlb,vesrlf,vesrlg,\ +vesrlh,vesrlvb,vesrlvf,vesrlvg,vesrlvh,veval,vfcedb,vfcedbs,vfcesb,vfcesbs,\ +vfchdb,vfchdbs,vfchedb,vfchedbs,vfchesb,vfchesbs,vfchsb,vfchsbs,vfkedb,vfkesb,\ +vfkhdb,vfkhedb,vfkhesb,vfkhsb,vflcdb,vflcsb,vflndb,vflnsb,vflpdb,vflpsb,\ +vfmaxdb,vfmaxsb,vfmindb,vfminsb,vgbm,vgemb,vgemf,vgemg,vgemh,vgemq,vgm,vlcb,\ +vlcf,vlcg,vlch,vleb,vlebrf,vlebrg,vlebrh,vlef,vleg,vleh,vleib,vleif,vleig,\ +vleih,vlpb,vlpf,vlpg,vlph,vlpq,vmnb,vmnf,vmng,vmnh,vmnlb,vmnlf,vmnlg,vmnlh,\ +vmnlq,vmnq,vmrhb,vmrhf,vmrhg,vmrhh,vmrlb,vmrlf,vmrlg,vmrlh,vmxb,vmxf,vmxg,vmxh,\ +vmxlb,vmxlf,vmxlg,vmxlh,vmxlq,vmxq,vn,vnc,vnn,vno,vnot,vnx,vo,voc,vone,vpdi,\ +vperm,vpkf,vpkg,vpkh,vpklsf,vpklsfs,vpklsg,vpklsgs,vpklsh,vpklshs,vpksf,vpksfs,\ +vpksg,vpksgs,vpksh,vpkshs,vpopct,vpopctb,vpopctf,vpopctg,vpopcth,vrepb,vrepf,\ +vrepg,vreph,vrepi,vrepib,vrepif,vrepig,vrepih,vsb,vsbcbiq,vsbiq,vscbib,vscbif,\ +vscbig,vscbih,vscbiq,vsegb,vsegf,vsegh,vsel,vsf,vsg,vsh,vsl,vslb,vsld,vsldb,\ +vsq,vsra,vsrab,vsrd,vsrl,vsrlb,vuphb,vuphf,vuphg,vuphh,vuplb,vuplf,vuplg,\ +vuplhb,vuplhf,vuplhg,vuplhh,vuplhw,vupllb,vupllf,vupllg,vupllh,vx,vzero,wfcedb,\ +wfcesb,wfcexb,wfcexbs,wfchdb,wfchedb,wfchesb,wfchexb,wfchexbs,wfchsb,wfchxb,\ +wfchxbs,wfkedb,wfkesb,wfkexb,wfkhdb,wfkhedb,wfkhesb,wfkhexb,wfkhsb,wfkhxb,\ +wflcdb,wflcsb,wflcxb,wflndb,wflnsb,wflnxb,wflpdb,wflpsb,wflpxb,wfmaxxb,wfminxb,\ +xi,xiy")) "nothing") + +(define_insn_reservation "z17_2" 2 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "cdb,cdbr,ceb,cebr,clzg,ctzg,ear,ipm,kdb,kdbr,keb,kebr,l,\ +lcbb,lcdbr,lcebr,ld,lde,ldy,lg,lgdr,lgrl,llc,llgc,llgf,llgfrl,llgh,llghrl,llgt,\ +llh,llhrl,lm,lmg,lmy,lndbr,lnebr,lpdbr,lpebr,lrl,ltdbr,ltebr,ly,popcnt,sar,\ +tcdb,tceb,vfeeb,vfeef,vfeeh,vfeezbs,vfeezfs,vfeezhs,vfeneb,vfenef,vfeneh,\ +vfenezb,vfenezf,vfenezh,vftcidb,vftcisb,vistrb,vistrbs,vistrf,vistrfs,vistrh,\ +vistrhs,vlbrrepf,vlbrrepg,vlbrreph,vlgvb,vlgvf,vlgvg,vlgvh,vllebrzf,vllebrzg,\ +vllebrzh,vllezb,vllezf,vllezg,vllezh,vllezlf,vlrepb,vlrepf,vlrepg,vlreph,vlrl,\ +vlvgp,wfcdb,wfcsb,wfcxb,wfkdb,wfksb,wfkxb,wftcixb")) "nothing") + +(define_insn_reservation "z17_3" 3 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "bdepg,bextg,cds,cdsy,mgh,mghi,mh,mhi,mhy,ms,msc,msfi,msg,\ +msgc,msgf,msgfi,msgfr,msgr,msgrkc,msr,msrkc,msy,std,stdy,ste,stey,vcksm,vgfmab,\ +vgfmaf,vgfmag,vgfmah,vgfmb,vgfmf,vgfmg,vgfmh,vl,vlbb,vlbrf,vlbrg,vlbrh,vlbrq,\ +vlerf,vlerg,vlerh,vll,vlrlr,vmaeb,vmaef,vmaeg,vmaeh,vmahb,vmahf,vmahg,vmahh,\ +vmahq,vmalb,vmaleb,vmalef,vmaleg,vmaleh,vmalf,vmalg,vmalhb,vmalhf,vmalhg,\ 
+vmalhh,vmalhq,vmalhw,vmalob,vmalof,vmalog,vmaloh,vmalq,vmaob,vmaof,vmaog,vmaoh,\ +vmeb,vmef,vmeg,vmeh,vmhb,vmhf,vmhg,vmhh,vmhq,vmlb,vmleb,vmlef,vmleg,vmleh,vmlf,\ +vmlg,vmlhb,vmlhf,vmlhg,vmlhh,vmlhq,vmlhw,vmlob,vmlof,vmlog,vmloh,vmlq,vmob,\ +vmof,vmog,vmoh,vsumb,vsumgf,vsumgh,vsumh,vsumqf,vsumqg,vtm")) "nothing") + +(define_insn_reservation "z17_4" 4 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "bas,basr,bras,brasl,chhsi,clc,ex,exrl,lam,lcgfr,lngfr,\ +lpgfr,lxr,lzxr,mvcrl,ppa,rnsbg,rosbg,rxsbg,tabort,tend,vst,vstbrf,vstbrg,\ +vstbrh,vstbrq,vstebrf,vstebrg,vstef,vsteg,vsterf,vsterg,vsterh,vstl,vstrl,\ +vstrlr")) "nothing") + +(define_insn_reservation "z17_5" 5 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "adb,adbr,aeb,aebr,alc,alcg,alcgr,alcr,cs,csg,csy,fidbr,\ +fidbra,fiebr,fiebra,ldeb,ldebr,ledbr,madbr,mdb,mdbr,meeb,meebr,msdbr,sdb,sdbr,\ +seb,sebr,slb,slbg,slbgr,slbr,stm,stmg,stmy,vcdgb,vcdlgb,vcefb,vcelfb,vcfeb,\ +vcgdb,vclfeb,vclgdb,vldeb,vledb,vmslg,wcdgb,wcdlgb,wcefb,wcelfb,wcfeb,wcgdb,\ +wclfeb,wclgdb,wflld,wflrx,wldeb,wledb")) "nothing") + +(define_insn_reservation "z17_6" 6 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "sfpc")) "nothing") + +(define_insn_reservation "z17_7" 7 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "adtr,cdtr,fidtr,kdtr,ldetr,ltdtr,sdtr,tdcdt,tdcet,vfadb,\ +vfasb,vfidb,vfisb,vfsdb,vfssb,vgef,vgeg,wfadb,wfasb,wfaxb,wfidb,wfisb,wfixb,\ +wfsdb,wfssb,wfsxb")) "nothing") + +(define_insn_reservation "z17_8" 8 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "cdgtr,cdlgtr,cdsg,cxgtr,cxlgtr,flogr,lpq,m,mfy,mg,mgrk,\ +ml,mlg,mlgr,mlr,mr,stpq,vsteb,vstebrh,vsteh")) "nothing") + +(define_insn_reservation "z17_9" 9 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "cdfbr,cdgbr,cdlfbr,cdlgbr,cefbr,cegbr,celfbr,celgbr,madb,\ +maeb,maebr,msdb,mseb,msebr,stam")) "nothing") + +(define_insn_reservation "z17_10" 10 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "cgdtr,cgxtr,clfdtr,clfxtr,clgdtr,clgxtr,d,dl,dlg,dsg,\ +dsgf,efpc,lxdb,lxdbr,lxeb,lxebr,vscef,vsceg")) "nothing") + +(define_insn_reservation "z17_11" 11 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "cfdbr,cfebr,cgdbr,cgebr,clfdbr,clfebr,clgdbr,clgebr")) "nothing") + +(define_insn_reservation "z17_12" 12 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "cxbr,cxtr,kxbr,kxtr,tbegin,tbeginc,tcxb,tdcxt")) "nothing") + +(define_insn_reservation "z17_13" 13 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "axbr,axtr,cxfbr,cxgbr,cxlfbr,cxlgbr,fixbr,fixbra,fixtr,\ +lcxbr,lnxbr,lpxbr,ltxbr,ltxtr,lxdtr,sxbr,sxtr")) "nothing") + +(define_insn_reservation "z17_14" 14 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "cfxbr,cgxbr,clfxbr,clgxbr,ledtr")) "nothing") + +(define_insn_reservation "z17_15" 15 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "nc,oc")) "nothing") + +(define_insn_reservation "z17_16" 16 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "cdftr,cdlftr,cxftr,cxlftr")) "nothing") + +(define_insn_reservation "z17_18" 18 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "xc")) "nothing") + +(define_insn_reservation "z17_20" 20 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "ddb,ddbr,ddtr,deb,debr,dlgr,dlr,dr,dsgfr,dsgr,dxbr,dxtr,\ +mdtr,mvc,mxbr,mxtr,sqdb,sqdbr,sqeb,sqebr,sqxbr,vdf,vdg,vdlf,vdlg,vdlq,vdq,\ +vfddb,vfdsb,vfmadb,vfmasb,vfmdb,vfmsb,vfmsdb,vfmssb,vfnmadb,vfnmasb,vfnmsdb,\ +vfnmssb,vfsqdb,vfsqsb,vrf,vrg,vrlf,vrlg,vrlq,vrq,wfddb,wfdsb,wfdxb,wfmadb,\ +wfmasb,wfmaxb,wfmdb,wfmsb,wfmsdb,wfmssb,wfmsxb,wfmxb,wfnmaxb,wfnmsxb,wfsqdb,\ +wfsqxb")) "nothing") + diff 
--git a/gcc/config/s390/driver-native.cc b/gcc/config/s390/driver-native.cc index 49e8fa0..7a7ceea 100644 --- a/gcc/config/s390/driver-native.cc +++ b/gcc/config/s390/driver-native.cc @@ -127,6 +127,10 @@ s390_host_detect_local_cpu (int argc, const char **argv) case 0x3932: cpu = "arch14"; break; + case 0x9175: + case 0x9176: + cpu = "arch15"; + break; default: cpu = "arch15"; break; diff --git a/gcc/config/s390/s390-builtins.def b/gcc/config/s390/s390-builtins.def index d9af9b1..cee2326 100644 --- a/gcc/config/s390/s390-builtins.def +++ b/gcc/config/s390/s390-builtins.def @@ -300,8 +300,8 @@ #define B_VXE2 (1 << 4) /* Builtins requiring the z15 vector extensions. */ #define B_DEP (1 << 5) /* Builtin has been deprecated and a warning should be issued. */ #define B_NNPA (1 << 6) /* Builtins requiring the NNPA Facility. */ -#define B_VXE3 (1 << 7) /* Builtins requiring the arch15 vector extensions. */ -#define B_ARCH15 (1 << 8) /* Builtins requiring arch15. */ +#define B_VXE3 (1 << 7) /* Builtins requiring the z17 vector extensions. */ +#define B_Z17 (1 << 8) /* Builtins requiring z17. */ /* B_DEF defines a standard (not overloaded) builtin B_DEF (<builtin name>, <RTL expander name>, <function attributes>, <builtin flags>, <operand flags, see above>, <fntype>) @@ -3318,8 +3318,8 @@ B_DEF (s390_vcnf, vcnf_v8hi, 0, /* arch 15 builtins */ -B_DEF (s390_bdepg, bdepg, 0, B_ARCH15, 0, BT_FN_ULONG_ULONG_ULONG) -B_DEF (s390_bextg, bextg, 0, B_ARCH15, 0, BT_FN_ULONG_ULONG_ULONG) +B_DEF (s390_bdepg, bdepg, 0, B_Z17, 0, BT_FN_ULONG_ULONG_ULONG) +B_DEF (s390_bextg, bextg, 0, B_Z17, 0, BT_FN_ULONG_ULONG_ULONG) OB_DEF (s390_vec_blend, s390_vec_blend_s8, s390_vec_blend_dbl, B_VXE3, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI) OB_DEF_VAR (s390_vec_blend_s8, s390_vblendb, 0, 0, BT_OV_V16QI_V16QI_V16QI_V16QI) diff --git a/gcc/config/s390/s390-c.cc b/gcc/config/s390/s390-c.cc index 311d74a..a01c44c 100644 --- a/gcc/config/s390/s390-c.cc +++ b/gcc/config/s390/s390-c.cc @@ -962,7 +962,7 @@ s390_resolve_overloaded_builtin (location_t loc, tree ob_fndecl, if (!TARGET_VXE3 && (ob_flags & B_VXE3)) { - error_at (loc, "%qF requires arch15 or higher", ob_fndecl); + error_at (loc, "%qF requires z17 or higher", ob_fndecl); return error_mark_node; } @@ -1056,7 +1056,7 @@ s390_resolve_overloaded_builtin (location_t loc, tree ob_fndecl, if (!TARGET_VXE3 && bflags_overloaded_builtin_var[last_match_index] & B_VXE3) { - error_at (loc, "%qs matching variant requires arch15 or higher", + error_at (loc, "%qs matching variant requires z17 or higher", IDENTIFIER_POINTER (DECL_NAME (ob_fndecl))); return error_mark_node; } diff --git a/gcc/config/s390/s390-opts.h b/gcc/config/s390/s390-opts.h index 437d3b9..9cacb2c 100644 --- a/gcc/config/s390/s390-opts.h +++ b/gcc/config/s390/s390-opts.h @@ -39,7 +39,7 @@ enum processor_type PROCESSOR_3906_Z14, PROCESSOR_8561_Z15, PROCESSOR_3931_Z16, - PROCESSOR_ARCH15, + PROCESSOR_9175_Z17, PROCESSOR_NATIVE, PROCESSOR_max }; diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc index 0ff3fd5..d82b16e 100644 --- a/gcc/config/s390/s390.cc +++ b/gcc/config/s390/s390.cc @@ -342,7 +342,7 @@ const struct s390_processor processor_table[] = { "z14", "arch12", PROCESSOR_3906_Z14, &zEC12_cost, 12 }, { "z15", "arch13", PROCESSOR_8561_Z15, &zEC12_cost, 13 }, { "z16", "arch14", PROCESSOR_3931_Z16, &zEC12_cost, 14 }, - { "arch15", "arch15", PROCESSOR_ARCH15, &zEC12_cost, 15 }, + { "z17", "arch15", PROCESSOR_9175_Z17, &zEC12_cost, 15 }, { "native", "", PROCESSOR_NATIVE, NULL, 0 } }; @@ -916,7 +916,7 @@ 
s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, if ((bflags & B_VXE3) && !TARGET_VXE3) { - error ("Builtin %qF requires arch15 or higher", fndecl); + error ("Builtin %qF requires z17 or higher", fndecl); return const0_rtx; } } @@ -9204,7 +9204,7 @@ s390_issue_rate (void) case PROCESSOR_3906_Z14: case PROCESSOR_8561_Z15: case PROCESSOR_3931_Z16: - case PROCESSOR_ARCH15: + case PROCESSOR_9175_Z17: default: return 1; } @@ -15632,7 +15632,6 @@ s390_get_sched_attrmask (rtx_insn *insn) mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO; break; case PROCESSOR_3931_Z16: - case PROCESSOR_ARCH15: if (get_attr_z16_cracked (insn)) mask |= S390_SCHED_ATTR_MASK_CRACKED; if (get_attr_z16_expanded (insn)) @@ -15644,6 +15643,18 @@ s390_get_sched_attrmask (rtx_insn *insn) if (get_attr_z16_groupoftwo (insn)) mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO; break; + case PROCESSOR_9175_Z17: + if (get_attr_z17_cracked (insn)) + mask |= S390_SCHED_ATTR_MASK_CRACKED; + if (get_attr_z17_expanded (insn)) + mask |= S390_SCHED_ATTR_MASK_EXPANDED; + if (get_attr_z17_endgroup (insn)) + mask |= S390_SCHED_ATTR_MASK_ENDGROUP; + if (get_attr_z17_groupalone (insn)) + mask |= S390_SCHED_ATTR_MASK_GROUPALONE; + if (get_attr_z17_groupoftwo (insn)) + mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO; + break; default: gcc_unreachable (); } @@ -15691,7 +15702,6 @@ s390_get_unit_mask (rtx_insn *insn, int *units) mask |= 1 << 3; break; case PROCESSOR_3931_Z16: - case PROCESSOR_ARCH15: *units = 4; if (get_attr_z16_unit_lsu (insn)) mask |= 1 << 0; @@ -15702,6 +15712,17 @@ s390_get_unit_mask (rtx_insn *insn, int *units) if (get_attr_z16_unit_vfu (insn)) mask |= 1 << 3; break; + case PROCESSOR_9175_Z17: + *units = 4; + if (get_attr_z17_unit_lsu (insn)) + mask |= 1 << 0; + if (get_attr_z17_unit_fxa (insn)) + mask |= 1 << 1; + if (get_attr_z17_unit_fxb (insn)) + mask |= 1 << 2; + if (get_attr_z17_unit_vfu (insn)) + mask |= 1 << 3; + break; default: gcc_unreachable (); } @@ -15715,7 +15736,8 @@ s390_is_fpd (rtx_insn *insn) return false; return get_attr_z13_unit_fpd (insn) || get_attr_z14_unit_fpd (insn) - || get_attr_z15_unit_fpd (insn) || get_attr_z16_unit_fpd (insn); + || get_attr_z15_unit_fpd (insn) || get_attr_z16_unit_fpd (insn) + || get_attr_z17_unit_fpd (insn); } static bool @@ -15725,7 +15747,8 @@ s390_is_fxd (rtx_insn *insn) return false; return get_attr_z13_unit_fxd (insn) || get_attr_z14_unit_fxd (insn) - || get_attr_z15_unit_fxd (insn) || get_attr_z16_unit_fxd (insn); + || get_attr_z15_unit_fxd (insn) || get_attr_z16_unit_fxd (insn) + || get_attr_z17_unit_fxd (insn); } /* Returns TRUE if INSN is a long-running instruction. */ diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h index 6f7195d..8b04bc9 100644 --- a/gcc/config/s390/s390.h +++ b/gcc/config/s390/s390.h @@ -45,12 +45,12 @@ enum processor_flags PF_NNPA = 32768, PF_Z16 = 65536, PF_VXE3 = 131072, - PF_ARCH15 = 262144 + PF_Z17 = 262144 }; /* This is necessary to avoid a warning about comparing different enum types. */ -#define s390_tune_attr ((enum attr_cpu)(s390_tune > PROCESSOR_3931_Z16 ? PROCESSOR_3931_Z16 : s390_tune )) +#define s390_tune_attr ((enum attr_cpu)(s390_tune > PROCESSOR_9175_Z17 ? 
PROCESSOR_9175_Z17 : s390_tune )) /* These flags indicate that the generated code should run on a cpu providing the respective hardware facility regardless of the @@ -124,10 +124,10 @@ enum processor_flags (s390_arch_flags & PF_VXE3) #define TARGET_CPU_VXE3_P(opts) \ (opts->x_s390_arch_flags & PF_VXE3) -#define TARGET_CPU_ARCH15 \ - (s390_arch_flags & PF_ARCH15) -#define TARGET_CPU_ARCH15_P(opts) \ - (opts->x_s390_arch_flags & PF_ARCH15) +#define TARGET_CPU_Z17 \ + (s390_arch_flags & PF_Z17) +#define TARGET_CPU_Z17_P(opts) \ + (opts->x_s390_arch_flags & PF_Z17) #define TARGET_HARD_FLOAT_P(opts) (!TARGET_SOFT_FLOAT_P(opts)) @@ -198,9 +198,9 @@ enum processor_flags (TARGET_VX && TARGET_CPU_VXE3) #define TARGET_VXE3_P(opts) \ (TARGET_VX_P (opts) && TARGET_CPU_VXE3_P (opts)) -#define TARGET_ARCH15 (TARGET_ZARCH && TARGET_CPU_ARCH15) -#define TARGET_ARCH15_P(opts) \ - (TARGET_ZARCH_P (opts->x_target_flags) && TARGET_CPU_ARCH15_P (opts)) +#define TARGET_Z17 (TARGET_ZARCH && TARGET_CPU_Z17) +#define TARGET_Z17_P(opts) \ + (TARGET_ZARCH_P (opts->x_target_flags) && TARGET_CPU_Z17_P (opts)) #if defined(HAVE_AS_VECTOR_LOADSTORE_ALIGNMENT_HINTS_ON_Z13) #define TARGET_VECTOR_LOADSTORE_ALIGNMENT_HINTS TARGET_Z13 diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 9d49580..05b9da6 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -599,11 +599,11 @@ ;; Processor type. This attribute must exactly match the processor_type ;; enumeration in s390.h. -(define_attr "cpu" "z900,z990,z9_109,z9_ec,z10,z196,zEC12,z13,z14,z15,z16" +(define_attr "cpu" "z900,z990,z9_109,z9_ec,z10,z196,zEC12,z13,z14,z15,z16,z17" (const (symbol_ref "s390_tune_attr"))) (define_attr "cpu_facility" - "standard,ieee,zarch,cpu_zarch,longdisp,extimm,dfp,z10,z196,zEC12,vx,z13,z14,vxe,z15,vxe2,z16,nnpa,vxe3,arch15" + "standard,ieee,zarch,cpu_zarch,longdisp,extimm,dfp,z10,z196,zEC12,vx,z13,z14,vxe,z15,vxe2,z16,nnpa,vxe3,z17" (const_string "standard")) (define_attr "enabled" "" @@ -681,8 +681,8 @@ (match_test "TARGET_VXE3")) (const_int 1) - (and (eq_attr "cpu_facility" "arch15") - (match_test "TARGET_ARCH15")) + (and (eq_attr "cpu_facility" "z17") + (match_test "TARGET_Z17")) (const_int 1) ] (const_int 0))) @@ -725,6 +725,9 @@ ;; Pipeline description for z16 (include "3931.md") +;; Pipeline description for z17 +(include "9175.md") + ;; Predicates (include "predicates.md") @@ -2056,7 +2059,7 @@ [(set (match_operand:DI 0 "register_operand" "=d") (ashift:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "a")) (const_int LXAMODEITER)))] - "TARGET_ARCH15 && TARGET_64BIT" + "TARGET_Z17 && TARGET_64BIT" "lxa<lxamode>\t%0,0(%1,0)" [(set_attr "op_type" "RXY")]) @@ -2066,7 +2069,7 @@ (ashift:DI (sign_extend:DI (plus:SI (match_operand:SI 1 "register_operand" "a") (match_operand:SI 2 "const_int_operand"))) (const_int LXAMODEITER)))] - "TARGET_ARCH15 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF" + "TARGET_Z17 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF" "lxa<lxamode>\t%0,%2(%1,0)" [(set_attr "op_type" "RXY")]) @@ -2076,7 +2079,7 @@ (plus:DI (ashift:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "a")) (const_int LXAMODEITER)) (match_operand:DI 2 "register_operand" "a")))] - "TARGET_ARCH15 && TARGET_64BIT" + "TARGET_Z17 && TARGET_64BIT" "lxa<lxamode>\t%0,0(%1,%2)" [(set_attr "op_type" "RXY")]) @@ -2087,7 +2090,7 @@ (match_operand:SI 2 "const_int_operand"))) (const_int LXAMODEITER)) (match_operand:DI 3 "register_operand" 
"a")))] - "TARGET_ARCH15 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF" + "TARGET_Z17 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF" "lxa<lxamode>\t%0,%2(%1,%3)" [(set_attr "op_type" "RXY")]) @@ -2096,7 +2099,7 @@ (plus:DI (sign_extend:DI (plus:SI (match_operand:SI 1 "register_operand" "a") (match_operand:SI 2 "const_int_operand"))) (match_operand:DI 3 "register_operand" "a")))] - "TARGET_ARCH15 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF" + "TARGET_Z17 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF" "lxab\t%0,%2(%1,%3)" [(set_attr "op_type" "RXY")]) @@ -2113,7 +2116,7 @@ 0) (const_int LXAMODEITER)) (const_int <LLXAMASK>)))] - "TARGET_ARCH15 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF" + "TARGET_Z17 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF" "llxa<lxamode>\t%0,%2(%1,0)" [(set_attr "op_type" "RXY")]) @@ -2124,7 +2127,7 @@ (const_int LXAMODEITER)) (const_int <LLXAMASK>)) (match_operand:DI 2 "register_operand" "a")))] - "TARGET_ARCH15 && TARGET_64BIT" + "TARGET_Z17 && TARGET_64BIT" "llxa<lxamode>\t%0,0(%1,%2)" [(set_attr "op_type" "RXY")]) @@ -2137,7 +2140,7 @@ (const_int LXAMODEITER)) (const_int <LLXAMASK>)) (match_operand:DI 3 "register_operand" "a")))] - "TARGET_ARCH15 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF" + "TARGET_Z17 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF" "llxa<lxamode>\t%0,%2(%1,%3)" [(set_attr "op_type" "RXY")]) @@ -2146,7 +2149,7 @@ (plus:DI (zero_extend:DI (plus:SI (match_operand:SI 1 "register_operand" "a") (match_operand:SI 2 "const_int_operand"))) (match_operand:DI 3 "register_operand" "a")))] - "TARGET_ARCH15 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF" + "TARGET_Z17 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF" "llxab\t%0,%2(%1,%3)" [(set_attr "op_type" "RXY")]) @@ -3594,7 +3597,7 @@ (match_operand:BLK 1 "memory_operand" "")) (use (match_operand 2 "const_int_operand" "")) (use (match_operand 3 "immediate_operand" "")) - (clobber (scratch))] + (clobber (match_scratch 4))] "reload_completed" [(parallel [(set (match_dup 0) (match_dup 1)) @@ -3606,7 +3609,7 @@ (match_operand:BLK 1 "memory_operand" "")) (use (match_operand 2 "register_operand" "")) (use (match_operand 3 "memory_operand" "")) - (clobber (scratch))] + (clobber (match_scratch 4))] "reload_completed" [(parallel [(unspec [(match_dup 2) (match_dup 3) @@ -3620,14 +3623,14 @@ (match_operand:BLK 1 "memory_operand" "")) (use (match_operand 2 "register_operand" "")) (use (const:BLK (unspec:BLK [(const_int 0)] UNSPEC_INSN))) - (clobber (scratch))] + (clobber (match_scratch 3))] "TARGET_Z10 && reload_completed" [(parallel [(unspec [(match_dup 2) (const_int 0) - (label_ref (match_dup 3))] UNSPEC_EXECUTE) + (label_ref (match_dup 4))] UNSPEC_EXECUTE) (set (match_dup 0) (match_dup 1)) (use (const_int 1))])] - "operands[3] = gen_label_rtx ();") + "operands[4] = gen_label_rtx ();") (define_split [(set (match_operand:BLK 0 "memory_operand" "") @@ -3849,7 +3852,7 @@ (const_int 0)) (use (match_operand 1 "const_int_operand" "")) (use (match_operand 2 "immediate_operand" "")) - (clobber (scratch)) + (clobber (match_scratch 3)) (clobber (reg:CC CC_REGNUM))] "reload_completed" [(parallel @@ -3863,7 +3866,7 @@ 
(const_int 0)) (use (match_operand 1 "register_operand" "")) (use (match_operand 2 "memory_operand" "")) - (clobber (scratch)) + (clobber (match_scratch 3)) (clobber (reg:CC CC_REGNUM))] "reload_completed" [(parallel @@ -3879,7 +3882,7 @@ (const_int 0)) (use (match_operand 1 "register_operand" "")) (use (const:BLK (unspec:BLK [(const_int 0)] UNSPEC_INSN))) - (clobber (scratch)) + (clobber (match_scratch 2)) (clobber (reg:CC CC_REGNUM))] "TARGET_Z10 && reload_completed" [(parallel @@ -4044,7 +4047,7 @@ (match_operand:BLK 1 "memory_operand" ""))) (use (match_operand 2 "const_int_operand" "")) (use (match_operand 3 "immediate_operand" "")) - (clobber (scratch))] + (clobber (match_scratch 4))] "reload_completed" [(parallel [(set (reg:CCU CC_REGNUM) (compare:CCU (match_dup 0) (match_dup 1))) @@ -4057,7 +4060,7 @@ (match_operand:BLK 1 "memory_operand" ""))) (use (match_operand 2 "register_operand" "")) (use (match_operand 3 "memory_operand" "")) - (clobber (scratch))] + (clobber (match_scratch 4))] "reload_completed" [(parallel [(unspec [(match_dup 2) (match_dup 3) @@ -4072,7 +4075,7 @@ (match_operand:BLK 1 "memory_operand" ""))) (use (match_operand 2 "register_operand" "")) (use (const:BLK (unspec:BLK [(const_int 0)] UNSPEC_INSN))) - (clobber (scratch))] + (clobber (match_scratch 3))] "TARGET_Z10 && reload_completed" [(parallel [(unspec [(match_dup 2) (const_int 0) @@ -4940,7 +4943,7 @@ (unspec:DI [(match_operand:DI 1 "register_operand" "d") (match_operand:DI 2 "register_operand" "d")] UNSPEC_BDEPG))] - "TARGET_ARCH15 && TARGET_64BIT" + "TARGET_Z17 && TARGET_64BIT" "bdepg\t%0,%1,%2" [(set_attr "op_type" "RRF")]) @@ -4953,7 +4956,7 @@ (unspec:DI [(match_operand:DI 1 "register_operand" "d") (match_operand:DI 2 "register_operand" "d")] UNSPEC_BEXTG))] - "TARGET_ARCH15 && TARGET_64BIT" + "TARGET_Z17 && TARGET_64BIT" "bextg\t%0,%1,%2" [(set_attr "op_type" "RRF")]) @@ -9580,7 +9583,7 @@ (clz:DI (match_operand:DI 1 "register_operand" "d")))] "TARGET_EXTIMM && TARGET_ZARCH" { - if (!(TARGET_ARCH15 && TARGET_64BIT)) + if (!(TARGET_Z17 && TARGET_64BIT)) { rtx_insn *insn; rtx clz_equal; @@ -9601,7 +9604,7 @@ (define_insn "*clzg" [(set (match_operand:DI 0 "register_operand" "=d") (clz:DI (match_operand:DI 1 "register_operand" "d")))] - "TARGET_ARCH15 && TARGET_64BIT" + "TARGET_Z17 && TARGET_64BIT" "clzg\t%0,%1" [(set_attr "op_type" "RRE")]) @@ -9630,7 +9633,7 @@ (define_insn "ctzdi2" [(set (match_operand:DI 0 "register_operand" "=d") (ctz:DI (match_operand:DI 1 "register_operand" "d")))] - "TARGET_ARCH15 && TARGET_64BIT" + "TARGET_Z17 && TARGET_64BIT" "ctzg\t%0,%1" [(set_attr "op_type" "RRE")]) diff --git a/gcc/config/s390/s390.opt b/gcc/config/s390/s390.opt index f064597..6753a93 100644 --- a/gcc/config/s390/s390.opt +++ b/gcc/config/s390/s390.opt @@ -122,7 +122,10 @@ EnumValue Enum(processor_type) String(z16) Value(PROCESSOR_3931_Z16) EnumValue -Enum(processor_type) String(arch15) Value(PROCESSOR_ARCH15) +Enum(processor_type) String(arch15) Value(PROCESSOR_9175_Z17) + +EnumValue +Enum(processor_type) String(z17) Value(PROCESSOR_9175_Z17) EnumValue Enum(processor_type) String(native) Value(PROCESSOR_NATIVE) DriverOnly diff --git a/gcc/config/sh/sh-modes.def b/gcc/config/sh/sh-modes.def index 80650b4..e31ae69 100644 --- a/gcc/config/sh/sh-modes.def +++ b/gcc/config/sh/sh-modes.def @@ -17,6 +17,12 @@ You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ +/* SH has the same reversed quiet bit as MIPS. 
 */
+RESET_FLOAT_FORMAT (SF, mips_single_format);
+RESET_FLOAT_FORMAT (DF, mips_double_format);
+/* TFmode: IEEE quad floating point (software). */
+FLOAT_MODE (TF, 16, mips_quad_format);
+
 /* Vector modes. */
 VECTOR_MODE (INT, QI, 2);     /* V2QI */
 VECTOR_MODES (INT, 4);        /* V4QI V2HI */
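To make the "reversed quiet bit" remark above concrete, here is a small standalone C sketch (not part of the patch; it only spells out the standard binary32 NaN fields).  With the exponent all ones and a non-zero fraction, bit 22 decides quiet vs. signaling, and the two conventions read it in opposite ways: the default IEEE 754-2008 style encoding (GCC's ieee_single_format) treats a set bit 22 as quiet, while the legacy MIPS/SH convention (mips_single_format) treats it as signaling.

    #include <stdint.h>
    #include <stdio.h>

    int main (void)
    {
      uint32_t msb_set   = 0x7fc00000u;  /* exponent 0xff, fraction MSB set   */
      uint32_t msb_clear = 0x7f800001u;  /* exponent 0xff, fraction MSB clear */

      /* ieee_single_format:  MSB set => quiet NaN, MSB clear => signaling.
         mips_single_format:  MSB set => signaling, MSB clear => quiet.     */
      printf ("0x%08x: qNaN under IEEE 754-2008, sNaN under legacy MIPS/SH\n",
              (unsigned) msb_set);
      printf ("0x%08x: sNaN under IEEE 754-2008, qNaN under legacy MIPS/SH\n",
              (unsigned) msb_clear);
      return 0;
    }

Reusing mips_single_format / mips_double_format (plus a MIPS-style TFmode) is therefore the natural way to describe SH's NaN encoding to the compiler.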