diff options
Diffstat (limited to 'gcc/config')
56 files changed, 1556 insertions, 267 deletions
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc index d1e2ab9..98337b7 100644 --- a/gcc/config/aarch64/aarch64-c.cc +++ b/gcc/config/aarch64/aarch64-c.cc @@ -293,6 +293,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) aarch64_def_or_undef (TARGET_SME2, "__ARM_FEATURE_SME2", pfile); aarch64_def_or_undef (AARCH64_HAVE_ISA (SME2p1), "__ARM_FEATURE_SME2p1", pfile); + aarch64_def_or_undef (TARGET_FAMINMAX, "__ARM_FEATURE_FAMINMAX", pfile); /* Not for ACLE, but required to keep "float.h" correct if we switch target between implementations that do or do not support ARMv8.2-A diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index 0e22d72..1209630 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -173,6 +173,22 @@ AARCH64_CORE("cortex-a76.cortex-a55", cortexa76cortexa55, cortexa53, V8_2A, (F AARCH64_CORE("cortex-r82", cortexr82, cortexa53, V8R, (), cortexa53, 0x41, 0xd15, -1) AARCH64_CORE("cortex-r82ae", cortexr82ae, cortexa53, V8R, (), cortexa53, 0x41, 0xd14, -1) +/* Apple (A12 and M) cores. + Known part numbers as listed in other public sources. + Placeholders for schedulers, generic_armv8_a for costs. + A12 seems mostly 8.3, M1 is 8.5 without BTI, M2 and M3 are 8.6 + From measurements made so far the odd-number core IDs are performance. */ +AARCH64_CORE("apple-a12", applea12, cortexa53, V8_3A, (), generic_armv8_a, 0x61, 0x12, -1) +AARCH64_CORE("apple-m1", applem1_0, cortexa57, V8_5A, (), generic_armv8_a, 0x61, AARCH64_BIG_LITTLE (0x21, 0x20), -1) +AARCH64_CORE("apple-m1", applem1_1, cortexa57, V8_5A, (), generic_armv8_a, 0x61, AARCH64_BIG_LITTLE (0x23, 0x22), -1) +AARCH64_CORE("apple-m1", applem1_2, cortexa57, V8_5A, (), generic_armv8_a, 0x61, AARCH64_BIG_LITTLE (0x25, 0x24), -1) +AARCH64_CORE("apple-m1", applem1_3, cortexa57, V8_5A, (), generic_armv8_a, 0x61, AARCH64_BIG_LITTLE (0x29, 0x28), -1) +AARCH64_CORE("apple-m2", applem2_0, cortexa57, V8_6A, (), generic_armv8_a, 0x61, AARCH64_BIG_LITTLE (0x31, 0x30), -1) +AARCH64_CORE("apple-m2", applem2_1, cortexa57, V8_6A, (), generic_armv8_a, 0x61, AARCH64_BIG_LITTLE (0x33, 0x32), -1) +AARCH64_CORE("apple-m2", applem2_2, cortexa57, V8_6A, (), generic_armv8_a, 0x61, AARCH64_BIG_LITTLE (0x35, 0x34), -1) +AARCH64_CORE("apple-m2", applem2_3, cortexa57, V8_6A, (), generic_armv8_a, 0x61, AARCH64_BIG_LITTLE (0x39, 0x38), -1) +AARCH64_CORE("apple-m3", applem3_0, cortexa57, V8_6A, (), generic_armv8_a, 0x61, AARCH64_BIG_LITTLE (0x49, 0x48), -1) + /* Armv9.0-A Architecture Processors. */ /* Arm ('A') cores. */ @@ -208,7 +224,7 @@ AARCH64_CORE("neoverse-v3ae", neoversev3ae, cortexa57, V9_2A, (SVE2_BITPERM, RNG AARCH64_CORE("demeter", demeter, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1) /* NVIDIA ('N') cores. */ -AARCH64_CORE("olympus", olympus, cortexa57, V9_2A, (SVE2_BITPERM, RNG, LS64, MEMTAG, PROFILE, FAMINMAX, FP8DOT2, LUT, SVE2_AES, SVE2_SHA3, SVE2_SM4), neoversev3, 0x4e, 0x10, -1) +AARCH64_CORE("olympus", olympus, cortexa57, V9_2A, (SVE2_BITPERM, RNG, LS64, MEMTAG, PROFILE, FAMINMAX, FP8FMA, FP8DOT2, FP8DOT4, LUT, SVE2_AES, SVE2_SHA3, SVE2_SM4), neoversev3, 0x4e, 0x10, -1) /* Generic Architecture Processors. */ AARCH64_CORE("generic", generic, cortexa53, V8A, (), generic, 0x0, 0x0, -1) diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 8f44aea..1ca86c9 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -1260,6 +1260,7 @@ void aarch64_restore_za (rtx); void aarch64_expand_crc_using_pmull (scalar_mode, scalar_mode, rtx *); void aarch64_expand_reversed_crc_using_pmull (scalar_mode, scalar_mode, rtx *); +void aarch64_expand_fp_spaceship (rtx, rtx, rtx, rtx); extern bool aarch64_gcs_enabled (); diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc index 44e4807..3651926 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc @@ -5174,7 +5174,11 @@ bool verify_type_context (location_t loc, type_context_kind context, const_tree type, bool silent_p) { - if (!sizeless_type_p (type)) + const_tree tmp = type; + if (omp_type_context (context) && POINTER_TYPE_P (type)) + tmp = strip_pointer_types (tmp); + + if (!sizeless_type_p (tmp)) return true; switch (context) @@ -5234,6 +5238,37 @@ verify_type_context (location_t loc, type_context_kind context, if (!silent_p) error_at (loc, "capture by copy of SVE type %qT", type); return false; + + case TCTX_OMP_MAP: + if (!silent_p) + error_at (loc, "SVE type %qT not allowed in %<map%> clause", type); + return false; + + case TCTX_OMP_MAP_IMP_REF: + if (!silent_p) + error ("cannot reference %qT object types in %<target%> region", type); + return false; + + case TCTX_OMP_PRIVATE: + if (!silent_p) + error_at (loc, "SVE type %qT not allowed in" + " %<target%> %<private%> clause", type); + return false; + + case TCTX_OMP_FIRSTPRIVATE: + if (!silent_p) + error_at (loc, "SVE type %qT not allowed in" + " %<target%> %<firstprivate%> clause", type); + return false; + + case TCTX_OMP_DEVICE_ADDR: + if (!silent_p) + error_at (loc, "SVE type %qT not allowed in" + " %<target%> device clauses", type); + return false; + + default: + break; } gcc_unreachable (); } diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 3dbd659..d4af370 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -3133,9 +3133,9 @@ "TARGET_SVE" { rtx tmp = gen_reg_rtx (<MODE>mode); - emit_insn (gen_vcond_mask_<mode><vpred> (tmp, operands[1], - CONST1_RTX (<MODE>mode), - CONST0_RTX (<MODE>mode))); + emit_insn (gen_vcond_mask_<mode><vpred> (tmp, CONST1_RTX (<MODE>mode), + CONST0_RTX (<MODE>mode), + operands[1])); emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2])); DONE; } diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md index 56a914f..982074c 100644 --- a/gcc/config/aarch64/aarch64-tune.md +++ b/gcc/config/aarch64/aarch64-tune.md @@ -1,5 +1,5 @@ ;; -*- buffer-read-only: t -*- ;; Generated automatically by gentune.sh from aarch64-cores.def (define_attr "tune" - "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88,thunderxt88p1,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,fujitsu_monaka,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,oryon1,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexr82ae,cortexa510,cortexa520,cortexa520ae,cortexa710,cortexa715,cortexa720,cortexa720ae,cortexa725,cortexx2,cortexx3,cortexx4,cortexx925,neoversen2,cobalt100,neoversen3,neoversev2,grace,neoversev3,neoversev3ae,demeter,olympus,generic,generic_armv8_a,generic_armv9_a" + "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88,thunderxt88p1,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,fujitsu_monaka,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,oryon1,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexr82ae,applea12,applem1_0,applem1_1,applem1_2,applem1_3,applem2_0,applem2_1,applem2_2,applem2_3,applem3_0,cortexa510,cortexa520,cortexa520ae,cortexa710,cortexa715,cortexa720,cortexa720ae,cortexa725,cortexx2,cortexx3,cortexx4,cortexx925,neoversen2,cobalt100,neoversen3,neoversev2,grace,neoversev3,neoversev3ae,demeter,olympus,generic,generic_armv8_a,generic_armv9_a" (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 4e80114..f7bccf5 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -9417,13 +9417,16 @@ aarch64_emit_stack_tie (rtx reg) } /* Allocate POLY_SIZE bytes of stack space using TEMP1 and TEMP2 as scratch - registers. If POLY_SIZE is not large enough to require a probe this function - will only adjust the stack. When allocating the stack space - FRAME_RELATED_P is then used to indicate if the allocation is frame related. - FINAL_ADJUSTMENT_P indicates whether we are allocating the area below - the saved registers. If we are then we ensure that any allocation - larger than the ABI defined buffer needs a probe so that the - invariant of having a 1KB buffer is maintained. + registers, given that the stack pointer is currently BYTES_BELOW_SP bytes + above the bottom of the static frame. + + If POLY_SIZE is not large enough to require a probe this function will only + adjust the stack. When allocating the stack space FRAME_RELATED_P is then + used to indicate if the allocation is frame related. FINAL_ADJUSTMENT_P + indicates whether we are allocating the area below the saved registers. + If we are then we ensure that any allocation larger than the ABI defined + buffer needs a probe so that the invariant of having a 1KB buffer is + maintained. We emit barriers after each stack adjustment to prevent optimizations from breaking the invariant that we never drop the stack more than a page. This @@ -9440,6 +9443,7 @@ aarch64_emit_stack_tie (rtx reg) static void aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, poly_int64 poly_size, + poly_int64 bytes_below_sp, aarch64_isa_mode force_isa_mode, bool frame_related_p, bool final_adjustment_p) @@ -9503,8 +9507,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, poly_size, temp1, temp2, force_isa_mode, false, true); - rtx_insn *insn = get_last_insn (); - + auto initial_cfa_offset = frame.frame_size - bytes_below_sp; + auto final_cfa_offset = initial_cfa_offset + poly_size; if (frame_related_p) { /* This is done to provide unwinding information for the stack @@ -9514,28 +9518,31 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, The tie will expand to nothing but the optimizers will not touch the instruction. */ rtx stack_ptr_copy = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM); - emit_move_insn (stack_ptr_copy, stack_pointer_rtx); + auto *insn = emit_move_insn (stack_ptr_copy, stack_pointer_rtx); aarch64_emit_stack_tie (stack_ptr_copy); /* We want the CFA independent of the stack pointer for the duration of the loop. */ - add_reg_note (insn, REG_CFA_DEF_CFA, stack_ptr_copy); + add_reg_note (insn, REG_CFA_DEF_CFA, + plus_constant (Pmode, stack_ptr_copy, + initial_cfa_offset)); RTX_FRAME_RELATED_P (insn) = 1; } rtx probe_const = gen_int_mode (min_probe_threshold, Pmode); rtx guard_const = gen_int_mode (guard_size, Pmode); - insn = emit_insn (gen_probe_sve_stack_clash (Pmode, stack_pointer_rtx, - stack_pointer_rtx, temp1, - probe_const, guard_const)); + auto *insn + = emit_insn (gen_probe_sve_stack_clash (Pmode, stack_pointer_rtx, + stack_pointer_rtx, temp1, + probe_const, guard_const)); /* Now reset the CFA register if needed. */ if (frame_related_p) { add_reg_note (insn, REG_CFA_DEF_CFA, - gen_rtx_PLUS (Pmode, stack_pointer_rtx, - gen_int_mode (poly_size, Pmode))); + plus_constant (Pmode, stack_pointer_rtx, + final_cfa_offset)); RTX_FRAME_RELATED_P (insn) = 1; } @@ -9581,12 +9588,13 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, We can determine which allocation we are doing by looking at the value of FRAME_RELATED_P since the final allocations are not frame related. */ + auto cfa_offset = frame.frame_size - (bytes_below_sp - rounded_size); if (frame_related_p) { /* We want the CFA independent of the stack pointer for the duration of the loop. */ add_reg_note (insn, REG_CFA_DEF_CFA, - plus_constant (Pmode, temp1, rounded_size)); + plus_constant (Pmode, temp1, cfa_offset)); RTX_FRAME_RELATED_P (insn) = 1; } @@ -9608,7 +9616,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, if (frame_related_p) { add_reg_note (insn, REG_CFA_DEF_CFA, - plus_constant (Pmode, stack_pointer_rtx, rounded_size)); + plus_constant (Pmode, stack_pointer_rtx, cfa_offset)); RTX_FRAME_RELATED_P (insn) = 1; } @@ -9916,17 +9924,22 @@ aarch64_expand_prologue (void) code below does not handle it for -fstack-clash-protection. */ gcc_assert (known_eq (initial_adjust, 0) || callee_adjust == 0); + /* The offset of the current SP from the bottom of the static frame. */ + poly_int64 bytes_below_sp = frame_size; + /* Will only probe if the initial adjustment is larger than the guard less the amount of the guard reserved for use by the caller's outgoing args. */ aarch64_allocate_and_probe_stack_space (tmp0_rtx, tmp1_rtx, initial_adjust, - force_isa_mode, true, false); + bytes_below_sp, force_isa_mode, + true, false); + bytes_below_sp -= initial_adjust; if (callee_adjust != 0) - aarch64_push_regs (reg1, reg2, callee_adjust); - - /* The offset of the current SP from the bottom of the static frame. */ - poly_int64 bytes_below_sp = frame_size - initial_adjust - callee_adjust; + { + aarch64_push_regs (reg1, reg2, callee_adjust); + bytes_below_sp -= callee_adjust; + } if (emit_frame_chain) { @@ -9994,7 +10007,7 @@ aarch64_expand_prologue (void) || known_eq (frame.reg_offset[VG_REGNUM], bytes_below_sp)); aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, sve_callee_adjust, - force_isa_mode, + bytes_below_sp, force_isa_mode, !frame_pointer_needed, false); bytes_below_sp -= sve_callee_adjust; } @@ -10005,10 +10018,11 @@ aarch64_expand_prologue (void) /* We may need to probe the final adjustment if it is larger than the guard that is assumed by the called. */ - gcc_assert (known_eq (bytes_below_sp, final_adjust)); aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust, - force_isa_mode, + bytes_below_sp, force_isa_mode, !frame_pointer_needed, true); + bytes_below_sp -= final_adjust; + gcc_assert (known_eq (bytes_below_sp, 0)); if (emit_frame_chain && maybe_ne (final_adjust, 0)) aarch64_emit_stack_tie (hard_frame_pointer_rtx); @@ -31073,8 +31087,6 @@ aarch64_valid_sysreg_name_p (const char *regname) const sysreg_t *sysreg = aarch64_lookup_sysreg_map (regname); if (sysreg == NULL) return aarch64_is_implem_def_reg (regname); - if (sysreg->arch_reqs) - return bool (aarch64_isa_flags & sysreg->arch_reqs); return true; } @@ -31098,8 +31110,6 @@ aarch64_retrieve_sysreg (const char *regname, bool write_p, bool is128op) if ((write_p && (sysreg->properties & F_REG_READ)) || (!write_p && (sysreg->properties & F_REG_WRITE))) return NULL; - if ((~aarch64_isa_flags & sysreg->arch_reqs) != 0) - return NULL; return sysreg->encoding; } @@ -31298,6 +31308,79 @@ aarch64_expand_reversed_crc_using_pmull (scalar_mode crc_mode, } } +/* Expand the spaceship optab for floating-point operands. + + If the result is compared against (-1, 0, 1 , 2), expand into + fcmpe + conditional branch insns. + + Otherwise (the result is just stored as an integer), expand into + fcmpe + a sequence of conditional select/increment/invert insns. */ +void +aarch64_expand_fp_spaceship (rtx dest, rtx op0, rtx op1, rtx hint) +{ + rtx cc_reg = gen_rtx_REG (CCFPEmode, CC_REGNUM); + emit_set_insn (cc_reg, gen_rtx_COMPARE (CCFPEmode, op0, op1)); + + rtx cc_gt = gen_rtx_GT (VOIDmode, cc_reg, const0_rtx); + rtx cc_lt = gen_rtx_LT (VOIDmode, cc_reg, const0_rtx); + rtx cc_un = gen_rtx_UNORDERED (VOIDmode, cc_reg, const0_rtx); + + if (hint == const0_rtx) + { + rtx un_label = gen_label_rtx (); + rtx lt_label = gen_label_rtx (); + rtx gt_label = gen_label_rtx (); + rtx end_label = gen_label_rtx (); + + rtx temp = gen_rtx_IF_THEN_ELSE (VOIDmode, cc_un, + gen_rtx_LABEL_REF (Pmode, un_label), pc_rtx); + aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, temp)); + + temp = gen_rtx_IF_THEN_ELSE (VOIDmode, cc_lt, + gen_rtx_LABEL_REF (Pmode, lt_label), pc_rtx); + emit_jump_insn (gen_rtx_SET (pc_rtx, temp)); + + temp = gen_rtx_IF_THEN_ELSE (VOIDmode, cc_gt, + gen_rtx_LABEL_REF (Pmode, gt_label), pc_rtx); + emit_jump_insn (gen_rtx_SET (pc_rtx, temp)); + + /* Equality. */ + emit_move_insn (dest, const0_rtx); + emit_jump (end_label); + + emit_label (un_label); + emit_move_insn (dest, const2_rtx); + emit_jump (end_label); + + emit_label (gt_label); + emit_move_insn (dest, const1_rtx); + emit_jump (end_label); + + emit_label (lt_label); + emit_move_insn (dest, constm1_rtx); + + emit_label (end_label); + } + else + { + rtx temp0 = gen_reg_rtx (SImode); + rtx temp1 = gen_reg_rtx (SImode); + rtx cc_ungt = gen_rtx_UNGT (VOIDmode, cc_reg, const0_rtx); + + /* The value of hint is stored if the operands are unordered. */ + rtx temp_un = gen_int_mode (UINTVAL (hint) - 1, SImode); + if (!aarch64_reg_zero_or_m1_or_1 (temp_un, SImode)) + temp_un = force_reg (SImode, temp_un); + + emit_set_insn (temp0, gen_rtx_IF_THEN_ELSE (SImode, cc_lt, + constm1_rtx, const0_rtx)); + emit_set_insn (temp1, gen_rtx_IF_THEN_ELSE (SImode, cc_un, + temp_un, const0_rtx)); + emit_set_insn (dest, gen_rtx_IF_THEN_ELSE (SImode, cc_ungt, + gen_rtx_PLUS (SImode, temp1, const1_rtx), temp0)); + } +} + /* Target-specific selftests. */ #if CHECKING_P diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 031e621..c678f7a 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -707,11 +707,12 @@ ) (define_expand "cbranch<mode>4" - [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" - [(match_operand:GPF 1 "register_operand") - (match_operand:GPF 2 "aarch64_fp_compare_operand")]) - (label_ref (match_operand 3 "" "")) - (pc)))] + [(set (pc) (if_then_else + (match_operator 0 "aarch64_comparison_operator" + [(match_operand:GPF_F16 1 "register_operand") + (match_operand:GPF_F16 2 "aarch64_fp_compare_operand")]) + (label_ref (match_operand 3 "" "")) + (pc)))] "" " operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1], @@ -4337,26 +4338,28 @@ (define_insn "fcmp<mode>" [(set (reg:CCFP CC_REGNUM) - (compare:CCFP (match_operand:GPF 0 "register_operand") - (match_operand:GPF 1 "aarch64_fp_compare_operand")))] + (compare:CCFP + (match_operand:GPF_F16 0 "register_operand") + (match_operand:GPF_F16 1 "aarch64_fp_compare_operand")))] "TARGET_FLOAT" {@ [ cons: 0 , 1 ] [ w , Y ] fcmp\t%<s>0, #0.0 [ w , w ] fcmp\t%<s>0, %<s>1 } - [(set_attr "type" "fcmp<s>")] + [(set_attr "type" "fcmp<stype>")] ) (define_insn "fcmpe<mode>" [(set (reg:CCFPE CC_REGNUM) - (compare:CCFPE (match_operand:GPF 0 "register_operand") - (match_operand:GPF 1 "aarch64_fp_compare_operand")))] + (compare:CCFPE + (match_operand:GPF_F16 0 "register_operand") + (match_operand:GPF_F16 1 "aarch64_fp_compare_operand")))] "TARGET_FLOAT" {@ [ cons: 0 , 1 ] [ w , Y ] fcmpe\t%<s>0, #0.0 [ w , w ] fcmpe\t%<s>0, %<s>1 } - [(set_attr "type" "fcmp<s>")] + [(set_attr "type" "fcmp<stype>")] ) (define_insn "*cmp_swp_<shift>_reg<mode>" @@ -4392,6 +4395,49 @@ [(set_attr "type" "alus_ext")] ) +;; <=> operator pattern (integer) +;; (a == b) ? 0 : (a < b) ? -1 : 1. +(define_expand "spaceship<mode>4" + [(match_operand:SI 0 "register_operand") + (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "register_operand") + (match_operand:SI 3 "const_int_operand")] + "" + { + // 1 indicates unsigned comparison, -1 indicates signed. + gcc_assert (operands[3] == constm1_rtx || operands[3] == const1_rtx); + + rtx cc_reg = aarch64_gen_compare_reg (EQ, operands[1], operands[2]); + RTX_CODE code_gt = operands[3] == const1_rtx ? GTU : GT; + RTX_CODE code_lt = operands[3] == const1_rtx ? LTU : LT; + + rtx cc_gt = gen_rtx_fmt_ee (code_gt, VOIDmode, cc_reg, const0_rtx); + rtx cc_lt = gen_rtx_fmt_ee (code_lt, VOIDmode, cc_reg, const0_rtx); + + rtx temp = gen_reg_rtx (SImode); + emit_insn (gen_rtx_SET (temp, gen_rtx_IF_THEN_ELSE (SImode, cc_gt, + const1_rtx, const0_rtx))); + emit_insn (gen_rtx_SET (operands[0], gen_rtx_IF_THEN_ELSE (SImode, cc_lt, + constm1_rtx, temp))); + DONE; + } +) + +;; <=> operator pattern (floating-point) +;; (a == b) ? 0 : (a < b) ? -1 : (a > b) ? 1 : UNORDERED. +(define_expand "spaceship<mode>4" + [(match_operand:SI 0 "register_operand") + (match_operand:GPF 1 "register_operand") + (match_operand:GPF 2 "register_operand") + (match_operand:SI 3 "const_int_operand")] + "TARGET_FLOAT" + { + aarch64_expand_fp_spaceship (operands[0], operands[1], operands[2], + operands[3]); + DONE; + } +) + ;; ------------------------------------------------------------------- ;; Store-flag and conditional select insns ;; ------------------------------------------------------------------- @@ -4424,8 +4470,8 @@ (define_expand "cstore<mode>4" [(set (match_operand:SI 0 "register_operand") (match_operator:SI 1 "aarch64_comparison_operator_mode" - [(match_operand:GPF 2 "register_operand") - (match_operand:GPF 3 "aarch64_fp_compare_operand")]))] + [(match_operand:GPF_F16 2 "register_operand") + (match_operand:GPF_F16 3 "aarch64_fp_compare_operand")]))] "" " operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2], diff --git a/gcc/config/alpha/alpha.cc b/gcc/config/alpha/alpha.cc index ba470d9..14e7da5 100644 --- a/gcc/config/alpha/alpha.cc +++ b/gcc/config/alpha/alpha.cc @@ -4291,14 +4291,10 @@ alpha_get_mem_rtx_alignment_and_offset (rtx expr, int &a, HOST_WIDE_INT &o) tree mem = MEM_EXPR (expr); if (mem != NULL_TREE) - switch (TREE_CODE (mem)) - { - case MEM_REF: - tree_offset = mem_ref_offset (mem).force_shwi (); - tree_align = get_object_alignment (get_base_address (mem)); - break; + { + HOST_WIDE_INT comp_offset = 0; - case COMPONENT_REF: + for (; TREE_CODE (mem) == COMPONENT_REF; mem = TREE_OPERAND (mem, 0)) { tree byte_offset = component_ref_field_offset (mem); tree bit_offset = DECL_FIELD_BIT_OFFSET (TREE_OPERAND (mem, 1)); @@ -4307,14 +4303,15 @@ alpha_get_mem_rtx_alignment_and_offset (rtx expr, int &a, HOST_WIDE_INT &o) || !poly_int_tree_p (byte_offset, &offset) || !tree_fits_shwi_p (bit_offset)) break; - tree_offset = offset + tree_to_shwi (bit_offset) / BITS_PER_UNIT; + comp_offset += offset + tree_to_shwi (bit_offset) / BITS_PER_UNIT; } - tree_align = get_object_alignment (get_base_address (mem)); - break; - default: - break; - } + if (TREE_CODE (mem) == MEM_REF) + { + tree_offset = comp_offset + mem_ref_offset (mem).force_shwi (); + tree_align = get_object_alignment (get_base_address (mem)); + } + } if (reg_align > mem_align) { diff --git a/gcc/config/c6x/c6x.h b/gcc/config/c6x/c6x.h index e7da250..50bad27 100644 --- a/gcc/config/c6x/c6x.h +++ b/gcc/config/c6x/c6x.h @@ -444,11 +444,9 @@ struct GTY(()) machine_function #define TARG_VEC_PERMUTE_COST 1 #endif -/* ttype entries (the only interesting data references used) are - sb-relative got-indirect (aka .ehtype). */ +/* .ehtype ttype entries are sb-relative. */ #define ASM_PREFERRED_EH_DATA_FORMAT(code, data) \ - (((code) == 0 && (data) == 1) ? (DW_EH_PE_datarel | DW_EH_PE_indirect) \ - : DW_EH_PE_absptr) + (((code) == 0 && (data) == 1) ? DW_EH_PE_datarel : DW_EH_PE_absptr) /* This should be the same as the definition in elfos.h, plus the call to output special unwinding directives. */ diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h index 8c164fd..9b9a3fe 100644 --- a/gcc/config/darwin.h +++ b/gcc/config/darwin.h @@ -504,6 +504,7 @@ extern GTY(()) int darwin_ms_struct; %{static|static-libgcc|static-libgfortran:%:replace-outfile(-lgfortran libgfortran.a%s)}\ %{static|static-libgcc|static-libquadmath:%:replace-outfile(-lquadmath libquadmath.a%s)}\ %{static|static-libgcc|static-libphobos:%:replace-outfile(-lgphobos libgphobos.a%s)}\ + %{static|static-libgcc|static-libgcobol:%:replace-outfile(-lgcobol libgcobol.a%s)}\ %{static|static-libgcc|static-libstdc++|static-libgfortran:%:replace-outfile(-lgomp libgomp.a%s)}\ %{static|static-libgcc|static-libstdc++:%:replace-outfile(-lstdc++ libstdc++.a%s)}\ %{static|static-libgm2:%:replace-outfile(-lm2pim libm2pim.a%s)}\ diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index d59e87b..91ce801 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -6587,8 +6587,8 @@ gcn_hsa_declare_function_name (FILE *file, const char *name, if (avgpr % vgpr_block_size) avgpr += vgpr_block_size - (avgpr % vgpr_block_size); - fputs ("\t.rodata\n" - "\t.p2align\t6\n" + switch_to_section (readonly_data_section); + fputs ("\t.p2align\t6\n" "\t.amdhsa_kernel\t", file); assemble_name (file, name); fputs ("\n", file); @@ -6707,7 +6707,7 @@ gcn_hsa_declare_function_name (FILE *file, const char *name, fputs (" .end_amdgpu_metadata\n", file); #endif - fputs ("\t.text\n", file); + switch_to_section (current_function_section ()); fputs ("\t.align\t256\n", file); fputs ("\t.type\t", file); assemble_name (file, name); diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md index 695656f..e0fb735 100644 --- a/gcc/config/gcn/gcn.md +++ b/gcc/config/gcn/gcn.md @@ -1018,7 +1018,9 @@ [(const_int 0)] "" { - sorry ("exception handling not supported"); + if (!fake_exceptions) + sorry ("exception handling not supported"); + DONE; }) ;; }}} diff --git a/gcc/config/gcn/gcn.opt b/gcc/config/gcn/gcn.opt index 142b439..99d6aeb 100644 --- a/gcc/config/gcn/gcn.opt +++ b/gcc/config/gcn/gcn.opt @@ -101,3 +101,11 @@ Enum(gcn_preferred_vectorization_factor) String(32) Value(32) EnumValue Enum(gcn_preferred_vectorization_factor) String(64) Value(64) + +mfake-exceptions +Target Var(fake_exceptions) Init(0) Undocumented +; With '-mfake-exceptions' enabled, the user-visible behavior in presence of +; exception handling constructs changes such that the compile-time +; 'sorry, unimplemented: exception handling not supported' is skipped, code +; generation proceeds, and instead, exception handling constructs 'abort' at +; run time. (..., or don't, if they're in dead code.) diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc index f5b89c9..b284ff4 100644 --- a/gcc/config/gcn/mkoffload.cc +++ b/gcc/config/gcn/mkoffload.cc @@ -1160,6 +1160,9 @@ main (int argc, char **argv) obstack_ptr_grow (&cc_argv_obstack, "-xlto"); if (fopenmp) obstack_ptr_grow (&cc_argv_obstack, "-mgomp"); + /* The host code may contain exception handling constructs. + Handle these as good as we can. */ + obstack_ptr_grow (&cc_argv_obstack, "-mfake-exceptions"); for (int ix = 1; ix != argc; ix++) { diff --git a/gcc/config/h8300/jumpcall.md b/gcc/config/h8300/jumpcall.md index b596399..4e63408 100644 --- a/gcc/config/h8300/jumpcall.md +++ b/gcc/config/h8300/jumpcall.md @@ -146,9 +146,9 @@ (define_insn_and_split "" [(set (pc) (if_then_else (match_operator 3 "eqne_operator" - [(zero_extract:QHSI (match_operand:QHSI 1 "register_operand" "r") - (const_int 1) - (match_operand 2 "const_int_operand" "n")) + [(zero_extract:HSI (match_operand:HSI 1 "register_operand" "r") + (const_int 1) + (match_operand 2 "const_int_operand" "n")) (const_int 0)]) (label_ref (match_operand 0 "" "")) (pc)))] @@ -156,7 +156,7 @@ "#" "&& reload_completed" [(set (reg:CCZ CC_REG) - (eq (zero_extract:QHSI (match_dup 1) (const_int 1) (match_dup 2)) + (eq (zero_extract:HSI (match_dup 1) (const_int 1) (match_dup 2)) (const_int 0))) (set (pc) (if_then_else (match_op_dup 3 [(reg:CCZ CC_REG) (const_int 0)]) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index cdfd94d..a314800 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -4138,6 +4138,10 @@ ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0, return false; mode = GET_MODE (dest); + if (immediate_operand (if_false, mode)) + if_false = force_reg (mode, if_false); + if (immediate_operand (if_true, mode)) + if_true = force_reg (mode, if_true); /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here, but MODE may be a vector mode and thus not appropriate. */ @@ -4687,6 +4691,8 @@ ix86_expand_fp_movcc (rtx operands[]) compare_op = ix86_expand_compare (NE, tmp, const0_rtx); } + operands[2] = force_reg (mode, operands[2]); + operands[3] = force_reg (mode, operands[3]); emit_insn (gen_rtx_SET (operands[0], gen_rtx_IF_THEN_ELSE (mode, compare_op, operands[2], operands[3]))); @@ -19256,8 +19262,6 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode) e1 = gen_reg_rtx (mode); x1 = gen_reg_rtx (mode); - /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */ - b = force_reg (mode, b); /* x0 = rcp(b) estimate */ @@ -19270,20 +19274,42 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode) emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b), UNSPEC_RCP))); - /* e0 = x0 * b */ - emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b))); + unsigned vector_size = GET_MODE_SIZE (mode); + + /* (a - (rcp(b) * a * b)) * rcp(b) + rcp(b) * a + N-R step with 2 fma implementation. */ + if (TARGET_FMA + || (TARGET_AVX512F && vector_size == 64) + || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))) + { + /* e0 = x0 * a */ + emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a))); + /* e1 = e0 * b - a */ + emit_insn (gen_rtx_SET (e1, gen_rtx_FMA (mode, e0, b, + gen_rtx_NEG (mode, a)))); + /* res = - e1 * x0 + e0 */ + emit_insn (gen_rtx_SET (res, gen_rtx_FMA (mode, + gen_rtx_NEG (mode, e1), + x0, e0))); + } + else + /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */ + { + /* e0 = x0 * b */ + emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b))); - /* e0 = x0 * e0 */ - emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0))); + /* e1 = x0 + x0 */ + emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0))); - /* e1 = x0 + x0 */ - emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0))); + /* e0 = x0 * e0 */ + emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0))); - /* x1 = e1 - e0 */ - emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0))); + /* x1 = e1 - e0 */ + emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0))); - /* res = a * x1 */ - emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1))); + /* res = a * x1 */ + emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1))); + } } /* Output code to perform a Newton-Rhapson approximation of a diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index a9fac01..964449f 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -2828,8 +2828,8 @@ ix86_option_override_internal (bool main_args_p, if (flag_nop_mcount) error ("%<-mnop-mcount%> is not compatible with this target"); #endif - if (flag_nop_mcount && flag_pic) - error ("%<-mnop-mcount%> is not implemented for %<-fPIC%>"); + if (flag_nop_mcount && flag_pic && !flag_plt) + error ("%<-mnop-mcount%> is not implemented for %<-fno-plt%>"); /* Accept -msseregparm only if at least SSE support is enabled. */ if (TARGET_SSEREGPARM_P (opts->x_target_flags) diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 4f8380c4..78df3d9 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -100,6 +100,7 @@ along with GCC; see the file COPYING3. If not see #include "i386-features.h" #include "function-abi.h" #include "rtl-error.h" +#include "gimple-pretty-print.h" /* This file should be included last. */ #include "target-def.h" @@ -458,6 +459,9 @@ int ix86_arch_specified; indirect thunk pushes the return address onto stack, destroying red-zone. + NB: Don't use red-zone for functions with no_caller_saved_registers + and 32 GPRs since 128-byte red-zone is too small for 31 GPRs. + TODO: If we can reserve the first 2 WORDs, for PUSH and, another for CALL, in red-zone, we can allow local indirect jumps with indirect thunk. */ @@ -467,6 +471,9 @@ ix86_using_red_zone (void) { return (TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI + && (!TARGET_APX_EGPR + || (cfun->machine->call_saved_registers + != TYPE_NO_CALLER_SAVED_REGISTERS)) && (!cfun->machine->has_local_indirect_jump || cfun->machine->indirect_branch_type == indirect_branch_keep)); } @@ -21810,6 +21817,25 @@ ix86_insn_cost (rtx_insn *insn, bool speed) return insn_cost + pattern_cost (PATTERN (insn), speed); } +/* Return cost of SSE/AVX FP->FP conversion (extensions and truncates). */ + +static int +vec_fp_conversion_cost (const struct processor_costs *cost, int size) +{ + if (size < 128) + return cost->cvtss2sd; + else if (size < 256) + { + if (TARGET_SSE_SPLIT_REGS) + return cost->cvtss2sd * size / 64; + return cost->cvtss2sd; + } + if (size < 512) + return cost->vcvtps2pd256; + else + return cost->vcvtps2pd512; +} + /* Compute a (partial) cost for rtx X. Return true if the complete cost has been computed, and false if subexpressions should be scanned. In either case, *TOTAL contains the cost result. */ @@ -22473,17 +22499,18 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, return false; case FLOAT_EXTEND: + /* x87 represents all values extended to 80bit. */ if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) *total = 0; else - *total = ix86_vec_cost (mode, cost->addss); + *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode)); return false; case FLOAT_TRUNCATE: if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) *total = cost->fadd; else - *total = ix86_vec_cost (mode, cost->addss); + *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode)); return false; case ABS: @@ -23158,6 +23185,12 @@ x86_print_call_or_nop (FILE *file, const char *target) if (flag_nop_mcount || !strcmp (target, "nop")) /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */ fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n"); + else if (!TARGET_PECOFF && flag_pic) + { + gcc_assert (flag_plt); + + fprintf (file, "1:\tcall\t%s@PLT\n", target); + } else fprintf (file, "1:\tcall\t%s\n", target); } @@ -23321,7 +23354,7 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) break; case CM_SMALL_PIC: case CM_MEDIUM_PIC: - if (!ix86_direct_extern_access) + if (!flag_plt) { if (ASSEMBLER_DIALECT == ASM_INTEL) fprintf (file, "1:\tcall\t[QWORD PTR %s@GOTPCREL[rip]]\n", @@ -23352,7 +23385,9 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) "\tleal\t%sP%d@GOTOFF(%%ebx), %%" PROFILE_COUNT_REGISTER "\n", LPREFIX, labelno); #endif - if (ASSEMBLER_DIALECT == ASM_INTEL) + if (flag_plt) + x86_print_call_or_nop (file, mcount_name); + else if (ASSEMBLER_DIALECT == ASM_INTEL) fprintf (file, "1:\tcall\t[DWORD PTR %s@GOT[ebx]]\n", mcount_name); else fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name); @@ -24669,7 +24704,7 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, switch (type_of_cost) { case scalar_stmt: - return fp ? ix86_cost->addss : COSTS_N_INSNS (1); + return fp ? ix86_cost->addss : COSTS_N_INSNS (1); case scalar_load: /* load/store costs are relative to register move which is 2. Recompute @@ -24740,7 +24775,11 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, return ix86_cost->cond_not_taken_branch_cost; case vec_perm: + return ix86_vec_cost (mode, ix86_cost->sse_op); + case vec_promote_demote: + if (fp) + return vec_fp_conversion_cost (ix86_tune_cost, mode); return ix86_vec_cost (mode, ix86_cost->sse_op); case vec_construct: @@ -25261,7 +25300,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, else if (X87_FLOAT_MODE_P (mode)) stmt_cost = ix86_cost->fadd; else - stmt_cost = ix86_cost->add; + stmt_cost = ix86_cost->add; } else stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss @@ -25316,7 +25355,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, (subcode == RSHIFT_EXPR && !TYPE_UNSIGNED (TREE_TYPE (op1))) ? ASHIFTRT : LSHIFTRT, mode, - TREE_CODE (op2) == INTEGER_CST, + TREE_CODE (op2) == INTEGER_CST, cst_and_fits_in_hwi (op2) ? int_cst_value (op2) : -1, false, false, NULL, NULL); @@ -25325,27 +25364,152 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, case NOP_EXPR: /* Only sign-conversions are free. */ if (tree_nop_conversion_p - (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)), + (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)), TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)))) stmt_cost = 0; + else if (fp) + stmt_cost = vec_fp_conversion_cost + (ix86_tune_cost, GET_MODE_BITSIZE (mode)); + break; + + case COND_EXPR: + { + /* SSE2 conditinal move sequence is: + pcmpgtd %xmm5, %xmm0 (accounted separately) + pand %xmm0, %xmm2 + pandn %xmm1, %xmm0 + por %xmm2, %xmm0 + while SSE4 uses cmp + blend + and AVX512 masked moves. + + The condition is accounted separately since we usually have + p = a < b + c = p ? x : y + and we will account first statement as setcc. Exception is when + p is loaded from memory as bool and then we will not acocunt + the compare, but there is no way to check for this. */ + + int ninsns = TARGET_SSE4_1 ? 1 : 3; + + /* If one of parameters is 0 or -1 the sequence will be simplified: + (if_true & mask) | (if_false & ~mask) -> if_true & mask */ + if (ninsns > 1 + && (zerop (gimple_assign_rhs2 (stmt_info->stmt)) + || zerop (gimple_assign_rhs3 (stmt_info->stmt)) + || integer_minus_onep + (gimple_assign_rhs2 (stmt_info->stmt)) + || integer_minus_onep + (gimple_assign_rhs3 (stmt_info->stmt)))) + ninsns = 1; + + if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) + stmt_cost = ninsns * ix86_cost->sse_op; + else if (X87_FLOAT_MODE_P (mode)) + /* x87 requires conditional branch. We don't have cost for + that. */ + ; + else if (VECTOR_MODE_P (mode)) + stmt_cost = ix86_vec_cost (mode, ninsns * ix86_cost->sse_op); + else + /* compare (accounted separately) + cmov. */ + stmt_cost = ix86_cost->add; + } break; - case BIT_IOR_EXPR: - case ABS_EXPR: - case ABSU_EXPR: case MIN_EXPR: case MAX_EXPR: + if (fp) + { + if (X87_FLOAT_MODE_P (mode)) + /* x87 requires conditional branch. We don't have cost for + that. */ + ; + else + /* minss */ + stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); + } + else + { + if (VECTOR_MODE_P (mode)) + { + stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); + /* vpmin was introduced in SSE3. + SSE2 needs pcmpgtd + pand + pandn + pxor. + If one of parameters is 0 or -1 the sequence is simplified + to pcmpgtd + pand. */ + if (!TARGET_SSSE3) + { + if (zerop (gimple_assign_rhs2 (stmt_info->stmt)) + || integer_minus_onep + (gimple_assign_rhs2 (stmt_info->stmt))) + stmt_cost *= 2; + else + stmt_cost *= 4; + } + } + else + /* cmp + cmov. */ + stmt_cost = ix86_cost->add * 2; + } + break; + + case ABS_EXPR: + case ABSU_EXPR: + if (fp) + { + if (X87_FLOAT_MODE_P (mode)) + /* fabs. */ + stmt_cost = ix86_cost->fabs; + else + /* andss of sign bit. */ + stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); + } + else + { + if (VECTOR_MODE_P (mode)) + { + stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); + /* vabs was introduced in SSE3. + SSE3 uses psrat + pxor + psub. */ + if (!TARGET_SSSE3) + stmt_cost *= 3; + } + else + /* neg + cmov. */ + stmt_cost = ix86_cost->add * 2; + } + break; + + case BIT_IOR_EXPR: case BIT_XOR_EXPR: case BIT_AND_EXPR: case BIT_NOT_EXPR: - if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) - stmt_cost = ix86_cost->sse_op; - else if (VECTOR_MODE_P (mode)) + gcc_assert (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode) + && !X87_FLOAT_MODE_P (mode)); + if (VECTOR_MODE_P (mode)) stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); else stmt_cost = ix86_cost->add; break; + default: + if (truth_value_p (subcode)) + { + if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) + /* CMPccS? insructions are cheap, so use sse_op. While they + produce a mask which may need to be turned to 0/1 by and, + expect that this will be optimized away in a common case. */ + stmt_cost = ix86_cost->sse_op; + else if (X87_FLOAT_MODE_P (mode)) + /* fcmp + setcc. */ + stmt_cost = ix86_cost->fadd + ix86_cost->add; + else if (VECTOR_MODE_P (mode)) + stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); + else + /* setcc. */ + stmt_cost = ix86_cost->add; + break; + } break; } } @@ -25369,6 +25533,29 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, break; } + if (kind == vec_promote_demote + && fp && FLOAT_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)))) + { + int outer_size + = tree_to_uhwi + (TYPE_SIZE + (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)))); + int inner_size + = tree_to_uhwi + (TYPE_SIZE + (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)))); + int stmt_cost = vec_fp_conversion_cost + (ix86_tune_cost, GET_MODE_BITSIZE (mode)); + /* VEC_PACK_TRUNC_EXPR: If inner size is greater than outer size we will end + up doing two conversions and packing them. */ + if (inner_size > outer_size) + { + int n = inner_size / outer_size; + stmt_cost = stmt_cost * n + + (n - 1) * ix86_vec_cost (mode, ix86_cost->sse_op); + } + } + /* If we do elementwise loads into a vector then we are bound by latency and execution resources for the many scalar loads (AGU and load ports). Try to account for this by scaling the diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 13da3d8..18aa42d 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -207,6 +207,12 @@ struct processor_costs { const int divsd; /* cost of DIVSD instructions. */ const int sqrtss; /* cost of SQRTSS instructions. */ const int sqrtsd; /* cost of SQRTSD instructions. */ + const int cvtss2sd; /* cost SSE FP conversions, + such as CVTSS2SD. */ + const int vcvtps2pd256; /* cost 256bit packed FP conversions, + such as VCVTPD2PS with larger reg in ymm. */ + const int vcvtps2pd512; /* cost 512bit packed FP conversions, + such as VCVTPD2PS with larger reg in zmm. */ const int reassoc_int, reassoc_fp, reassoc_vec_int, reassoc_vec_fp; /* Specify reassociation width for integer, fp, vector integer and vector fp @@ -2449,11 +2455,11 @@ constexpr wide_int_bitmask PTA_DIAMONDRAPIDS = PTA_SKYLAKE | PTA_PKU | PTA_SHA | PTA_WBNOINVD | PTA_CLWB | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_ENQCMD | PTA_CLDEMOTE | PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK | PTA_AMX_TILE | PTA_AMX_INT8 | PTA_AMX_BF16 | PTA_UINTR | PTA_AVXVNNI - | PTA_AMX_FP16 | PTA_PREFETCHI | PTA_AMX_COMPLEX | PTA_AVX10_1 - | PTA_AVXIFMA | PTA_AVXNECONVERT | PTA_AVXVNNIINT16 | PTA_AVXVNNIINT8 - | PTA_CMPCCXADD | PTA_SHA512 | PTA_SM3 | PTA_SM4 | PTA_AVX10_2 - | PTA_APX_F | PTA_AMX_AVX512 | PTA_AMX_FP8 | PTA_AMX_TF32 | PTA_AMX_TRANSPOSE - | PTA_MOVRS | PTA_AMX_MOVRS | PTA_USER_MSR; + | PTA_AMX_FP16 | PTA_PREFETCHI | PTA_AMX_COMPLEX | PTA_AVX10_1_256 + | PTA_AVX10_1 | PTA_AVXIFMA | PTA_AVXNECONVERT | PTA_AVXVNNIINT16 + | PTA_AVXVNNIINT8 | PTA_CMPCCXADD | PTA_SHA512 | PTA_SM3 | PTA_SM4 + | PTA_AVX10_2 | PTA_APX_F | PTA_AMX_AVX512 | PTA_AMX_FP8 | PTA_AMX_TF32 + | PTA_AMX_TRANSPOSE | PTA_MOVRS | PTA_AMX_MOVRS | PTA_USER_MSR; constexpr wide_int_bitmask PTA_BDVER1 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index d6b2f29..e170da3 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -26592,8 +26592,8 @@ [(set (match_operand:X87MODEF 0 "register_operand") (if_then_else:X87MODEF (match_operand 1 "comparison_operator") - (match_operand:X87MODEF 2 "register_operand") - (match_operand:X87MODEF 3 "register_operand")))] + (match_operand:X87MODEF 2 "nonimm_or_0_or_1s_operand") + (match_operand:X87MODEF 3 "nonimm_or_0_operand")))] "(TARGET_80387 && TARGET_CMOVE) || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;") diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 3d3848c..4b23e18 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -1267,6 +1267,14 @@ (match_operand 0 "vector_memory_operand") (match_code "const_vector"))) +; Return true when OP is register_operand, vector_memory_operand, +; const_vector zero or const_vector all ones. +(define_predicate "vector_or_0_or_1s_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "vector_memory_operand") + (match_operand 0 "const0_operand") + (match_operand 0 "int_float_vector_all_ones_operand"))) + (define_predicate "bcst_mem_operand" (and (match_code "vec_duplicate") (and (match_test "TARGET_AVX512F") @@ -1333,6 +1341,12 @@ (ior (match_operand 0 "nonimmediate_operand") (match_operand 0 "const0_operand"))) +; Return true when OP is a nonimmediate or zero or all ones. +(define_predicate "nonimm_or_0_or_1s_operand" + (ior (match_operand 0 "nonimmediate_operand") + (match_operand 0 "const0_operand") + (match_operand 0 "int_float_vector_all_ones_operand"))) + ;; Return true for RTX codes that force SImode address. (define_predicate "SImode_address_operand" (match_code "subreg,zero_extend,and")) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index b280676..2ed348c 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -527,6 +527,16 @@ (V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") (V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) +(define_mode_iterator V48_AVX512VL_4 + [(V4SF "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL") + (V4SI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL")]) + +(define_mode_iterator VI48_AVX512VL_4 + [(V4SI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL")]) + +(define_mode_iterator V8_AVX512VL_2 + [(V2DF "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) + (define_mode_iterator VFH_AVX10_2 [(V32HF "TARGET_AVX10_2") V16HF V8HF (V16SF "TARGET_AVX10_2") V8SF V4SF @@ -4410,7 +4420,7 @@ (unspec:<V48H_AVX512VL:avx512fmaskmode> [(match_operand:V48H_AVX512VL 1 "nonimmediate_operand" "v") (match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm") - (match_operand:SI 3 "const_0_to_7_operand" "n")] + (match_operand:SI 3 "<cmp_imm_predicate>" "n")] UNSPEC_PCMP)))] "TARGET_AVX512F && (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW) @@ -4428,7 +4438,7 @@ (unspec:<V48H_AVX512VL:avx512fmaskmode> [(match_operand:V48H_AVX512VL 1 "nonimmediate_operand") (match_operand:V48H_AVX512VL 2 "nonimmediate_operand") - (match_operand:SI 3 "const_0_to_7_operand")] + (match_operand:SI 3 "<cmp_imm_predicate>")] UNSPEC_PCMP))) (set (match_operand:<V48H_AVX512VL:avx512fmaskmode> 4 "register_operand") (unspec:<V48H_AVX512VL:avx512fmaskmode> @@ -4469,7 +4479,8 @@ (match_operand:V48H_AVX512VL 2 "nonimmediate_operand") (match_operand:SI 3 "<cmp_imm_predicate>" "n")] UNSPEC_PCMP)))] - "TARGET_AVX512F && ix86_pre_reload_split ()" + "TARGET_AVX512F && GET_MODE_NUNITS (<MODE>mode) >= 8 + && ix86_pre_reload_split ()" "#" "&& 1" [(set (match_dup 0) @@ -4480,6 +4491,70 @@ UNSPEC_PCMP))] "operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);") +(define_insn "*<avx512>_cmp<mode>3_and15" + [(set (match_operand:QI 0 "register_operand" "=k") + (and:QI + (unspec:QI + [(match_operand:V48_AVX512VL_4 1 "nonimmediate_operand" "v") + (match_operand:V48_AVX512VL_4 2 "nonimmediate_operand" "vm") + (match_operand:SI 3 "<cmp_imm_predicate>" "n")] + UNSPEC_PCMP) + (const_int 15)))] + "TARGET_AVX512F" + "v<ssecmpintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "*<avx512>_ucmp<mode>3_and15" + [(set (match_operand:QI 0 "register_operand" "=k") + (and:QI + (unspec:QI + [(match_operand:VI48_AVX512VL_4 1 "nonimmediate_operand" "v") + (match_operand:VI48_AVX512VL_4 2 "nonimmediate_operand" "vm") + (match_operand:SI 3 "const_0_to_7_operand" "n")] + UNSPEC_UNSIGNED_PCMP) + (const_int 15)))] + "TARGET_AVX512F" + "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "*<avx512>_cmp<mode>3_and3" + [(set (match_operand:QI 0 "register_operand" "=k") + (and:QI + (unspec:QI + [(match_operand:V8_AVX512VL_2 1 "nonimmediate_operand" "v") + (match_operand:V8_AVX512VL_2 2 "nonimmediate_operand" "vm") + (match_operand:SI 3 "<cmp_imm_predicate>" "n")] + UNSPEC_PCMP) + (const_int 3)))] + "TARGET_AVX512F" + "v<ssecmpintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "*avx512vl_ucmpv2di3_and3" + [(set (match_operand:QI 0 "register_operand" "=k") + (and:QI + (unspec:QI + [(match_operand:V2DI 1 "nonimmediate_operand" "v") + (match_operand:V2DI 2 "nonimmediate_operand" "vm") + (match_operand:SI 3 "const_0_to_7_operand" "n")] + UNSPEC_UNSIGNED_PCMP) + (const_int 3)))] + "TARGET_AVX512F" + "vpcmpuq\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>" [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k") (unspec:<avx512fmaskmode> @@ -4762,7 +4837,8 @@ (match_operand:VI48_AVX512VL 2 "nonimmediate_operand") (match_operand:SI 3 "const_0_to_7_operand")] UNSPEC_UNSIGNED_PCMP)))] - "TARGET_AVX512F && ix86_pre_reload_split ()" + "TARGET_AVX512F && ix86_pre_reload_split () + && GET_MODE_NUNITS (<MODE>mode) >= 8" "#" "&& 1" [(set (match_dup 0) @@ -5142,7 +5218,7 @@ (define_expand "vcond_mask_<mode><sseintvecmodelower>" [(set (match_operand:VI_256_AVX2 0 "register_operand") (vec_merge:VI_256_AVX2 - (match_operand:VI_256_AVX2 1 "nonimmediate_operand") + (match_operand:VI_256_AVX2 1 "nonimm_or_0_or_1s_operand") (match_operand:VI_256_AVX2 2 "nonimm_or_0_operand") (match_operand:<sseintvecmode> 3 "register_operand")))] "TARGET_AVX" @@ -5155,7 +5231,7 @@ (define_expand "vcond_mask_<mode><sseintvecmodelower>" [(set (match_operand:VI_128 0 "register_operand") (vec_merge:VI_128 - (match_operand:VI_128 1 "vector_operand") + (match_operand:VI_128 1 "vector_or_0_or_1s_operand") (match_operand:VI_128 2 "nonimm_or_0_operand") (match_operand:<sseintvecmode> 3 "register_operand")))] "TARGET_SSE2" @@ -5168,7 +5244,7 @@ (define_expand "vcond_mask_v1tiv1ti" [(set (match_operand:V1TI 0 "register_operand") (vec_merge:V1TI - (match_operand:V1TI 1 "vector_operand") + (match_operand:V1TI 1 "vector_or_0_or_1s_operand") (match_operand:V1TI 2 "nonimm_or_0_operand") (match_operand:V1TI 3 "register_operand")))] "TARGET_SSE2" @@ -5181,7 +5257,7 @@ (define_expand "vcond_mask_<mode><sseintvecmodelower>" [(set (match_operand:VF_256 0 "register_operand") (vec_merge:VF_256 - (match_operand:VF_256 1 "nonimmediate_operand") + (match_operand:VF_256 1 "nonimm_or_0_or_1s_operand") (match_operand:VF_256 2 "nonimm_or_0_operand") (match_operand:<sseintvecmode> 3 "register_operand")))] "TARGET_AVX" @@ -5194,7 +5270,7 @@ (define_expand "vcond_mask_<mode><sseintvecmodelower>" [(set (match_operand:VF_128 0 "register_operand") (vec_merge:VF_128 - (match_operand:VF_128 1 "vector_operand") + (match_operand:VF_128 1 "vector_or_0_or_1s_operand") (match_operand:VF_128 2 "nonimm_or_0_operand") (match_operand:<sseintvecmode> 3 "register_operand")))] "TARGET_SSE" diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index 7c8cb73..cddcf61 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -121,16 +121,19 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */ COSTS_N_BYTES (2), /* cost of FCHS instruction. */ COSTS_N_BYTES (2), /* cost of FSQRT instruction. */ - COSTS_N_BYTES (2), /* cost of cheap SSE instruction. */ - COSTS_N_BYTES (2), /* cost of ADDSS/SD SUBSS/SD insns. */ - COSTS_N_BYTES (2), /* cost of MULSS instruction. */ - COSTS_N_BYTES (2), /* cost of MULSD instruction. */ - COSTS_N_BYTES (2), /* cost of FMA SS instruction. */ - COSTS_N_BYTES (2), /* cost of FMA SD instruction. */ - COSTS_N_BYTES (2), /* cost of DIVSS instruction. */ - COSTS_N_BYTES (2), /* cost of DIVSD instruction. */ - COSTS_N_BYTES (2), /* cost of SQRTSS instruction. */ - COSTS_N_BYTES (2), /* cost of SQRTSD instruction. */ + COSTS_N_BYTES (4), /* cost of cheap SSE instruction. */ + COSTS_N_BYTES (4), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_BYTES (4), /* cost of MULSS instruction. */ + COSTS_N_BYTES (4), /* cost of MULSD instruction. */ + COSTS_N_BYTES (4), /* cost of FMA SS instruction. */ + COSTS_N_BYTES (4), /* cost of FMA SD instruction. */ + COSTS_N_BYTES (4), /* cost of DIVSS instruction. */ + COSTS_N_BYTES (4), /* cost of DIVSD instruction. */ + COSTS_N_BYTES (4), /* cost of SQRTSS instruction. */ + COSTS_N_BYTES (4), /* cost of SQRTSD instruction. */ + COSTS_N_BYTES (4), /* cost of CVTSS2SD etc. */ + COSTS_N_BYTES (4), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_BYTES (6), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ ix86_size_memcpy, ix86_size_memset, @@ -243,6 +246,9 @@ struct processor_costs i386_cost = { /* 386 specific costs */ COSTS_N_INSNS (88), /* cost of DIVSD instruction. */ COSTS_N_INSNS (122), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (122), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (27), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (54), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (108), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ i386_memcpy, i386_memset, @@ -356,6 +362,9 @@ struct processor_costs i486_cost = { /* 486 specific costs */ COSTS_N_INSNS (74), /* cost of DIVSD instruction. */ COSTS_N_INSNS (83), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (83), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (8), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (16), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (32), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ i486_memcpy, i486_memset, @@ -467,6 +476,9 @@ struct processor_costs pentium_cost = { COSTS_N_INSNS (39), /* cost of DIVSD instruction. */ COSTS_N_INSNS (70), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (70), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentium_memcpy, pentium_memset, @@ -571,6 +583,9 @@ struct processor_costs lakemont_cost = { COSTS_N_INSNS (60), /* cost of DIVSD instruction. */ COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (5), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (10), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (20), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentium_memcpy, pentium_memset, @@ -690,6 +705,9 @@ struct processor_costs pentiumpro_cost = { COSTS_N_INSNS (18), /* cost of DIVSD instruction. */ COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (31), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentiumpro_memcpy, pentiumpro_memset, @@ -800,6 +818,9 @@ struct processor_costs geode_cost = { COSTS_N_INSNS (47), /* cost of DIVSD instruction. */ COSTS_N_INSNS (54), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (54), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ geode_memcpy, geode_memset, @@ -913,6 +934,9 @@ struct processor_costs k6_cost = { COSTS_N_INSNS (56), /* cost of DIVSD instruction. */ COSTS_N_INSNS (56), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (56), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (8), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ k6_memcpy, k6_memset, @@ -1027,6 +1051,9 @@ struct processor_costs athlon_cost = { COSTS_N_INSNS (24), /* cost of DIVSD instruction. */ COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (19), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ athlon_memcpy, athlon_memset, @@ -1150,6 +1177,9 @@ struct processor_costs k8_cost = { COSTS_N_INSNS (20), /* cost of DIVSD instruction. */ COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ k8_memcpy, k8_memset, @@ -1281,6 +1311,9 @@ struct processor_costs amdfam10_cost = { COSTS_N_INSNS (20), /* cost of DIVSD instruction. */ COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ amdfam10_memcpy, amdfam10_memset, @@ -1405,6 +1438,9 @@ const struct processor_costs bdver_cost = { COSTS_N_INSNS (27), /* cost of DIVSD instruction. */ COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (26), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ bdver_memcpy, bdver_memset, @@ -1553,6 +1589,10 @@ struct processor_costs znver1_cost = { COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + /* Real latency is 4, but for split regs multiply cost of half op by 2. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. SPEC2k6 bencharks suggests @@ -1712,6 +1752,9 @@ struct processor_costs znver2_cost = { COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. @@ -1847,6 +1890,9 @@ struct processor_costs znver3_cost = { COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. @@ -1984,6 +2030,10 @@ struct processor_costs znver4_cost = { COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (21), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ + /* Real latency is 6, but for split regs multiply cost of half op by 2. */ + COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. @@ -2120,7 +2170,7 @@ struct processor_costs znver5_cost = { COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ /* ADDSS has throughput 2 and latency 2 (in some cases when source is another addition). */ - COSTS_N_INSNS (2), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ /* MULSS has throughput 2 and latency 3. */ COSTS_N_INSNS (3), /* cost of MULSS instruction. */ COSTS_N_INSNS (3), /* cost of MULSD instruction. */ @@ -2135,6 +2185,9 @@ struct processor_costs znver5_cost = { COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ /* DIVSD has throughtput 0.13 and latency 20. */ COSTS_N_INSNS (20), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */ /* Zen5 can execute: - integer ops: 6 per cycle, at most 3 multiplications. latency 1 for additions, 3 for multiplications (pipelined) @@ -2274,6 +2327,9 @@ struct processor_costs skylake_cost = { COSTS_N_INSNS (14), /* cost of DIVSD instruction. */ COSTS_N_INSNS (12), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (4), /* cost of 512bit VCVTPS2PD etc. */ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ skylake_memcpy, skylake_memset, @@ -2403,6 +2459,9 @@ struct processor_costs icelake_cost = { COSTS_N_INSNS (14), /* cost of DIVSD instruction. */ COSTS_N_INSNS (12), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ icelake_memcpy, icelake_memset, @@ -2526,6 +2585,9 @@ struct processor_costs alderlake_cost = { COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ alderlake_memcpy, alderlake_memset, @@ -2642,6 +2704,9 @@ const struct processor_costs btver1_cost = { COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (48), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ btver1_memcpy, btver1_memset, @@ -2755,6 +2820,9 @@ const struct processor_costs btver2_cost = { COSTS_N_INSNS (19), /* cost of DIVSD instruction. */ COSTS_N_INSNS (16), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (21), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ btver2_memcpy, btver2_memset, @@ -2867,6 +2935,9 @@ struct processor_costs pentium4_cost = { COSTS_N_INSNS (38), /* cost of DIVSD instruction. */ COSTS_N_INSNS (23), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (38), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentium4_memcpy, pentium4_memset, @@ -2982,6 +3053,9 @@ struct processor_costs nocona_cost = { COSTS_N_INSNS (40), /* cost of DIVSD instruction. */ COSTS_N_INSNS (32), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (41), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ nocona_memcpy, nocona_memset, @@ -3095,6 +3169,9 @@ struct processor_costs atom_cost = { COSTS_N_INSNS (60), /* cost of DIVSD instruction. */ COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */ 2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ atom_memcpy, atom_memset, @@ -3208,6 +3285,9 @@ struct processor_costs slm_cost = { COSTS_N_INSNS (69), /* cost of DIVSD instruction. */ COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ slm_memcpy, slm_memset, @@ -3335,6 +3415,9 @@ struct processor_costs tremont_cost = { COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ tremont_memcpy, tremont_memset, @@ -3448,6 +3531,9 @@ struct processor_costs intel_cost = { COSTS_N_INSNS (20), /* cost of DIVSD instruction. */ COSTS_N_INSNS (40), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (40), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (8), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (16), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (32), /* cost of 512bit VCVTPS2PD etc. */ 1, 4, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ intel_memcpy, intel_memset, @@ -3566,6 +3652,9 @@ struct processor_costs lujiazui_cost = { COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ COSTS_N_INSNS (32), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (60), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ lujiazui_memcpy, lujiazui_memset, @@ -3682,6 +3771,9 @@ struct processor_costs yongfeng_cost = { COSTS_N_INSNS (14), /* cost of DIVSD instruction. */ COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ 4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */ yongfeng_memcpy, yongfeng_memset, @@ -3798,6 +3890,9 @@ struct processor_costs shijidadao_cost = { COSTS_N_INSNS (14), /* cost of DIVSD instruction. */ COSTS_N_INSNS (11), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ 4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */ shijidadao_memcpy, shijidadao_memset, @@ -3922,6 +4017,9 @@ struct processor_costs generic_cost = { COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ generic_memcpy, generic_memset, @@ -4051,6 +4149,9 @@ struct processor_costs core_cost = { COSTS_N_INSNS (32), /* cost of DIVSD instruction. */ COSTS_N_INSNS (30), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (58), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ core_memcpy, core_memset, diff --git a/gcc/config/i386/x86-tune-sched.cc b/gcc/config/i386/x86-tune-sched.cc index 685a83c..15d3d91 100644 --- a/gcc/config/i386/x86-tune-sched.cc +++ b/gcc/config/i386/x86-tune-sched.cc @@ -81,6 +81,14 @@ ix86_issue_rate (void) case PROCESSOR_YONGFENG: case PROCESSOR_SHIJIDADAO: case PROCESSOR_GENERIC: + /* For znver5 decoder can handle 4 or 8 instructions per cycle, + op cache 12 instruction/cycle, dispatch 8 instructions + integer rename 8 instructions and Fp 6 instructions. + + The scheduler, without understanding out of order nature of the CPU + is not going to be able to use more than 4 instructions since that + is limits of the decoders. */ + case PROCESSOR_ZNVER5: return 4; case PROCESSOR_ICELAKE_CLIENT: @@ -91,13 +99,6 @@ ix86_issue_rate (void) return 5; case PROCESSOR_SAPPHIRERAPIDS: - /* For znver5 decoder can handle 4 or 8 instructions per cycle, - op cache 12 instruction/cycle, dispatch 8 instructions - integer rename 8 instructions and Fp 6 instructions. - - The scheduler, without understanding out of order nature of the CPU - is unlikely going to be able to fill all of these. */ - case PROCESSOR_ZNVER5: return 6; default: diff --git a/gcc/config/loongarch/genopts/gen-evolution.awk b/gcc/config/loongarch/genopts/gen-evolution.awk index 142b658..507063b 100644 --- a/gcc/config/loongarch/genopts/gen-evolution.awk +++ b/gcc/config/loongarch/genopts/gen-evolution.awk @@ -101,10 +101,18 @@ function gen_cpucfg_useful_idx() idx_list[j++] = i+0 delete idx_bucket - asort (idx_list) + for (i = 1; i < j; i++) { + t = i + for (k = i + 1; k < j; k++) + t = idx_list[k] < idx_list[t] ? k : t + + k = idx_list[t] + idx_list[t] = idx_list[i] + idx_list[i] = k + } print "static constexpr int cpucfg_useful_idx[] = {" - for (i in idx_list) + for (i = 1; i < j; i++) printf(" %d,\n", idx_list[i]) print "};" diff --git a/gcc/config/loongarch/genopts/genstr.sh b/gcc/config/loongarch/genopts/genstr.sh index 16c2edd..97517da 100755 --- a/gcc/config/loongarch/genopts/genstr.sh +++ b/gcc/config/loongarch/genopts/genstr.sh @@ -51,18 +51,18 @@ along with GCC; see the file COPYING3. If not see #define LOONGARCH_STR_H EOF - sed -e '/^$/n' -e 's@#.*$@@' -e '/^$/d' \ - -e 's@^\([^ \t]\+\)[ \t]*\([^ \t]*\)@#define \1 "\2"@' \ - loongarch-strings + awk '/^#.*$/ { next } /^$/ { print; next } + { printf ("#define %s \"%s\"\n", $1, $2) }' \ + loongarch-strings echo - # Generate the strings from isa-evolution.in. - awk '{ - a=$3 - gsub(/-/, "_", a) - print("#define OPTSTR_"toupper(a)"\t\""$3"\"") - }' isa-evolution.in + # Generate the strings from isa-evolution.in. + awk '{ + a=$3 + gsub(/-/, "_", a) + print("#define OPTSTR_"toupper(a)"\t\""$3"\"") + }' isa-evolution.in echo echo "#endif /* LOONGARCH_STR_H */" @@ -73,18 +73,8 @@ EOF # according to the key-value pairs defined in loongarch-strings. gen_options() { - - sed -e '/^$/n' -e 's@#.*$@@' -e '/^$/d' \ - -e 's@^\([^ \t]\+\)[ \t]*\([^ \t]*\)@\1="\2"@' \ - loongarch-strings | { \ - - # read the definitions - while read -r line; do - eval "$line" - done - - # print a header - cat << EOF + # print a header + cat << EOF ; Generated by "genstr" from the template "loongarch.opt.in" ; and definitions from "loongarch-strings" and "isa-evolution.in". ; @@ -95,12 +85,25 @@ gen_options() { ; EOF - # make the substitutions - sed -e 's@"@\\"@g' -e 's/@@\([^@]\+\)@@/${\1}/g' loongarch.opt.in | \ - while read -r line; do - eval "echo \"$line\"" - done - } + # Generate loongarch.opt. + awk 'BEGIN { + delete strtab + while (getline < "loongarch-strings" > 0) { + if ($0 ~ /^#.*$/ || $0 ~ /^$/) continue + strtab[$1] = $2 + } + } + { + n = split($0, tmp, "@@") + for (i = 2; i <= n; i += 2) + tmp[i] = strtab[tmp[i]] + + for (i = 1; i <= n; i++) + printf("%s", tmp[i]) + printf ("\n") + + }' loongarch.opt.in + # Generate the strings from isa-evolution.in. awk '{ diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc index 24a28dc..0d3d026 100644 --- a/gcc/config/mips/mips.cc +++ b/gcc/config/mips/mips.cc @@ -20678,6 +20678,9 @@ mips_option_override (void) "-mcompact-branches=never"); } + if (is_micromips && TARGET_MSA) + error ("unsupported combination: %s", "-mmicromips -mmsa"); + /* Require explicit relocs for MIPS R6 onwards. This enables simplification of the compact branch and jump support through the backend. */ if (!TARGET_EXPLICIT_RELOCS && mips_isa_rev >= 6) diff --git a/gcc/config/nvptx/mkoffload.cc b/gcc/config/nvptx/mkoffload.cc index bdfe7f5..e7ec0ef 100644 --- a/gcc/config/nvptx/mkoffload.cc +++ b/gcc/config/nvptx/mkoffload.cc @@ -778,6 +778,9 @@ main (int argc, char **argv) } if (fopenmp) obstack_ptr_grow (&argv_obstack, "-mgomp"); + /* The host code may contain exception handling constructs. + Handle these as good as we can. */ + obstack_ptr_grow (&argv_obstack, "-mfake-exceptions"); for (int ix = 1; ix != argc; ix++) { diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc index de0ce5d..f893971 100644 --- a/gcc/config/nvptx/nvptx.cc +++ b/gcc/config/nvptx/nvptx.cc @@ -2359,7 +2359,25 @@ nvptx_assemble_integer (rtx x, unsigned int size, int ARG_UNUSED (aligned_p)) { gcc_checking_assert (!init_frag.active); /* Just use the default machinery; it's not getting used, anyway. */ - return default_assemble_integer (x, size, aligned_p); + bool ok = default_assemble_integer (x, size, aligned_p); + /* ..., but a few cases need special handling. */ + switch (GET_CODE (x)) + { + case SYMBOL_REF: + /* The default machinery won't work: we don't define the necessary + operations; don't use them outside of this. */ + gcc_checking_assert (!ok); + { + /* Just emit something; it's not getting used, anyway. */ + const char *op = "\t.symbol_ref\t"; + ok = (assemble_integer_with_op (op, x), true); + } + break; + + default: + break; + } + return ok; } gcc_checking_assert (init_frag.active); @@ -7766,9 +7784,23 @@ nvptx_asm_output_def_from_decls (FILE *stream, tree name, return; } +#ifdef ACCEL_COMPILER emit_ptx_alias: +#endif cgraph_node *cnode = cgraph_node::get (name); +#ifdef ACCEL_COMPILER + /* For nvptx offloading, make sure to emit C++ constructor, destructor aliases [PR97106] + + For some reason (yet to be analyzed), they're not 'cnode->referred_to_p ()'. + (..., or that's not the right approach at all; + <https://inbox.sourceware.org/87v7rx8lbx.fsf@euler.schwinge.ddns.net> + "Re: [committed][nvptx] Use .alias directive for mptx >= 6.3"). */ + if (DECL_CXX_CONSTRUCTOR_P (name) + || DECL_CXX_DESTRUCTOR_P (name)) + ; + else +#endif if (!cnode->referred_to_p ()) /* Prevent "Internal error: reference to deleted section". */ return; @@ -7873,8 +7905,6 @@ nvptx_asm_output_def_from_decls (FILE *stream, tree name, #define TARGET_ASM_DECLARE_CONSTANT_NAME nvptx_asm_declare_constant_name #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true -#undef TARGET_ASM_NEED_VAR_DECL_BEFORE_USE -#define TARGET_ASM_NEED_VAR_DECL_BEFORE_USE true #undef TARGET_MACHINE_DEPENDENT_REORG #define TARGET_MACHINE_DEPENDENT_REORG nvptx_reorg diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md index 3201247..7c3bd69 100644 --- a/gcc/config/nvptx/nvptx.md +++ b/gcc/config/nvptx/nvptx.md @@ -1644,7 +1644,9 @@ [(const_int 0)] "" { - sorry ("exception handling not supported"); + if (!fake_exceptions) + sorry ("exception handling not supported"); + DONE; }) (define_expand "nonlocal_goto" diff --git a/gcc/config/nvptx/nvptx.opt b/gcc/config/nvptx/nvptx.opt index 02d36b3..ce9fbc7 100644 --- a/gcc/config/nvptx/nvptx.opt +++ b/gcc/config/nvptx/nvptx.opt @@ -168,17 +168,25 @@ Target Var(nvptx_alias) Init(0) Undocumented mexperimental Target Var(nvptx_experimental) Init(0) Undocumented +mfake-exceptions +Target Var(fake_exceptions) Init(0) Undocumented +; With '-mfake-exceptions' enabled, the user-visible behavior in presence of +; exception handling constructs changes such that the compile-time +; 'sorry, unimplemented: exception handling not supported' is skipped, code +; generation proceeds, and instead, exception handling constructs 'abort' at +; run time. (..., or don't, if they're in dead code.) + mfake-ptx-alloca Target Var(nvptx_fake_ptx_alloca) Init(0) Undocumented ; With '-mfake-ptx-alloca' enabled, the user-visible behavior changes only ; for configurations where PTX 'alloca' is not available. Rather than a ; compile-time 'sorry, unimplemented: dynamic stack allocation not supported' -; in presence of dynamic stack allocation, compilation and assembly then -; succeeds. However, attempting to link in such '*.o' files then fails due -; to unresolved symbol '__GCC_nvptx__PTX_alloca_not_supported'. +; in presence of dynamic stack allocation, with '-mfake-ptx-alloca' enabled, +; compilation, assembly, and linking succeeds, as does execution, in case that +; 'alloca' is not attempted (if only used in error code paths, for example), +; and a run-time failure only in case that 'alloca' is actually attempted. ; ; This is meant to be used in scenarios where large volumes of code are ; compiled, a small fraction of which runs into dynamic stack allocation, but ; these parts are not important for specific use cases, and we'd thus like the -; build to succeed, and error out just upon actual, very rare use of the -; offending '*.o' files. +; build to succeed, and error out just upon actual, very rare use of 'alloca'. diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md index 5ed5e18..d0919ec 100644 --- a/gcc/config/riscv/bitmanip.md +++ b/gcc/config/riscv/bitmanip.md @@ -908,6 +908,24 @@ "bext\t%0,%1,%2" [(set_attr "type" "bitmanip")]) +;; We do not define SHIFT_COUNT_TRUNCATED, so we have to have variants +;; that mask/extend the count if we want to eliminate those ops +;; +;; We could (in theory) use GPR for the various modes, but I haven't +;; seen those cases appear in practice. Without a testcase I've +;; elected to keep the modes X which is easy to reason about. +(define_insn "*bext<mode>_mask_pos" + [(set (match_operand:X 0 "register_operand" "=r") + (zero_extract:X (match_operand:X 1 "register_operand" "r") + (const_int 1) + (and:X + (match_operand:X 2 "register_operand" "r") + (match_operand 3 "const_int_operand"))))] + "(TARGET_ZBS + && INTVAL (operands[3]) + 1 == GET_MODE_BITSIZE (<MODE>mode))" + "bext\t%0,%1,%2" + [(set_attr "type" "bitmanip")]) + ;; This is a bext followed by a seqz. Normally this would be a 3->2 split ;; But the and-not pattern with a constant operand is a define_insn_and_split, ;; so this looks like a 2->2 split, which combine rejects. So implement it @@ -1245,3 +1263,41 @@ expand_crc_using_clmul (<SUBX:MODE>mode, <SUBX1:MODE>mode, operands); DONE; }) + +;; If we have an XOR/IOR with a constant operand (C) and the we can +;; synthesize ~C more efficiently than C, then synthesize ~C and use +;; xnor/orn instead. +;; +;; The same can be done for AND, but mvconst_internal's issues get in +;; the way. That's future work. +(define_split + [(set (match_operand:X 0 "register_operand") + (any_or:X (match_operand:X 1 "register_operand") + (match_operand:X 2 "const_int_operand"))) + (clobber (match_operand:X 3 "register_operand"))] + "TARGET_ZBB + && (riscv_const_insns (operands[2], true) + > riscv_const_insns (GEN_INT (~INTVAL (operands[2])), true))" + [(const_int 0)] +{ + /* Get the inverted constant into the temporary register. */ + riscv_emit_move (operands[3], GEN_INT (~INTVAL (operands[2]))); + + /* For xnor, the NOT operation is in a different position. So + we have to customize the split code we generate a bit. + + It is expected that AND will be handled like IOR in the future. */ + if (<CODE> == XOR) + { + rtx x = gen_rtx_XOR (<X:MODE>mode, operands[1], operands[3]); + x = gen_rtx_NOT (<X:MODE>mode, x); + emit_insn (gen_rtx_SET (operands[0], x)); + } + else + { + rtx x = gen_rtx_NOT (<X:MODE>mode, operands[3]); + x = gen_rtx_IOR (<X:MODE>mode, x, operands[1]); + emit_insn (gen_rtx_SET (operands[0], x)); + } + DONE; +}) diff --git a/gcc/config/riscv/freebsd.h b/gcc/config/riscv/freebsd.h index 2dc7055..217e0ac 100644 --- a/gcc/config/riscv/freebsd.h +++ b/gcc/config/riscv/freebsd.h @@ -42,7 +42,7 @@ along with GCC; see the file COPYING3. If not see #define LINK_SPEC " \ -melf" XLEN_SPEC DEFAULT_ENDIAN_SPEC "riscv \ %{p:%nconsider using `-pg' instead of `-p' with gprof (1)} \ - " FBSD_LINK_PG_NOTES " \ + " FBSD_LINK_PG_NOTE " \ %{v:-V} \ %{assert*} %{R*} %{rpath*} %{defsym*} \ -X \ diff --git a/gcc/config/riscv/gnu.h b/gcc/config/riscv/gnu.h new file mode 100644 index 0000000..047399b --- /dev/null +++ b/gcc/config/riscv/gnu.h @@ -0,0 +1,59 @@ +/* Definitions for RISC-V GNU/Hurd systems with ELF format. + Copyright (C) 1998-2025 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#define TARGET_OS_CPP_BUILTINS() \ + do { \ + GNU_USER_TARGET_OS_CPP_BUILTINS(); \ + } while (0) + +#define GNU_USER_DYNAMIC_LINKER "/lib/ld-riscv" XLEN_SPEC "-" ABI_SPEC ".so.1" + +#define ICACHE_FLUSH_FUNC "__riscv_flush_icache" + +#define CPP_SPEC "%{pthread:-D_REENTRANT}" + +#define LD_EMUL_SUFFIX \ + "%{mabi=lp64d:}" \ + "%{mabi=lp64f:_lp64f}" \ + "%{mabi=lp64:_lp64}" \ + "%{mabi=ilp32d:}" \ + "%{mabi=ilp32f:_ilp32f}" \ + "%{mabi=ilp32:_ilp32}" + +#define LINK_SPEC "\ +-melf" XLEN_SPEC DEFAULT_ENDIAN_SPEC "riscv" LD_EMUL_SUFFIX " \ +%{mno-relax:--no-relax} \ +-X \ +%{mbig-endian:-EB} \ +%{mlittle-endian:-EL} \ +%{shared} \ + %{!shared: \ + %{!static: \ + %{!static-pie: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker " GNU_USER_DYNAMIC_LINKER "}} \ + %{static:-static} %{static-pie:-static -pie --no-dynamic-linker -z text}}" + +#define STARTFILE_PREFIX_SPEC \ + "/lib" XLEN_SPEC "/" ABI_SPEC "/ " \ + "/usr/lib" XLEN_SPEC "/" ABI_SPEC "/ " \ + "/lib/ " \ + "/usr/lib/ " + +#define RISCV_USE_CUSTOMISED_MULTI_LIB select_by_abi diff --git a/gcc/config/riscv/multilib-generator b/gcc/config/riscv/multilib-generator index 4828016..6ad1cf0 100755 --- a/gcc/config/riscv/multilib-generator +++ b/gcc/config/riscv/multilib-generator @@ -159,8 +159,8 @@ for cmodel in cmodels: "e.g. rv32imafd-ilp32--" % cfg) sys.exit(1) - # Compact code model only support rv64. - if cmodel == "compact" and arch.startswith("rv32"): + # Large code model only support rv64. + if cmodel == "large" and arch.startswith("rv32"): continue arch = arch_canonicalize (arch, args.misa_spec) diff --git a/gcc/config/riscv/riscv-cores.def b/gcc/config/riscv/riscv-cores.def index 2918496..e31afc3 100644 --- a/gcc/config/riscv/riscv-cores.def +++ b/gcc/config/riscv/riscv-cores.def @@ -41,6 +41,12 @@ RISCV_TUNE("sifive-p400-series", sifive_p400, sifive_p400_tune_info) RISCV_TUNE("sifive-p600-series", sifive_p600, sifive_p600_tune_info) RISCV_TUNE("tt-ascalon-d8", generic_ooo, tt_ascalon_d8_tune_info) RISCV_TUNE("thead-c906", generic, thead_c906_tune_info) +RISCV_TUNE("xt-c908", generic, generic_ooo_tune_info) +RISCV_TUNE("xt-c908v", generic, generic_ooo_tune_info) +RISCV_TUNE("xt-c910", generic, generic_ooo_tune_info) +RISCV_TUNE("xt-c910v2", generic, generic_ooo_tune_info) +RISCV_TUNE("xt-c920", generic, generic_ooo_tune_info) +RISCV_TUNE("xt-c920v2", generic, generic_ooo_tune_info) RISCV_TUNE("xiangshan-nanhu", xiangshan, xiangshan_nanhu_tune_info) RISCV_TUNE("generic-ooo", generic_ooo, generic_ooo_tune_info) RISCV_TUNE("size", generic, optimize_size_tune_info) @@ -93,6 +99,48 @@ RISCV_CORE("thead-c906", "rv64imafdc_xtheadba_xtheadbb_xtheadbs_xtheadcmo_" "xtheadmemidx_xtheadmempair_xtheadsync", "thead-c906") +RISCV_CORE("xt-c908", "rv64imafdc_zicbom_zicbop_zicboz_zicntr_zicsr_" + "zifencei_zihintpause_zihpm_zfh_zba_zbb_zbc_zbs_" + "sstc_svinval_svnapot_svpbmt_xtheadba_xtheadbb_" + "xtheadbs_xtheadcmo_xtheadcondmov_xtheadfmemidx_" + "xtheadmac_xtheadmemidx_xtheadmempair_xtheadsync", + "xt-c908") +RISCV_CORE("xt-c908v", "rv64imafdcv_zicbom_zicbop_zicboz_zicntr_zicsr_" + "zifencei_zihintpause_zihpm_zfh_zba_zbb_zbc_zbs_" + "zvfh_sstc_svinval_svnapot_svpbmt__xtheadba_" + "xtheadbb_xtheadbs_xtheadcmo_xtheadcondmov_" + "xtheadfmemidx_xtheadmac_xtheadmemidx_" + "xtheadmempair_xtheadsync_xtheadvdot", + "xt-c908") +RISCV_CORE("xt-c910", "rv64imafdc_zicntr_zicsr_zifencei_zihpm_zfh_" + "xtheadba_xtheadbb_xtheadbs_xtheadcmo_" + "xtheadcondmov_xtheadfmemidx_xtheadmac_" + "xtheadmemidx_xtheadmempair_xtheadsync", + "xt-c910") +RISCV_CORE("xt-c910v2", "rv64imafdc_zicbom_zicbop_zicboz_zicntr_zicond_" + "zicsr_zifencei _zihintntl_zihintpause_zihpm_" + "zawrs_zfa_zfbfmin_zfh_zca_zcb_zcd_zba_zbb_zbc_" + "zbs_sscofpmf_sstc_svinval_svnapot_svpbmt_" + "xtheadba_xtheadbb_xtheadbs_xtheadcmo_" + "xtheadcondmov_xtheadfmemidx_xtheadmac_" + "xtheadmemidx_xtheadmempair_xtheadsync", + "xt-c910v2") +RISCV_CORE("xt-c920", "rv64imafdc_zicntr_zicsr_zifencei_zihpm_zfh_" + "xtheadba_xtheadbb_xtheadbs_xtheadcmo_" + "xtheadcondmov_xtheadfmemidx_xtheadmac_" + "xtheadmemidx_xtheadmempair_xtheadsync_" + "xtheadvector", + "xt-c910") +RISCV_CORE("xt-c920v2", "rv64imafdcv_zicbom_zicbop_zicboz_zicntr_zicond_" + "zicsr_zifencei _zihintntl_zihintpause_zihpm_" + "zawrs_zfa_zfbfmin_zfh_zca_zcb_zcd_zba_zbb_zbc_" + "zbs_zvfbfmin_zvfbfwma_zvfh_sscofpmf_sstc_" + "svinval_svnapot_svpbmt_xtheadba_xtheadbb_" + "xtheadbs_xtheadcmo_xtheadcondmov_xtheadfmemidx_" + "xtheadmac_xtheadmemidx_xtheadmempair_" + "xtheadsync_xtheadvdot", + "xt-c920v2") + RISCV_CORE("tt-ascalon-d8", "rv64imafdcv_zic64b_zicbom_zicbop_zicboz_" "ziccamoa_ziccif_zicclsm_ziccrse_zicond_zicsr_" "zifencei_zihintntl_zihintpause_zimop_za64rs_" diff --git a/gcc/config/riscv/riscv-target-attr.cc b/gcc/config/riscv/riscv-target-attr.cc index 1d96865..8ad3025 100644 --- a/gcc/config/riscv/riscv-target-attr.cc +++ b/gcc/config/riscv/riscv-target-attr.cc @@ -257,11 +257,7 @@ riscv_target_attr_parser::update_settings (struct gcc_options *opts) const { std::string local_arch = m_subset_list->to_string (true); const char* local_arch_str = local_arch.c_str (); - struct cl_target_option *default_opts - = TREE_TARGET_OPTION (target_option_default_node); - if (opts->x_riscv_arch_string != default_opts->x_riscv_arch_string) - free (CONST_CAST (void *, (const void *) opts->x_riscv_arch_string)); - opts->x_riscv_arch_string = xstrdup (local_arch_str); + opts->x_riscv_arch_string = ggc_strdup (local_arch_str); riscv_set_arch_by_subset_list (m_subset_list, opts); } diff --git a/gcc/config/riscv/riscv-vector-builtins.cc b/gcc/config/riscv/riscv-vector-builtins.cc index d2fe849..61dcdab 100644 --- a/gcc/config/riscv/riscv-vector-builtins.cc +++ b/gcc/config/riscv/riscv-vector-builtins.cc @@ -4724,7 +4724,11 @@ bool verify_type_context (location_t loc, type_context_kind context, const_tree type, bool silent_p) { - if (!sizeless_type_p (type)) + const_tree tmp = type; + if (omp_type_context (context) && POINTER_TYPE_P (type)) + tmp = strip_pointer_types (tmp); + + if (!sizeless_type_p (tmp)) return true; switch (context) @@ -4796,6 +4800,34 @@ verify_type_context (location_t loc, type_context_kind context, const_tree type, error_at (loc, "capture by copy of RVV type %qT", type); return false; + + case TCTX_OMP_MAP: + if (!silent_p) + error_at (loc, "RVV type %qT not allowed in %<map%> clause", type); + return false; + + case TCTX_OMP_MAP_IMP_REF: + if (!silent_p) + error ("cannot reference %qT object types in %<target%> region", type); + return false; + + case TCTX_OMP_PRIVATE: + if (!silent_p) + error_at (loc, "RVV type %qT not allowed in" + " %<target%> %<private%> clause", type); + return false; + + case TCTX_OMP_FIRSTPRIVATE: + if (!silent_p) + error_at (loc, "RVV type %qT not allowed in" + " %<target%> %<firstprivate%> clause", type); + return false; + + case TCTX_OMP_DEVICE_ADDR: + if (!silent_p) + error_at (loc, "RVV type %qT not allowed in" + " %<target%> device clauses", type); + return false; } gcc_unreachable (); diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc index 0ac2538..a8c9256 100644 --- a/gcc/config/riscv/riscv-vsetvl.cc +++ b/gcc/config/riscv/riscv-vsetvl.cc @@ -685,7 +685,7 @@ invalid_opt_bb_p (basic_block cfg_bb) /* We only do LCM optimizations on blocks that are post dominated by EXIT block, that is, we don't do LCM optimizations on infinite loop. */ FOR_EACH_EDGE (e, ei, cfg_bb->succs) - if (e->flags & EDGE_FAKE) + if ((e->flags & EDGE_FAKE) || (e->flags & EDGE_ABNORMAL)) return true; return false; @@ -2698,6 +2698,7 @@ pre_vsetvl::compute_lcm_local_properties () m_avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); bitmap_vector_clear (m_avloc, last_basic_block_for_fn (cfun)); + bitmap_vector_clear (m_kill, last_basic_block_for_fn (cfun)); bitmap_vector_clear (m_antloc, last_basic_block_for_fn (cfun)); bitmap_vector_ones (m_transp, last_basic_block_for_fn (cfun)); @@ -2749,6 +2750,10 @@ pre_vsetvl::compute_lcm_local_properties () if (invalid_opt_bb_p (bb->cfg_bb ())) { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\n --- skipping bb %u due to weird edge", + bb->index ()); + bitmap_clear (m_antloc[bb_index]); bitmap_clear (m_transp[bb_index]); } @@ -3022,6 +3027,18 @@ pre_vsetvl::earliest_fuse_vsetvl_info (int iter) continue; } + /* We cannot lift a vsetvl into the source block if the block is + not transparent WRT to it. + This is too restrictive for blocks where a register's use only + feeds into vsetvls and no regular insns. One example is the + test rvv/vsetvl/avl_single-68.c which is currently XFAILed for + that reason. + In order to support this case we'd need to check the vsetvl's + AVL operand's uses in the source block and make sure they are + only used in other vsetvls. */ + if (!bitmap_bit_p (m_transp[eg->src->index], expr_index)) + continue; + if (dump_file && (dump_flags & TDF_DETAILS)) { fprintf (dump_file, diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 38f3ae7..bad59e2 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -10382,7 +10382,7 @@ riscv_file_end () fprintf (asm_out_file, "1:\n"); /* pr_type. */ - fprintf (asm_out_file, "\t.p2align\t3\n"); + fprintf (asm_out_file, "\t.p2align\t%u\n", p2align); fprintf (asm_out_file, "2:\n"); fprintf (asm_out_file, "\t.long\t0xc0000000\n"); /* pr_datasz. */ @@ -13136,9 +13136,6 @@ parse_features_for_version (tree decl, DECL_SOURCE_LOCATION (decl)); gcc_assert (parse_res); - if (arch_string != default_opts->x_riscv_arch_string) - free (CONST_CAST (void *, (const void *) arch_string)); - cl_target_option_restore (&global_options, &global_options_set, &cur_target); } diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h index 2bcabd0..2759a4c 100644 --- a/gcc/config/riscv/riscv.h +++ b/gcc/config/riscv/riscv.h @@ -888,7 +888,7 @@ extern enum riscv_cc get_riscv_cc (const rtx use); #define ASM_OUTPUT_OPCODE(STREAM, PTR) \ (PTR) = riscv_asm_output_opcode(STREAM, PTR) -#define JUMP_TABLES_IN_TEXT_SECTION 0 +#define JUMP_TABLES_IN_TEXT_SECTION (riscv_cmodel == CM_LARGE) #define CASE_VECTOR_MODE SImode #define CASE_VECTOR_PC_RELATIVE (riscv_cmodel != CM_MEDLOW) diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 26a247c..eec9687 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -789,7 +789,7 @@ rtx t5 = gen_reg_rtx (DImode); rtx t6 = gen_reg_rtx (DImode); - riscv_emit_binary (PLUS, operands[0], operands[1], operands[2]); + emit_insn (gen_addsi3_extended (t6, operands[1], operands[2])); if (GET_CODE (operands[1]) != CONST_INT) emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0)); else @@ -799,7 +799,10 @@ else t5 = operands[2]; emit_insn (gen_adddi3 (t3, t4, t5)); - emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0)); + rtx t7 = gen_lowpart (SImode, t6); + SUBREG_PROMOTED_VAR_P (t7) = 1; + SUBREG_PROMOTED_SET (t7, SRP_SIGNED); + emit_move_insn (operands[0], t7); riscv_expand_conditional_branch (operands[3], NE, t6, t3); } @@ -835,8 +838,11 @@ emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0)); else t3 = operands[1]; - riscv_emit_binary (PLUS, operands[0], operands[1], operands[2]); - emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0)); + emit_insn (gen_addsi3_extended (t4, operands[1], operands[2])); + rtx t5 = gen_lowpart (SImode, t4); + SUBREG_PROMOTED_VAR_P (t5) = 1; + SUBREG_PROMOTED_SET (t5, SRP_SIGNED); + emit_move_insn (operands[0], t5); riscv_expand_conditional_branch (operands[3], LTU, t4, t3); } @@ -966,7 +972,7 @@ rtx t5 = gen_reg_rtx (DImode); rtx t6 = gen_reg_rtx (DImode); - riscv_emit_binary (MINUS, operands[0], operands[1], operands[2]); + emit_insn (gen_subsi3_extended (t6, operands[1], operands[2])); if (GET_CODE (operands[1]) != CONST_INT) emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0)); else @@ -976,7 +982,10 @@ else t5 = operands[2]; emit_insn (gen_subdi3 (t3, t4, t5)); - emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0)); + rtx t7 = gen_lowpart (SImode, t6); + SUBREG_PROMOTED_VAR_P (t7) = 1; + SUBREG_PROMOTED_SET (t7, SRP_SIGNED); + emit_move_insn (operands[0], t7); riscv_expand_conditional_branch (operands[3], NE, t6, t3); } @@ -1015,8 +1024,11 @@ emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0)); else t3 = operands[1]; - riscv_emit_binary (MINUS, operands[0], operands[1], operands[2]); - emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0)); + emit_insn (gen_subsi3_extended (t4, operands[1], operands[2])); + rtx t5 = gen_lowpart (SImode, t4); + SUBREG_PROMOTED_VAR_P (t5) = 1; + SUBREG_PROMOTED_SET (t5, SRP_SIGNED); + emit_move_insn (operands[0], t5); riscv_expand_conditional_branch (operands[3], LTU, t3, t4); } diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 8ee43cf..3ab4d76 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -2136,18 +2136,34 @@ (match_operand 7 "const_int_operand") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) - (vec_duplicate:V_VLS - (match_operand:<VEL> 3 "direct_broadcast_operand")) + ;; (vec_duplicate:V_VLS ;; wrapper activated by wrap_vec_dup below. + (match_operand:<VEL> 3 "direct_broadcast_operand") ;; ) (match_operand:V_VLS 2 "vector_merge_operand")))] "TARGET_VECTOR" { /* Transform vmv.v.x/vfmv.v.f (avl = 1) into vmv.s.x since vmv.s.x/vfmv.s.f has better chances to do vsetvl fusion in vsetvl pass. */ + bool wrap_vec_dup = true; + rtx vec_cst = NULL_RTX; if (riscv_vector::splat_to_scalar_move_p (operands)) { operands[1] = riscv_vector::gen_scalar_move_mask (<VM>mode); operands[3] = force_reg (<VEL>mode, operands[3]); } + else if (immediate_operand (operands[3], <VEL>mode) + && (vec_cst = gen_const_vec_duplicate (<MODE>mode, operands[3])) + && (/* -> pred_broadcast<mode>_zero */ + (vector_least_significant_set_mask_operand (operands[1], + <VM>mode) + && vector_const_0_operand (vec_cst, <MODE>mode)) + || (/* pred_broadcast<mode>_imm */ + vector_all_trues_mask_operand (operands[1], <VM>mode) + && vector_const_int_or_double_0_operand (vec_cst, + <MODE>mode)))) + { + operands[3] = vec_cst; + wrap_vec_dup = false; + } /* Handle vmv.s.x instruction (Wb1 mask) which has memory scalar. */ else if (satisfies_constraint_Wdm (operands[3])) { @@ -2191,6 +2207,8 @@ ; else operands[3] = force_reg (<VEL>mode, operands[3]); + if (wrap_vec_dup) + operands[3] = gen_rtx_VEC_DUPLICATE (<MODE>mode, operands[3]); }) (define_insn_and_split "*pred_broadcast<mode>" @@ -3939,7 +3957,7 @@ (any_extend:VWEXTI (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand" " vr, vr")) (match_operand:VWEXTI 2 "vector_merge_operand" " vu, 0")))] - "TARGET_VECTOR" + "TARGET_VECTOR && !TARGET_XTHEADVECTOR" "v<sz>ext.vf2\t%0,%3%p1" [(set_attr "type" "vext") (set_attr "mode" "<MODE>")]) @@ -3959,7 +3977,7 @@ (any_extend:VQEXTI (match_operand:<V_QUAD_TRUNC> 3 "register_operand" " vr, vr")) (match_operand:VQEXTI 2 "vector_merge_operand" " vu, 0")))] - "TARGET_VECTOR" + "TARGET_VECTOR && !TARGET_XTHEADVECTOR" "v<sz>ext.vf4\t%0,%3%p1" [(set_attr "type" "vext") (set_attr "mode" "<MODE>")]) @@ -3979,7 +3997,7 @@ (any_extend:VOEXTI (match_operand:<V_OCT_TRUNC> 3 "register_operand" " vr, vr")) (match_operand:VOEXTI 2 "vector_merge_operand" " vu, 0")))] - "TARGET_VECTOR" + "TARGET_VECTOR && !TARGET_XTHEADVECTOR" "v<sz>ext.vf8\t%0,%3%p1" [(set_attr "type" "vext") (set_attr "mode" "<MODE>")]) diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 737c3d6..12dbde2 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -25765,10 +25765,13 @@ rs6000_can_inline_p (tree caller, tree callee) } } - /* Ignore -mpower8-fusion and -mpower10-fusion options for inlining - purposes. */ - callee_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION); - explicit_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION); + /* Ignore -mpower8-fusion, -mpower10-fusion and -msave-toc-indirect options + for inlining purposes. */ + HOST_WIDE_INT ignored_isas = (OPTION_MASK_P8_FUSION + | OPTION_MASK_P10_FUSION + | OPTION_MASK_SAVE_TOC_INDIRECT); + callee_isa &= ~ignored_isas; + explicit_isa &= ~ignored_isas; /* The callee's options must be a subset of the caller's options, i.e. a vsx function may inline an altivec function, but a no-vsx function diff --git a/gcc/config/rx/rx.md b/gcc/config/rx/rx.md index edb2c96..a3d966e 100644 --- a/gcc/config/rx/rx.md +++ b/gcc/config/rx/rx.md @@ -2541,10 +2541,17 @@ (unspec_volatile:SI [(match_operand:BLK 1 "memory_operand") ;; String1 (match_operand:BLK 2 "memory_operand")] ;; String2 UNSPEC_CMPSTRN)) - (use (match_operand:SI 3 "register_operand")) ;; Max Length + (use (match_operand:SI 3 "nonmemory_operand")) ;; Max Length (match_operand:SI 4 "immediate_operand")] ;; Known Align "rx_allow_string_insns" { + bool const_len = CONST_INT_P (operands[3]); + if (const_len && operands[3] == CONST0_RTX (SImode)) + { + emit_move_insn (operands[0], CONST0_RTX (SImode)); + DONE; + } + rtx str1 = gen_rtx_REG (SImode, 1); rtx str2 = gen_rtx_REG (SImode, 2); rtx len = gen_rtx_REG (SImode, 3); @@ -2553,6 +2560,13 @@ emit_move_insn (str2, force_operand (XEXP (operands[2], 0), NULL_RTX)); emit_move_insn (len, operands[3]); + /* Set flags in case len is zero */ + if (!const_len) + { + emit_insn (gen_setpsw (GEN_INT ('C'))); + emit_insn (gen_setpsw (GEN_INT ('Z'))); + } + emit_insn (gen_rx_cmpstrn (operands[0], operands[1], operands[2])); DONE; } @@ -2590,9 +2604,7 @@ (clobber (reg:SI 3)) (clobber (reg:CC CC_REG))] "rx_allow_string_insns" - "setpsw z ; Set flags in case len is zero - setpsw c - scmpu ; Perform the string comparison + "scmpu ; Perform the string comparison mov #-1, %0 ; Set up -1 result (which cannot be created ; by the SC insn) bnc ?+ ; If Carry is not set skip over diff --git a/gcc/config/s390/9175.md b/gcc/config/s390/9175.md new file mode 100644 index 0000000..d0ac0e1 --- /dev/null +++ b/gcc/config/s390/9175.md @@ -0,0 +1,316 @@ +;; Scheduling description for z17. +;; Copyright (C) 2025 Free Software Foundation, Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3, or (at your option) any later +;; version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +;; WARRANTY; without even the implied warranty of MERCHANTABILITY or +;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +;; for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_attr "z17_unit_fpd" "" + (cond [(eq_attr "mnemonic" "ddb,ddbr,deb,debr,dxbr,sqdb,sqdbr,sqeb,sqebr,\ +sqxbr,vdf,vdg,vdlf,vdlg,vdlq,vdq,vfddb,vfdsb,vfsqdb,vfsqsb,vrf,vrg,vrlf,vrlg,\ +vrlq,vrq,wfddb,wfdsb,wfdxb,wfsqdb,wfsqxb") + (const_int 1)] (const_int 0))) + +(define_attr "z17_unit_fxa" "" + (cond [(eq_attr "mnemonic" "a,afi,ag,agf,agfi,agfr,agh,aghi,aghik,agr,agrk,ah,\ +ahi,ahik,ahy,al,alc,alcg,alcgr,alcr,alfi,alg,algf,algfi,algfr,alghsik,algr,\ +algrk,alhsik,alr,alrk,aly,ar,ark,ay,bdepg,bextg,clzg,ctzg,etnd,flogr,ic,icm,\ +icmh,icmy,icy,iihf,iilf,ipm,la,larl,lay,lb,lbr,lcgfr,lcgr,lcr,lgb,lgbr,lgf,\ +lgfi,lgfr,lgfrl,lgh,lghi,lghr,lghrl,lgr,lh,lhi,lhr,lhrl,lhy,llcr,llgcr,llgfr,\ +llghr,llgtr,llhr,llihf,llihh,llihl,llilf,llilh,llill,llxab,llxaf,llxag,llxah,\ +llxaq,lngfr,lngr,lnr,loc,locg,locghi,locgr,lochi,locr,lpgfr,lpgr,lpr,lr,lrv,\ +lrvg,lrvgr,lrvh,lrvr,lt,ltg,ltgf,ltgfr,ltgr,ltr,lxab,lxaf,lxag,lxah,lxaq,m,mfy,\ +mg,mgh,mghi,mgrk,mh,mhi,mhy,ml,mlg,mlgr,mlr,mr,ms,msc,msfi,msg,msgc,msgf,msgfi,\ +msgfr,msgr,msgrkc,msr,msrkc,msy,n,ncgrk,ncrk,ng,ngr,ngrk,nihf,nihh,nihl,nilf,\ +nilh,nill,nngrk,nnrk,nogrk,nork,nr,nrk,nxgrk,nxrk,ny,o,ocgrk,ocrk,og,ogr,ogrk,\ +oihf,oihh,oihl,oilf,oilh,oill,or,ork,oy,pfpo,popcnt,risbg,risbgn,rll,rllg,\ +rnsbg,rosbg,rxsbg,s,selgr,selr,sg,sgf,sgfr,sgh,sgr,sgrk,sh,shy,sl,slb,slbg,\ +slbgr,slbr,sldl,slfi,slg,slgf,slgfi,slgfr,slgr,slgrk,sll,sllg,sllk,slr,slrk,\ +sly,sr,sra,srag,srak,srda,srdl,srk,srl,srlg,srlk,sy,x,xg,xgr,xgrk,xihf,xilf,xr,\ +xrk,xy") + (const_int 1)] (const_int 0))) + +(define_attr "z17_unit_fxb" "" + (cond [(eq_attr "mnemonic" "agsi,algsi,alsi,asi,b,bc,bcr,bi,br,c,cfi,cg,cgf,\ +cgfi,cgfr,cgfrl,cgh,cghi,cghrl,cghsi,cgit,cgr,cgrl,cgrt,ch,chi,chrl,chsi,chy,\ +cit,cl,clfhsi,clfi,clfit,clg,clgf,clgfi,clgfr,clgfrl,clghrl,clghsi,clgit,clgr,\ +clgrl,clgrt,clgt,clhhsi,clhrl,cli,cliy,clm,clmy,clr,clrl,clrt,clt,cly,cr,crl,\ +crt,cy,laa,laag,lan,lang,lao,laog,lat,lax,laxg,lcdfr,ldgr,ldr,lgat,lgdr,lndfr,\ +lpdfr,lxr,lzdr,lzer,lzxr,mvghi,mvhhi,mvhi,mvi,mviy,ni,niy,nop,nopr,ntstg,oi,\ +oiy,ppa,st,stc,stcy,std,stdy,ste,stey,stg,stgrl,sth,sthrl,sthy,stoc,stocg,strl,\ +strv,strvg,strvh,sty,tend,tm,tmh,tmhh,tmhl,tml,tmlh,tmll,tmy,vlgvb,vlgvf,vlgvg,\ +vlgvh,vlr,vlvgb,vlvgf,vlvgg,vlvgh,vlvgp,vscef,vsceg,vst,vstbrf,vstbrg,vstbrh,\ +vstbrq,vstebrf,vstebrg,vstef,vsteg,vsterf,vsterg,vsterh,vstl,vstrl,vstrlr,xi,\ +xiy") + (const_int 1)] (const_int 0))) + +(define_attr "z17_unit_fxd" "" + (cond [(eq_attr "mnemonic" "dlgr,dlr,dr,dsgfr,dsgr") + (const_int 1)] (const_int 0))) + +(define_attr "z17_unit_lsu" "" + (cond [(eq_attr "mnemonic" "clc,ear,l,lam,lcbb,ld,lde,ldy,lg,lgrl,llc,llgc,\ +llgf,llgfrl,llgh,llghrl,llgt,llh,llhrl,lm,lmg,lmy,lpq,lrl,ly,mvcrl,sar,sfpc,\ +tabort,vl,vlbb,vlbrf,vlbrg,vlbrh,vlbrq,vlbrrepf,vlbrrepg,vlbrreph,vlerf,vlerg,\ +vlerh,vll,vllebrzf,vllebrzg,vllebrzh,vllezb,vllezf,vllezg,vllezh,vllezlf,\ +vlrepb,vlrepf,vlrepg,vlreph,vlrl,vlrlr") + (const_int 1)] (const_int 0))) + +(define_attr "z17_unit_vfu" "" + (cond [(eq_attr "mnemonic" "adb,adbr,adtr,aeb,aebr,axbr,axtr,brcl,cdb,cdbr,\ +cdtr,ceb,cebr,cpsdr,cxbr,cxtr,ddtr,dxtr,fidbr,fidbra,fidtr,fiebr,fiebra,fixbr,\ +fixbra,fixtr,j,jg,kdb,kdbr,kdtr,keb,kebr,kxbr,kxtr,lcdbr,lcebr,lcxbr,ldeb,\ +ldebr,ldetr,le,ledbr,ledtr,ler,ley,lndbr,lnebr,lnxbr,lpdbr,lpebr,lpxbr,ltdbr,\ +ltdtr,ltebr,ltxbr,ltxtr,lxdb,lxdbr,lxdtr,lxeb,lxebr,madb,madbr,maeb,maebr,mdb,\ +mdbr,mdtr,meeb,meebr,msdb,msdbr,mseb,msebr,mxbr,mxtr,sdb,sdbr,sdtr,seb,sebr,\ +sxbr,sxtr,tcdb,tceb,tcxb,tdcdt,tdcet,tdcxt,vab,vaccb,vacccq,vaccf,vaccg,vacch,\ +vaccq,vacq,vaf,vag,vah,vaq,vavgb,vavgf,vavgg,vavgh,vavglb,vavglf,vavglg,vavglh,\ +vavglq,vavgq,vblendb,vblendf,vblendg,vblendh,vblendq,vbperm,vcdgb,vcdlgb,vcefb,\ +vcelfb,vceqb,vceqbs,vceqf,vceqfs,vceqg,vceqgs,vceqh,vceqhs,vceqq,vceqqs,vcfeb,\ +vcfn,vcgdb,vchb,vchbs,vchf,vchfs,vchg,vchgs,vchh,vchhs,vchlb,vchlbs,vchlf,\ +vchlfs,vchlg,vchlgs,vchlh,vchlhs,vchlq,vchlqs,vchq,vchqs,vcksm,vclfeb,vclfnh,\ +vclfnl,vclgdb,vclzb,vclzf,vclzg,vclzh,vclzq,vcnf,vcrnf,vctzb,vctzf,vctzg,vctzh,\ +vctzq,verimb,verimf,verimg,verimh,verllb,verllf,verllg,verllh,verllvb,verllvf,\ +verllvg,verllvh,veslb,veslf,veslg,veslh,veslvb,veslvf,veslvg,veslvh,vesrab,\ +vesraf,vesrag,vesrah,vesravb,vesravf,vesravg,vesravh,vesrlb,vesrlf,vesrlg,\ +vesrlh,vesrlvb,vesrlvf,vesrlvg,vesrlvh,veval,vfadb,vfasb,vfcedb,vfcedbs,vfcesb,\ +vfcesbs,vfchdb,vfchdbs,vfchedb,vfchedbs,vfchesb,vfchesbs,vfchsb,vfchsbs,vfeeb,\ +vfeef,vfeeh,vfeezbs,vfeezfs,vfeezhs,vfeneb,vfenef,vfeneh,vfenezb,vfenezf,\ +vfenezh,vfidb,vfisb,vfkedb,vfkesb,vfkhdb,vfkhedb,vfkhesb,vfkhsb,vflcdb,vflcsb,\ +vflndb,vflnsb,vflpdb,vflpsb,vfmadb,vfmasb,vfmaxdb,vfmaxsb,vfmdb,vfmindb,\ +vfminsb,vfmsb,vfmsdb,vfmssb,vfnmadb,vfnmasb,vfnmsdb,vfnmssb,vfsdb,vfssb,\ +vftcidb,vftcisb,vgbm,vgemb,vgemf,vgemg,vgemh,vgemq,vgfmab,vgfmaf,vgfmag,vgfmah,\ +vgfmb,vgfmf,vgfmg,vgfmh,vgm,vistrb,vistrbs,vistrf,vistrfs,vistrh,vistrhs,vlcb,\ +vlcf,vlcg,vlch,vldeb,vleb,vlebrf,vlebrg,vlebrh,vledb,vlef,vleg,vleh,vleib,\ +vleif,vleig,vleih,vlpb,vlpf,vlpg,vlph,vlpq,vmaeb,vmaef,vmaeg,vmaeh,vmahb,vmahf,\ +vmahg,vmahh,vmahq,vmalb,vmaleb,vmalef,vmaleg,vmaleh,vmalf,vmalg,vmalhb,vmalhf,\ +vmalhg,vmalhh,vmalhq,vmalhw,vmalob,vmalof,vmalog,vmaloh,vmalq,vmaob,vmaof,\ +vmaog,vmaoh,vmeb,vmef,vmeg,vmeh,vmhb,vmhf,vmhg,vmhh,vmhq,vmlb,vmleb,vmlef,\ +vmleg,vmleh,vmlf,vmlg,vmlhb,vmlhf,vmlhg,vmlhh,vmlhq,vmlhw,vmlob,vmlof,vmlog,\ +vmloh,vmlq,vmnb,vmnf,vmng,vmnh,vmnlb,vmnlf,vmnlg,vmnlh,vmnlq,vmnq,vmob,vmof,\ +vmog,vmoh,vmrhb,vmrhf,vmrhg,vmrhh,vmrlb,vmrlf,vmrlg,vmrlh,vmslg,vmxb,vmxf,vmxg,\ +vmxh,vmxlb,vmxlf,vmxlg,vmxlh,vmxlq,vmxq,vn,vnc,vnn,vno,vnot,vnx,vo,voc,vone,\ +vpdi,vperm,vpkf,vpkg,vpkh,vpklsf,vpklsfs,vpklsg,vpklsgs,vpklsh,vpklshs,vpksf,\ +vpksfs,vpksg,vpksgs,vpksh,vpkshs,vpopct,vpopctb,vpopctf,vpopctg,vpopcth,vrepb,\ +vrepf,vrepg,vreph,vrepi,vrepib,vrepif,vrepig,vrepih,vsb,vsbcbiq,vsbiq,vscbib,\ +vscbif,vscbig,vscbih,vscbiq,vsegb,vsegf,vsegh,vsel,vsf,vsg,vsh,vsl,vslb,vsld,\ +vsldb,vsq,vsra,vsrab,vsrd,vsrl,vsrlb,vsumb,vsumgf,vsumgh,vsumh,vsumqf,vsumqg,\ +vtm,vuphb,vuphf,vuphg,vuphh,vuplb,vuplf,vuplg,vuplhb,vuplhf,vuplhg,vuplhh,\ +vuplhw,vupllb,vupllf,vupllg,vupllh,vx,vzero,wcdgb,wcdlgb,wcefb,wcelfb,wcfeb,\ +wcgdb,wclfeb,wclgdb,wfadb,wfasb,wfaxb,wfcdb,wfcedb,wfcesb,wfcexb,wfcexbs,\ +wfchdb,wfchedb,wfchesb,wfchexb,wfchexbs,wfchsb,wfchxb,wfchxbs,wfcsb,wfcxb,\ +wfidb,wfisb,wfixb,wfkdb,wfkedb,wfkesb,wfkexb,wfkhdb,wfkhedb,wfkhesb,wfkhexb,\ +wfkhsb,wfkhxb,wfksb,wfkxb,wflcdb,wflcsb,wflcxb,wflld,wflndb,wflnsb,wflnxb,\ +wflpdb,wflpsb,wflpxb,wflrx,wfmadb,wfmasb,wfmaxb,wfmaxxb,wfmdb,wfminxb,wfmsb,\ +wfmsdb,wfmssb,wfmsxb,wfmxb,wfnmaxb,wfnmsxb,wfsdb,wfssb,wfsxb,wftcixb,wldeb,\ +wledb") + (const_int 1)] (const_int 0))) + +(define_attr "z17_cracked" "" + (cond [(eq_attr "mnemonic" "bas,basr,bras,brasl,cdfbr,cdftr,cdgbr,cdgtr,\ +cdlfbr,cdlftr,cdlgbr,cdlgtr,cefbr,cegbr,celfbr,celgbr,cfdbr,cfebr,cfxbr,cgdbr,\ +cgdtr,cgebr,cgxbr,cgxtr,chhsi,clfdbr,clfdtr,clfebr,clfxbr,clfxtr,clgdbr,clgdtr,\ +clgebr,clgxbr,clgxtr,cs,csg,csy,efpc,ex,exrl,lcgfr,lngfr,lpgfr,lpq,lxr,lzxr,\ +mvc,nc,oc,rnsbg,rosbg,rxsbg,stpq,vgef,vgeg,vscef,vsceg,vsteb,vstebrh,vsteh,xc") + (const_int 1)] (const_int 0))) + +(define_attr "z17_expanded" "" + (cond [(eq_attr "mnemonic" "cds,cdsg,cdsy,cxfbr,cxftr,cxgbr,cxgtr,cxlfbr,\ +cxlftr,cxlgbr,cxlgtr,d,dl,dlg,dsg,dsgf,lam,lm,lmg,lmy,sldl,srda,srdl,stam,stm,\ +stmg,stmy,tbegin,tbeginc") + (const_int 1)] (const_int 0))) + +(define_attr "z17_groupalone" "" + (cond [(eq_attr "mnemonic" "alc,alcg,alcgr,alcr,axbr,axtr,clc,cxbr,cxfbr,\ +cxftr,cxgbr,cxgtr,cxlfbr,cxlftr,cxlgbr,cxlgtr,cxtr,d,dl,dlg,dlgr,dlr,dr,dsg,\ +dsgf,dsgfr,dsgr,dxbr,dxtr,ex,exrl,fixbr,fixbra,fixtr,flogr,kxbr,kxtr,lcxbr,\ +lnxbr,lpxbr,ltxbr,ltxtr,lxdb,lxdbr,lxdtr,lxeb,lxebr,m,madb,maeb,maebr,mfy,mg,\ +mgrk,ml,mlg,mlgr,mlr,mr,msdb,mseb,msebr,mvc,mvcrl,mxbr,mxtr,nc,oc,ppa,sfpc,slb,\ +slbg,slbgr,slbr,sqxbr,sxbr,sxtr,tabort,tbegin,tbeginc,tcxb,tdcxt,tend,xc") + (const_int 1)] (const_int 0))) + +(define_attr "z17_endgroup" "" + (cond [(eq_attr "mnemonic" "bas,basr,bcr,br,bras,brasl,cdsg,clfebr,cs,csg,csy,\ +efpc,ex,exrl,ipm,lam,lpq,lxr,nopr,sldl,srda,srdl,stam,stm,stmg,stmy,tbegin,\ +tbeginc") + (const_int 1)] (const_int 0))) + +(define_attr "z17_groupoftwo" "" + (cond [(eq_attr "mnemonic" "cdfbr,cdftr,cdgbr,cdgtr,cdlfbr,cdlftr,cdlgbr,\ +cdlgtr,cefbr,cegbr,celfbr,celgbr,cfdbr,cfebr,cfxbr,cgdbr,cgdtr,cgebr,cgxbr,\ +cgxtr,chhsi,clfdbr,clfdtr,clfxbr,clfxtr,clgdbr,clgdtr,clgebr,clgxbr,clgxtr,\ +lcgfr,lngfr,lpgfr,lzxr,vacccq,vacq,vblendb,vblendf,vblendg,vblendh,vblendq,\ +veval,vfmadb,vfmasb,vfmsdb,vfmssb,vfnmadb,vfnmasb,vfnmsdb,vfnmssb,vgef,vgeg,\ +vgfmab,vgfmaf,vgfmag,vgfmah,vmaeb,vmaef,vmaeg,vmaeh,vmahb,vmahf,vmahg,vmahh,\ +vmahq,vmalb,vmaleb,vmalef,vmaleg,vmaleh,vmalf,vmalg,vmalhb,vmalhf,vmalhg,\ +vmalhh,vmalhq,vmalhw,vmalob,vmalof,vmalog,vmaloh,vmalq,vmaob,vmaof,vmaog,vmaoh,\ +vmslg,vperm,vsbcbiq,vsbiq,vscef,vsceg,vsel,vsteb,vstebrh,vsteh,wfmadb,wfmasb,\ +wfmaxb,wfmsdb,wfmssb,wfmsxb,wfnmaxb,wfnmsxb") + (const_int 1)] (const_int 0))) + +(define_insn_reservation "z17_0" 0 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "a,afi,ag,agfi,aghi,aghik,agr,agrk,ahi,ahik,al,alfi,alg,\ +algf,algfi,algfr,alghsik,algr,algrk,alhsik,alr,alrk,aly,ar,ark,ay,b,bc,bcr,bi,\ +br,brcl,c,cfi,cg,cgfi,cghi,cghsi,cgit,cgr,cgrl,cgrt,chi,chsi,cit,cl,clfhsi,\ +clfi,clfit,clg,clgf,clgfi,clgfr,clgfrl,clghrl,clghsi,clgit,clgr,clgrl,clgrt,\ +clgt,clhhsi,clhrl,cli,cliy,clr,clrl,clrt,clt,cly,cr,crl,crt,cy,etnd,ic,icm,\ +icmh,icmy,icy,iihf,iilf,j,jg,la,larl,lat,lay,lb,lbr,lcdfr,lcgr,lcr,ldgr,ldr,\ +lgat,lgb,lgbr,lgf,lgfi,lgfr,lgfrl,lgh,lghi,lghr,lghrl,lgr,lh,lhi,lhr,lhrl,lhy,\ +llcr,llgcr,llgfr,llghr,llgtr,llhr,llihf,llihh,llihl,llilf,llilh,llill,lndfr,\ +lngr,lnr,lpdfr,lpgr,lpr,lr,lrv,lrvg,lrvgr,lrvh,lrvr,lt,ltg,ltgf,ltgfr,ltgr,ltr,\ +lzdr,lzer,n,ncgrk,ncrk,ng,ngr,ngrk,nihf,nihh,nihl,nilf,nilh,nill,nngrk,nnrk,\ +nogrk,nop,nopr,nork,nr,nrk,nxgrk,nxrk,ny,o,ocgrk,ocrk,og,ogr,ogrk,oihf,oihh,\ +oihl,oilf,oilh,oill,or,ork,oy,pfpo,risbg,risbgn,rll,rllg,s,sg,sgr,sgrk,sl,sldl,\ +slfi,slg,slgf,slgfi,slgfr,slgr,slgrk,sll,sllg,sllk,slr,slrk,sly,sr,sra,srag,\ +srak,srda,srdl,srk,srl,srlg,srlk,sy,tm,tmh,tmhh,tmhl,tml,tmlh,tmll,tmy,vlr,\ +vlvgb,vlvgf,vlvgg,vlvgh,x,xg,xgr,xgrk,xihf,xilf,xr,xrk,xy")) "nothing") + +(define_insn_reservation "z17_1" 1 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "agf,agfr,agh,agsi,ah,ahy,algsi,alsi,asi,cgf,cgfr,cgfrl,\ +cgh,cghrl,ch,chrl,chy,clm,clmy,cpsdr,laa,laag,lan,lang,lao,laog,lax,laxg,le,\ +ler,ley,llxab,llxaf,llxag,llxah,llxaq,loc,locg,locghi,locgr,lochi,locr,lxab,\ +lxaf,lxag,lxah,lxaq,mvghi,mvhhi,mvhi,mvi,mviy,ni,niy,ntstg,oi,oiy,selgr,selr,\ +sgf,sgfr,sgh,sh,shy,st,stc,stcy,stg,stgrl,sth,sthrl,sthy,stoc,stocg,strl,strv,\ +strvg,strvh,sty,vab,vaccb,vacccq,vaccf,vaccg,vacch,vaccq,vacq,vaf,vag,vah,vaq,\ +vavgb,vavgf,vavgg,vavgh,vavglb,vavglf,vavglg,vavglh,vavglq,vavgq,vblendb,\ +vblendf,vblendg,vblendh,vblendq,vbperm,vceqb,vceqbs,vceqf,vceqfs,vceqg,vceqgs,\ +vceqh,vceqhs,vceqq,vceqqs,vcfn,vchb,vchbs,vchf,vchfs,vchg,vchgs,vchh,vchhs,\ +vchlb,vchlbs,vchlf,vchlfs,vchlg,vchlgs,vchlh,vchlhs,vchlq,vchlqs,vchq,vchqs,\ +vclfnh,vclfnl,vclzb,vclzf,vclzg,vclzh,vclzq,vcnf,vcrnf,vctzb,vctzf,vctzg,vctzh,\ +vctzq,verimb,verimf,verimg,verimh,verllb,verllf,verllg,verllh,verllvb,verllvf,\ +verllvg,verllvh,veslb,veslf,veslg,veslh,veslvb,veslvf,veslvg,veslvh,vesrab,\ +vesraf,vesrag,vesrah,vesravb,vesravf,vesravg,vesravh,vesrlb,vesrlf,vesrlg,\ +vesrlh,vesrlvb,vesrlvf,vesrlvg,vesrlvh,veval,vfcedb,vfcedbs,vfcesb,vfcesbs,\ +vfchdb,vfchdbs,vfchedb,vfchedbs,vfchesb,vfchesbs,vfchsb,vfchsbs,vfkedb,vfkesb,\ +vfkhdb,vfkhedb,vfkhesb,vfkhsb,vflcdb,vflcsb,vflndb,vflnsb,vflpdb,vflpsb,\ +vfmaxdb,vfmaxsb,vfmindb,vfminsb,vgbm,vgemb,vgemf,vgemg,vgemh,vgemq,vgm,vlcb,\ +vlcf,vlcg,vlch,vleb,vlebrf,vlebrg,vlebrh,vlef,vleg,vleh,vleib,vleif,vleig,\ +vleih,vlpb,vlpf,vlpg,vlph,vlpq,vmnb,vmnf,vmng,vmnh,vmnlb,vmnlf,vmnlg,vmnlh,\ +vmnlq,vmnq,vmrhb,vmrhf,vmrhg,vmrhh,vmrlb,vmrlf,vmrlg,vmrlh,vmxb,vmxf,vmxg,vmxh,\ +vmxlb,vmxlf,vmxlg,vmxlh,vmxlq,vmxq,vn,vnc,vnn,vno,vnot,vnx,vo,voc,vone,vpdi,\ +vperm,vpkf,vpkg,vpkh,vpklsf,vpklsfs,vpklsg,vpklsgs,vpklsh,vpklshs,vpksf,vpksfs,\ +vpksg,vpksgs,vpksh,vpkshs,vpopct,vpopctb,vpopctf,vpopctg,vpopcth,vrepb,vrepf,\ +vrepg,vreph,vrepi,vrepib,vrepif,vrepig,vrepih,vsb,vsbcbiq,vsbiq,vscbib,vscbif,\ +vscbig,vscbih,vscbiq,vsegb,vsegf,vsegh,vsel,vsf,vsg,vsh,vsl,vslb,vsld,vsldb,\ +vsq,vsra,vsrab,vsrd,vsrl,vsrlb,vuphb,vuphf,vuphg,vuphh,vuplb,vuplf,vuplg,\ +vuplhb,vuplhf,vuplhg,vuplhh,vuplhw,vupllb,vupllf,vupllg,vupllh,vx,vzero,wfcedb,\ +wfcesb,wfcexb,wfcexbs,wfchdb,wfchedb,wfchesb,wfchexb,wfchexbs,wfchsb,wfchxb,\ +wfchxbs,wfkedb,wfkesb,wfkexb,wfkhdb,wfkhedb,wfkhesb,wfkhexb,wfkhsb,wfkhxb,\ +wflcdb,wflcsb,wflcxb,wflndb,wflnsb,wflnxb,wflpdb,wflpsb,wflpxb,wfmaxxb,wfminxb,\ +xi,xiy")) "nothing") + +(define_insn_reservation "z17_2" 2 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "cdb,cdbr,ceb,cebr,clzg,ctzg,ear,ipm,kdb,kdbr,keb,kebr,l,\ +lcbb,lcdbr,lcebr,ld,lde,ldy,lg,lgdr,lgrl,llc,llgc,llgf,llgfrl,llgh,llghrl,llgt,\ +llh,llhrl,lm,lmg,lmy,lndbr,lnebr,lpdbr,lpebr,lrl,ltdbr,ltebr,ly,popcnt,sar,\ +tcdb,tceb,vfeeb,vfeef,vfeeh,vfeezbs,vfeezfs,vfeezhs,vfeneb,vfenef,vfeneh,\ +vfenezb,vfenezf,vfenezh,vftcidb,vftcisb,vistrb,vistrbs,vistrf,vistrfs,vistrh,\ +vistrhs,vlbrrepf,vlbrrepg,vlbrreph,vlgvb,vlgvf,vlgvg,vlgvh,vllebrzf,vllebrzg,\ +vllebrzh,vllezb,vllezf,vllezg,vllezh,vllezlf,vlrepb,vlrepf,vlrepg,vlreph,vlrl,\ +vlvgp,wfcdb,wfcsb,wfcxb,wfkdb,wfksb,wfkxb,wftcixb")) "nothing") + +(define_insn_reservation "z17_3" 3 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "bdepg,bextg,cds,cdsy,mgh,mghi,mh,mhi,mhy,ms,msc,msfi,msg,\ +msgc,msgf,msgfi,msgfr,msgr,msgrkc,msr,msrkc,msy,std,stdy,ste,stey,vcksm,vgfmab,\ +vgfmaf,vgfmag,vgfmah,vgfmb,vgfmf,vgfmg,vgfmh,vl,vlbb,vlbrf,vlbrg,vlbrh,vlbrq,\ +vlerf,vlerg,vlerh,vll,vlrlr,vmaeb,vmaef,vmaeg,vmaeh,vmahb,vmahf,vmahg,vmahh,\ +vmahq,vmalb,vmaleb,vmalef,vmaleg,vmaleh,vmalf,vmalg,vmalhb,vmalhf,vmalhg,\ +vmalhh,vmalhq,vmalhw,vmalob,vmalof,vmalog,vmaloh,vmalq,vmaob,vmaof,vmaog,vmaoh,\ +vmeb,vmef,vmeg,vmeh,vmhb,vmhf,vmhg,vmhh,vmhq,vmlb,vmleb,vmlef,vmleg,vmleh,vmlf,\ +vmlg,vmlhb,vmlhf,vmlhg,vmlhh,vmlhq,vmlhw,vmlob,vmlof,vmlog,vmloh,vmlq,vmob,\ +vmof,vmog,vmoh,vsumb,vsumgf,vsumgh,vsumh,vsumqf,vsumqg,vtm")) "nothing") + +(define_insn_reservation "z17_4" 4 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "bas,basr,bras,brasl,chhsi,clc,ex,exrl,lam,lcgfr,lngfr,\ +lpgfr,lxr,lzxr,mvcrl,ppa,rnsbg,rosbg,rxsbg,tabort,tend,vst,vstbrf,vstbrg,\ +vstbrh,vstbrq,vstebrf,vstebrg,vstef,vsteg,vsterf,vsterg,vsterh,vstl,vstrl,\ +vstrlr")) "nothing") + +(define_insn_reservation "z17_5" 5 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "adb,adbr,aeb,aebr,alc,alcg,alcgr,alcr,cs,csg,csy,fidbr,\ +fidbra,fiebr,fiebra,ldeb,ldebr,ledbr,madbr,mdb,mdbr,meeb,meebr,msdbr,sdb,sdbr,\ +seb,sebr,slb,slbg,slbgr,slbr,stm,stmg,stmy,vcdgb,vcdlgb,vcefb,vcelfb,vcfeb,\ +vcgdb,vclfeb,vclgdb,vldeb,vledb,vmslg,wcdgb,wcdlgb,wcefb,wcelfb,wcfeb,wcgdb,\ +wclfeb,wclgdb,wflld,wflrx,wldeb,wledb")) "nothing") + +(define_insn_reservation "z17_6" 6 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "sfpc")) "nothing") + +(define_insn_reservation "z17_7" 7 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "adtr,cdtr,fidtr,kdtr,ldetr,ltdtr,sdtr,tdcdt,tdcet,vfadb,\ +vfasb,vfidb,vfisb,vfsdb,vfssb,vgef,vgeg,wfadb,wfasb,wfaxb,wfidb,wfisb,wfixb,\ +wfsdb,wfssb,wfsxb")) "nothing") + +(define_insn_reservation "z17_8" 8 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "cdgtr,cdlgtr,cdsg,cxgtr,cxlgtr,flogr,lpq,m,mfy,mg,mgrk,\ +ml,mlg,mlgr,mlr,mr,stpq,vsteb,vstebrh,vsteh")) "nothing") + +(define_insn_reservation "z17_9" 9 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "cdfbr,cdgbr,cdlfbr,cdlgbr,cefbr,cegbr,celfbr,celgbr,madb,\ +maeb,maebr,msdb,mseb,msebr,stam")) "nothing") + +(define_insn_reservation "z17_10" 10 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "cgdtr,cgxtr,clfdtr,clfxtr,clgdtr,clgxtr,d,dl,dlg,dsg,\ +dsgf,efpc,lxdb,lxdbr,lxeb,lxebr,vscef,vsceg")) "nothing") + +(define_insn_reservation "z17_11" 11 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "cfdbr,cfebr,cgdbr,cgebr,clfdbr,clfebr,clgdbr,clgebr")) "nothing") + +(define_insn_reservation "z17_12" 12 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "cxbr,cxtr,kxbr,kxtr,tbegin,tbeginc,tcxb,tdcxt")) "nothing") + +(define_insn_reservation "z17_13" 13 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "axbr,axtr,cxfbr,cxgbr,cxlfbr,cxlgbr,fixbr,fixbra,fixtr,\ +lcxbr,lnxbr,lpxbr,ltxbr,ltxtr,lxdtr,sxbr,sxtr")) "nothing") + +(define_insn_reservation "z17_14" 14 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "cfxbr,cgxbr,clfxbr,clgxbr,ledtr")) "nothing") + +(define_insn_reservation "z17_15" 15 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "nc,oc")) "nothing") + +(define_insn_reservation "z17_16" 16 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "cdftr,cdlftr,cxftr,cxlftr")) "nothing") + +(define_insn_reservation "z17_18" 18 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "xc")) "nothing") + +(define_insn_reservation "z17_20" 20 + (and (eq_attr "cpu" "z17") + (eq_attr "mnemonic" "ddb,ddbr,ddtr,deb,debr,dlgr,dlr,dr,dsgfr,dsgr,dxbr,dxtr,\ +mdtr,mvc,mxbr,mxtr,sqdb,sqdbr,sqeb,sqebr,sqxbr,vdf,vdg,vdlf,vdlg,vdlq,vdq,\ +vfddb,vfdsb,vfmadb,vfmasb,vfmdb,vfmsb,vfmsdb,vfmssb,vfnmadb,vfnmasb,vfnmsdb,\ +vfnmssb,vfsqdb,vfsqsb,vrf,vrg,vrlf,vrlg,vrlq,vrq,wfddb,wfdsb,wfdxb,wfmadb,\ +wfmasb,wfmaxb,wfmdb,wfmsb,wfmsdb,wfmssb,wfmsxb,wfmxb,wfnmaxb,wfnmsxb,wfsqdb,\ +wfsqxb")) "nothing") + diff --git a/gcc/config/s390/driver-native.cc b/gcc/config/s390/driver-native.cc index 49e8fa0..7a7ceea 100644 --- a/gcc/config/s390/driver-native.cc +++ b/gcc/config/s390/driver-native.cc @@ -127,6 +127,10 @@ s390_host_detect_local_cpu (int argc, const char **argv) case 0x3932: cpu = "arch14"; break; + case 0x9175: + case 0x9176: + cpu = "arch15"; + break; default: cpu = "arch15"; break; diff --git a/gcc/config/s390/s390-builtins.def b/gcc/config/s390/s390-builtins.def index d9af9b1..cee2326 100644 --- a/gcc/config/s390/s390-builtins.def +++ b/gcc/config/s390/s390-builtins.def @@ -300,8 +300,8 @@ #define B_VXE2 (1 << 4) /* Builtins requiring the z15 vector extensions. */ #define B_DEP (1 << 5) /* Builtin has been deprecated and a warning should be issued. */ #define B_NNPA (1 << 6) /* Builtins requiring the NNPA Facility. */ -#define B_VXE3 (1 << 7) /* Builtins requiring the arch15 vector extensions. */ -#define B_ARCH15 (1 << 8) /* Builtins requiring arch15. */ +#define B_VXE3 (1 << 7) /* Builtins requiring the z17 vector extensions. */ +#define B_Z17 (1 << 8) /* Builtins requiring z17. */ /* B_DEF defines a standard (not overloaded) builtin B_DEF (<builtin name>, <RTL expander name>, <function attributes>, <builtin flags>, <operand flags, see above>, <fntype>) @@ -3318,8 +3318,8 @@ B_DEF (s390_vcnf, vcnf_v8hi, 0, /* arch 15 builtins */ -B_DEF (s390_bdepg, bdepg, 0, B_ARCH15, 0, BT_FN_ULONG_ULONG_ULONG) -B_DEF (s390_bextg, bextg, 0, B_ARCH15, 0, BT_FN_ULONG_ULONG_ULONG) +B_DEF (s390_bdepg, bdepg, 0, B_Z17, 0, BT_FN_ULONG_ULONG_ULONG) +B_DEF (s390_bextg, bextg, 0, B_Z17, 0, BT_FN_ULONG_ULONG_ULONG) OB_DEF (s390_vec_blend, s390_vec_blend_s8, s390_vec_blend_dbl, B_VXE3, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI) OB_DEF_VAR (s390_vec_blend_s8, s390_vblendb, 0, 0, BT_OV_V16QI_V16QI_V16QI_V16QI) diff --git a/gcc/config/s390/s390-c.cc b/gcc/config/s390/s390-c.cc index 311d74a..a01c44c 100644 --- a/gcc/config/s390/s390-c.cc +++ b/gcc/config/s390/s390-c.cc @@ -962,7 +962,7 @@ s390_resolve_overloaded_builtin (location_t loc, tree ob_fndecl, if (!TARGET_VXE3 && (ob_flags & B_VXE3)) { - error_at (loc, "%qF requires arch15 or higher", ob_fndecl); + error_at (loc, "%qF requires z17 or higher", ob_fndecl); return error_mark_node; } @@ -1056,7 +1056,7 @@ s390_resolve_overloaded_builtin (location_t loc, tree ob_fndecl, if (!TARGET_VXE3 && bflags_overloaded_builtin_var[last_match_index] & B_VXE3) { - error_at (loc, "%qs matching variant requires arch15 or higher", + error_at (loc, "%qs matching variant requires z17 or higher", IDENTIFIER_POINTER (DECL_NAME (ob_fndecl))); return error_mark_node; } diff --git a/gcc/config/s390/s390-opts.h b/gcc/config/s390/s390-opts.h index 437d3b9..9cacb2c 100644 --- a/gcc/config/s390/s390-opts.h +++ b/gcc/config/s390/s390-opts.h @@ -39,7 +39,7 @@ enum processor_type PROCESSOR_3906_Z14, PROCESSOR_8561_Z15, PROCESSOR_3931_Z16, - PROCESSOR_ARCH15, + PROCESSOR_9175_Z17, PROCESSOR_NATIVE, PROCESSOR_max }; diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc index 0ff3fd5..e3edf85 100644 --- a/gcc/config/s390/s390.cc +++ b/gcc/config/s390/s390.cc @@ -342,7 +342,7 @@ const struct s390_processor processor_table[] = { "z14", "arch12", PROCESSOR_3906_Z14, &zEC12_cost, 12 }, { "z15", "arch13", PROCESSOR_8561_Z15, &zEC12_cost, 13 }, { "z16", "arch14", PROCESSOR_3931_Z16, &zEC12_cost, 14 }, - { "arch15", "arch15", PROCESSOR_ARCH15, &zEC12_cost, 15 }, + { "z17", "arch15", PROCESSOR_9175_Z17, &zEC12_cost, 15 }, { "native", "", PROCESSOR_NATIVE, NULL, 0 } }; @@ -916,7 +916,7 @@ s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, if ((bflags & B_VXE3) && !TARGET_VXE3) { - error ("Builtin %qF requires arch15 or higher", fndecl); + error ("Builtin %qF requires z17 or higher", fndecl); return const0_rtx; } } @@ -9204,7 +9204,7 @@ s390_issue_rate (void) case PROCESSOR_3906_Z14: case PROCESSOR_8561_Z15: case PROCESSOR_3931_Z16: - case PROCESSOR_ARCH15: + case PROCESSOR_9175_Z17: default: return 1; } @@ -14496,7 +14496,21 @@ s390_call_saved_register_used (tree call_expr) for (reg = 0; reg < nregs; reg++) if (!call_used_or_fixed_reg_p (reg + REGNO (parm_rtx))) - return true; + { + rtx parm; + /* Allow passing through unmodified value from caller, + see PR119873. */ + if (TREE_CODE (parameter) == SSA_NAME + && SSA_NAME_IS_DEFAULT_DEF (parameter) + && SSA_NAME_VAR (parameter) + && TREE_CODE (SSA_NAME_VAR (parameter)) == PARM_DECL + && (parm = DECL_INCOMING_RTL (SSA_NAME_VAR (parameter))) + && REG_P (parm) + && REGNO (parm) == REGNO (parm_rtx) + && REG_NREGS (parm) == REG_NREGS (parm_rtx)) + break; + return true; + } } else if (GET_CODE (parm_rtx) == PARALLEL) { @@ -14510,7 +14524,17 @@ s390_call_saved_register_used (tree call_expr) gcc_assert (REG_NREGS (r) == 1); if (!call_used_or_fixed_reg_p (REGNO (r))) - return true; + { + rtx parm; + if (TREE_CODE (parameter) == SSA_NAME + && SSA_NAME_IS_DEFAULT_DEF (parameter) + && SSA_NAME_VAR (parameter) + && TREE_CODE (SSA_NAME_VAR (parameter)) == PARM_DECL + && (parm = DECL_INCOMING_RTL (SSA_NAME_VAR (parameter))) + && rtx_equal_p (parm_rtx, parm)) + break; + return true; + } } } } @@ -14543,8 +14567,9 @@ s390_function_ok_for_sibcall (tree decl, tree exp) return false; /* Register 6 on s390 is available as an argument register but unfortunately - "caller saved". This makes functions needing this register for arguments - not suitable for sibcalls. */ + "caller saved". This makes functions needing this register for arguments + not suitable for sibcalls, unless the same value is passed from the + caller. */ return !s390_call_saved_register_used (exp); } @@ -15632,7 +15657,6 @@ s390_get_sched_attrmask (rtx_insn *insn) mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO; break; case PROCESSOR_3931_Z16: - case PROCESSOR_ARCH15: if (get_attr_z16_cracked (insn)) mask |= S390_SCHED_ATTR_MASK_CRACKED; if (get_attr_z16_expanded (insn)) @@ -15644,6 +15668,18 @@ s390_get_sched_attrmask (rtx_insn *insn) if (get_attr_z16_groupoftwo (insn)) mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO; break; + case PROCESSOR_9175_Z17: + if (get_attr_z17_cracked (insn)) + mask |= S390_SCHED_ATTR_MASK_CRACKED; + if (get_attr_z17_expanded (insn)) + mask |= S390_SCHED_ATTR_MASK_EXPANDED; + if (get_attr_z17_endgroup (insn)) + mask |= S390_SCHED_ATTR_MASK_ENDGROUP; + if (get_attr_z17_groupalone (insn)) + mask |= S390_SCHED_ATTR_MASK_GROUPALONE; + if (get_attr_z17_groupoftwo (insn)) + mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO; + break; default: gcc_unreachable (); } @@ -15691,7 +15727,6 @@ s390_get_unit_mask (rtx_insn *insn, int *units) mask |= 1 << 3; break; case PROCESSOR_3931_Z16: - case PROCESSOR_ARCH15: *units = 4; if (get_attr_z16_unit_lsu (insn)) mask |= 1 << 0; @@ -15702,6 +15737,17 @@ s390_get_unit_mask (rtx_insn *insn, int *units) if (get_attr_z16_unit_vfu (insn)) mask |= 1 << 3; break; + case PROCESSOR_9175_Z17: + *units = 4; + if (get_attr_z17_unit_lsu (insn)) + mask |= 1 << 0; + if (get_attr_z17_unit_fxa (insn)) + mask |= 1 << 1; + if (get_attr_z17_unit_fxb (insn)) + mask |= 1 << 2; + if (get_attr_z17_unit_vfu (insn)) + mask |= 1 << 3; + break; default: gcc_unreachable (); } @@ -15715,7 +15761,8 @@ s390_is_fpd (rtx_insn *insn) return false; return get_attr_z13_unit_fpd (insn) || get_attr_z14_unit_fpd (insn) - || get_attr_z15_unit_fpd (insn) || get_attr_z16_unit_fpd (insn); + || get_attr_z15_unit_fpd (insn) || get_attr_z16_unit_fpd (insn) + || get_attr_z17_unit_fpd (insn); } static bool @@ -15725,7 +15772,8 @@ s390_is_fxd (rtx_insn *insn) return false; return get_attr_z13_unit_fxd (insn) || get_attr_z14_unit_fxd (insn) - || get_attr_z15_unit_fxd (insn) || get_attr_z16_unit_fxd (insn); + || get_attr_z15_unit_fxd (insn) || get_attr_z16_unit_fxd (insn) + || get_attr_z17_unit_fxd (insn); } /* Returns TRUE if INSN is a long-running instruction. */ diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h index 6f7195d..8b04bc9 100644 --- a/gcc/config/s390/s390.h +++ b/gcc/config/s390/s390.h @@ -45,12 +45,12 @@ enum processor_flags PF_NNPA = 32768, PF_Z16 = 65536, PF_VXE3 = 131072, - PF_ARCH15 = 262144 + PF_Z17 = 262144 }; /* This is necessary to avoid a warning about comparing different enum types. */ -#define s390_tune_attr ((enum attr_cpu)(s390_tune > PROCESSOR_3931_Z16 ? PROCESSOR_3931_Z16 : s390_tune )) +#define s390_tune_attr ((enum attr_cpu)(s390_tune > PROCESSOR_9175_Z17 ? PROCESSOR_9175_Z17 : s390_tune )) /* These flags indicate that the generated code should run on a cpu providing the respective hardware facility regardless of the @@ -124,10 +124,10 @@ enum processor_flags (s390_arch_flags & PF_VXE3) #define TARGET_CPU_VXE3_P(opts) \ (opts->x_s390_arch_flags & PF_VXE3) -#define TARGET_CPU_ARCH15 \ - (s390_arch_flags & PF_ARCH15) -#define TARGET_CPU_ARCH15_P(opts) \ - (opts->x_s390_arch_flags & PF_ARCH15) +#define TARGET_CPU_Z17 \ + (s390_arch_flags & PF_Z17) +#define TARGET_CPU_Z17_P(opts) \ + (opts->x_s390_arch_flags & PF_Z17) #define TARGET_HARD_FLOAT_P(opts) (!TARGET_SOFT_FLOAT_P(opts)) @@ -198,9 +198,9 @@ enum processor_flags (TARGET_VX && TARGET_CPU_VXE3) #define TARGET_VXE3_P(opts) \ (TARGET_VX_P (opts) && TARGET_CPU_VXE3_P (opts)) -#define TARGET_ARCH15 (TARGET_ZARCH && TARGET_CPU_ARCH15) -#define TARGET_ARCH15_P(opts) \ - (TARGET_ZARCH_P (opts->x_target_flags) && TARGET_CPU_ARCH15_P (opts)) +#define TARGET_Z17 (TARGET_ZARCH && TARGET_CPU_Z17) +#define TARGET_Z17_P(opts) \ + (TARGET_ZARCH_P (opts->x_target_flags) && TARGET_CPU_Z17_P (opts)) #if defined(HAVE_AS_VECTOR_LOADSTORE_ALIGNMENT_HINTS_ON_Z13) #define TARGET_VECTOR_LOADSTORE_ALIGNMENT_HINTS TARGET_Z13 diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 9d49580..05b9da6 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -599,11 +599,11 @@ ;; Processor type. This attribute must exactly match the processor_type ;; enumeration in s390.h. -(define_attr "cpu" "z900,z990,z9_109,z9_ec,z10,z196,zEC12,z13,z14,z15,z16" +(define_attr "cpu" "z900,z990,z9_109,z9_ec,z10,z196,zEC12,z13,z14,z15,z16,z17" (const (symbol_ref "s390_tune_attr"))) (define_attr "cpu_facility" - "standard,ieee,zarch,cpu_zarch,longdisp,extimm,dfp,z10,z196,zEC12,vx,z13,z14,vxe,z15,vxe2,z16,nnpa,vxe3,arch15" + "standard,ieee,zarch,cpu_zarch,longdisp,extimm,dfp,z10,z196,zEC12,vx,z13,z14,vxe,z15,vxe2,z16,nnpa,vxe3,z17" (const_string "standard")) (define_attr "enabled" "" @@ -681,8 +681,8 @@ (match_test "TARGET_VXE3")) (const_int 1) - (and (eq_attr "cpu_facility" "arch15") - (match_test "TARGET_ARCH15")) + (and (eq_attr "cpu_facility" "z17") + (match_test "TARGET_Z17")) (const_int 1) ] (const_int 0))) @@ -725,6 +725,9 @@ ;; Pipeline description for z16 (include "3931.md") +;; Pipeline description for z17 +(include "9175.md") + ;; Predicates (include "predicates.md") @@ -2056,7 +2059,7 @@ [(set (match_operand:DI 0 "register_operand" "=d") (ashift:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "a")) (const_int LXAMODEITER)))] - "TARGET_ARCH15 && TARGET_64BIT" + "TARGET_Z17 && TARGET_64BIT" "lxa<lxamode>\t%0,0(%1,0)" [(set_attr "op_type" "RXY")]) @@ -2066,7 +2069,7 @@ (ashift:DI (sign_extend:DI (plus:SI (match_operand:SI 1 "register_operand" "a") (match_operand:SI 2 "const_int_operand"))) (const_int LXAMODEITER)))] - "TARGET_ARCH15 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF" + "TARGET_Z17 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF" "lxa<lxamode>\t%0,%2(%1,0)" [(set_attr "op_type" "RXY")]) @@ -2076,7 +2079,7 @@ (plus:DI (ashift:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "a")) (const_int LXAMODEITER)) (match_operand:DI 2 "register_operand" "a")))] - "TARGET_ARCH15 && TARGET_64BIT" + "TARGET_Z17 && TARGET_64BIT" "lxa<lxamode>\t%0,0(%1,%2)" [(set_attr "op_type" "RXY")]) @@ -2087,7 +2090,7 @@ (match_operand:SI 2 "const_int_operand"))) (const_int LXAMODEITER)) (match_operand:DI 3 "register_operand" "a")))] - "TARGET_ARCH15 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF" + "TARGET_Z17 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF" "lxa<lxamode>\t%0,%2(%1,%3)" [(set_attr "op_type" "RXY")]) @@ -2096,7 +2099,7 @@ (plus:DI (sign_extend:DI (plus:SI (match_operand:SI 1 "register_operand" "a") (match_operand:SI 2 "const_int_operand"))) (match_operand:DI 3 "register_operand" "a")))] - "TARGET_ARCH15 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF" + "TARGET_Z17 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF" "lxab\t%0,%2(%1,%3)" [(set_attr "op_type" "RXY")]) @@ -2113,7 +2116,7 @@ 0) (const_int LXAMODEITER)) (const_int <LLXAMASK>)))] - "TARGET_ARCH15 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF" + "TARGET_Z17 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF" "llxa<lxamode>\t%0,%2(%1,0)" [(set_attr "op_type" "RXY")]) @@ -2124,7 +2127,7 @@ (const_int LXAMODEITER)) (const_int <LLXAMASK>)) (match_operand:DI 2 "register_operand" "a")))] - "TARGET_ARCH15 && TARGET_64BIT" + "TARGET_Z17 && TARGET_64BIT" "llxa<lxamode>\t%0,0(%1,%2)" [(set_attr "op_type" "RXY")]) @@ -2137,7 +2140,7 @@ (const_int LXAMODEITER)) (const_int <LLXAMASK>)) (match_operand:DI 3 "register_operand" "a")))] - "TARGET_ARCH15 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF" + "TARGET_Z17 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF" "llxa<lxamode>\t%0,%2(%1,%3)" [(set_attr "op_type" "RXY")]) @@ -2146,7 +2149,7 @@ (plus:DI (zero_extend:DI (plus:SI (match_operand:SI 1 "register_operand" "a") (match_operand:SI 2 "const_int_operand"))) (match_operand:DI 3 "register_operand" "a")))] - "TARGET_ARCH15 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF" + "TARGET_Z17 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF" "llxab\t%0,%2(%1,%3)" [(set_attr "op_type" "RXY")]) @@ -3594,7 +3597,7 @@ (match_operand:BLK 1 "memory_operand" "")) (use (match_operand 2 "const_int_operand" "")) (use (match_operand 3 "immediate_operand" "")) - (clobber (scratch))] + (clobber (match_scratch 4))] "reload_completed" [(parallel [(set (match_dup 0) (match_dup 1)) @@ -3606,7 +3609,7 @@ (match_operand:BLK 1 "memory_operand" "")) (use (match_operand 2 "register_operand" "")) (use (match_operand 3 "memory_operand" "")) - (clobber (scratch))] + (clobber (match_scratch 4))] "reload_completed" [(parallel [(unspec [(match_dup 2) (match_dup 3) @@ -3620,14 +3623,14 @@ (match_operand:BLK 1 "memory_operand" "")) (use (match_operand 2 "register_operand" "")) (use (const:BLK (unspec:BLK [(const_int 0)] UNSPEC_INSN))) - (clobber (scratch))] + (clobber (match_scratch 3))] "TARGET_Z10 && reload_completed" [(parallel [(unspec [(match_dup 2) (const_int 0) - (label_ref (match_dup 3))] UNSPEC_EXECUTE) + (label_ref (match_dup 4))] UNSPEC_EXECUTE) (set (match_dup 0) (match_dup 1)) (use (const_int 1))])] - "operands[3] = gen_label_rtx ();") + "operands[4] = gen_label_rtx ();") (define_split [(set (match_operand:BLK 0 "memory_operand" "") @@ -3849,7 +3852,7 @@ (const_int 0)) (use (match_operand 1 "const_int_operand" "")) (use (match_operand 2 "immediate_operand" "")) - (clobber (scratch)) + (clobber (match_scratch 3)) (clobber (reg:CC CC_REGNUM))] "reload_completed" [(parallel @@ -3863,7 +3866,7 @@ (const_int 0)) (use (match_operand 1 "register_operand" "")) (use (match_operand 2 "memory_operand" "")) - (clobber (scratch)) + (clobber (match_scratch 3)) (clobber (reg:CC CC_REGNUM))] "reload_completed" [(parallel @@ -3879,7 +3882,7 @@ (const_int 0)) (use (match_operand 1 "register_operand" "")) (use (const:BLK (unspec:BLK [(const_int 0)] UNSPEC_INSN))) - (clobber (scratch)) + (clobber (match_scratch 2)) (clobber (reg:CC CC_REGNUM))] "TARGET_Z10 && reload_completed" [(parallel @@ -4044,7 +4047,7 @@ (match_operand:BLK 1 "memory_operand" ""))) (use (match_operand 2 "const_int_operand" "")) (use (match_operand 3 "immediate_operand" "")) - (clobber (scratch))] + (clobber (match_scratch 4))] "reload_completed" [(parallel [(set (reg:CCU CC_REGNUM) (compare:CCU (match_dup 0) (match_dup 1))) @@ -4057,7 +4060,7 @@ (match_operand:BLK 1 "memory_operand" ""))) (use (match_operand 2 "register_operand" "")) (use (match_operand 3 "memory_operand" "")) - (clobber (scratch))] + (clobber (match_scratch 4))] "reload_completed" [(parallel [(unspec [(match_dup 2) (match_dup 3) @@ -4072,7 +4075,7 @@ (match_operand:BLK 1 "memory_operand" ""))) (use (match_operand 2 "register_operand" "")) (use (const:BLK (unspec:BLK [(const_int 0)] UNSPEC_INSN))) - (clobber (scratch))] + (clobber (match_scratch 3))] "TARGET_Z10 && reload_completed" [(parallel [(unspec [(match_dup 2) (const_int 0) @@ -4940,7 +4943,7 @@ (unspec:DI [(match_operand:DI 1 "register_operand" "d") (match_operand:DI 2 "register_operand" "d")] UNSPEC_BDEPG))] - "TARGET_ARCH15 && TARGET_64BIT" + "TARGET_Z17 && TARGET_64BIT" "bdepg\t%0,%1,%2" [(set_attr "op_type" "RRF")]) @@ -4953,7 +4956,7 @@ (unspec:DI [(match_operand:DI 1 "register_operand" "d") (match_operand:DI 2 "register_operand" "d")] UNSPEC_BEXTG))] - "TARGET_ARCH15 && TARGET_64BIT" + "TARGET_Z17 && TARGET_64BIT" "bextg\t%0,%1,%2" [(set_attr "op_type" "RRF")]) @@ -9580,7 +9583,7 @@ (clz:DI (match_operand:DI 1 "register_operand" "d")))] "TARGET_EXTIMM && TARGET_ZARCH" { - if (!(TARGET_ARCH15 && TARGET_64BIT)) + if (!(TARGET_Z17 && TARGET_64BIT)) { rtx_insn *insn; rtx clz_equal; @@ -9601,7 +9604,7 @@ (define_insn "*clzg" [(set (match_operand:DI 0 "register_operand" "=d") (clz:DI (match_operand:DI 1 "register_operand" "d")))] - "TARGET_ARCH15 && TARGET_64BIT" + "TARGET_Z17 && TARGET_64BIT" "clzg\t%0,%1" [(set_attr "op_type" "RRE")]) @@ -9630,7 +9633,7 @@ (define_insn "ctzdi2" [(set (match_operand:DI 0 "register_operand" "=d") (ctz:DI (match_operand:DI 1 "register_operand" "d")))] - "TARGET_ARCH15 && TARGET_64BIT" + "TARGET_Z17 && TARGET_64BIT" "ctzg\t%0,%1" [(set_attr "op_type" "RRE")]) diff --git a/gcc/config/s390/s390.opt b/gcc/config/s390/s390.opt index f064597..6753a93 100644 --- a/gcc/config/s390/s390.opt +++ b/gcc/config/s390/s390.opt @@ -122,7 +122,10 @@ EnumValue Enum(processor_type) String(z16) Value(PROCESSOR_3931_Z16) EnumValue -Enum(processor_type) String(arch15) Value(PROCESSOR_ARCH15) +Enum(processor_type) String(arch15) Value(PROCESSOR_9175_Z17) + +EnumValue +Enum(processor_type) String(z17) Value(PROCESSOR_9175_Z17) EnumValue Enum(processor_type) String(native) Value(PROCESSOR_NATIVE) DriverOnly diff --git a/gcc/config/sh/sh-modes.def b/gcc/config/sh/sh-modes.def index 80650b4..e31ae69 100644 --- a/gcc/config/sh/sh-modes.def +++ b/gcc/config/sh/sh-modes.def @@ -17,6 +17,12 @@ You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ +/* SH has the same reversed quiet bit as MIPS. */ +RESET_FLOAT_FORMAT (SF, mips_single_format); +RESET_FLOAT_FORMAT (DF, mips_double_format); +/* TFmode: IEEE quad floating point (software). */ +FLOAT_MODE (TF, 16, mips_quad_format); + /* Vector modes. */ VECTOR_MODE (INT, QI, 2); /* V2QI */ VECTOR_MODES (INT, 4); /* V4QI V2HI */ |