author     Martin Liska <mliska@suse.cz>    2022-07-19 15:41:29 +0200
committer  Martin Liska <mliska@suse.cz>    2022-07-19 15:41:29 +0200
commit     e9c8572e74d8de56551ed62f799df7742cd523e9
tree       0c765fa720c25f8a08085e6e122ec2aa5e4c5170
parent     85df616e13a6a176883e39362c764a2dfa3448e8
parent     edf0c132b19f73e5739715c2ac90c4ae1e96dc31
Merge branch 'master' into devel/sphinx
40 files changed, 853 insertions, 316 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 994875f..43b70ba 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,115 @@ +2022-07-18 Andrew MacLeod <amacleod@redhat.com> + + PR tree-optimization/106280 + * value-relation.cc (dom_oracle::register_relation): Register + transitives only when it is possible for there to be one. + (dom_oracle::set_one_relation): Return NULL if this is an + existing relation. + +2022-07-18 Maciej W. Rozycki <macro@embecosm.com> + + * doc/invoke.texi (RISC-V Options): Add index references for + `mrelax' and `mriscv-attribute'. + +2022-07-18 Maciej W. Rozycki <macro@embecosm.com> + + * doc/invoke.texi (Option Summary): Add missing second space + around `-mstack-protector-guard-reg='. + +2022-07-18 Maciej W. Rozycki <macro@embecosm.com> + + * doc/invoke.texi (Option Summary): Fix `-mno-riscv-attribute'. + (RISC-V Options): Likewise, and `-mriscv-attribute'. + +2022-07-18 Claudiu Zissulescu <claziss@gmail.com> + + * config/arc/arc-arch.h (arc_tune_attr): Add + ARC_TUNE_ARCHS4X_REL31A variant. + * config/arc/arc.cc (arc_override_options): Tune options for + release 310a. + (arc_sched_issue_rate): Use correct enum. + (arc600_corereg_hazard): Textual change. + (arc_hazard): Add release 310a tunning. + * config/arc/arc.md (tune): Update and take into consideration new + tune option. + (tune_dspmpy): Likewise. + (tune_store): New attribute. + * config/arc/arc.opt (mtune): New tune option. + * config/arc/arcHS4x.md (hs4x_brcc0, hs4x_brcc1): New cpu units. + (hs4x_brcc_op): New instruction rezervation. + (hs4x_data_store_1_op): Likewise. + * config/arc/arc-cpus.def (hs4x_rel31): New cpu variant. + * config/arc/arc-tables.opt: Regenerate. + * config/arc/t-multilib: Likewise. + * doc/invoke.texi (ARC): Update mcpu and tune sections. + +2022-07-18 Richard Biener <rguenther@suse.de> + + * tree-loop-distribution.cc (loop_distribution::distribute_loop): + When computing cost-based merging do not disregard builtin + classified partitions in some cases. + +2022-07-18 Richard Sandiford <richard.sandiford@arm.com> + + PR target/106253 + * config/arm/arm-builtins.cc (arm_builtin_vectorized_function): + Delete. + * config/arm/arm-protos.h (arm_builtin_vectorized_function): Delete. + * config/arm/arm.cc (TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): + Delete. + * config/arm/arm_neon_builtins.def (copysignf): Delete. + * config/arm/iterators.md (nvrint_pattern): New attribute. + * config/arm/neon.md (<NEON_VRINT:nvrint_pattern><VCVTF:mode>2): + New pattern. + (l<NEON_VCVT:nvrint_pattern><su_optab><VCVTF:mode><v_cmp_result>2): + Likewise. + (neon_copysignf<mode>): Rename to... + (copysign<mode>3): ...this. + +2022-07-18 Claudiu Zissulescu <claziss@gmail.com> + + * config/arc/arc.cc (arc_expand_epilogue): Adjust the frame + pointer first when in interrupts. + +2022-07-18 Richard Biener <rguenther@suse.de> + + * tree-loop-distribution.cc (copy_loop_before): Add + the ability to replace the original LC PHI defs. + (generate_loops_for_partition): Pass through a flag + whether to redirect original LC PHI defs. + (generate_code_for_partition): Likewise. + (loop_distribution::distribute_loop): Compute the partition + that should provide the LC PHI defs for common reductions + and pass that down. + +2022-07-18 Richard Ball <richard.ball@arm.com> + + * config/aarch64/aarch64.cc (aarch64_evpc_trn): Use std:swap. + (aarch64_evpc_uzp): Likewise. + (aarch64_evpc_zip): Likewise. 
+ +2022-07-18 Roger Sayle <roger@nextmovesoftware.com> + + PR target/106231 + * config/i386/i386.md (*ctzsidi2_<s>ext): New insn_and_split + to recognize any_extend:DI of ctz:SI which is implicitly extended. + (*ctzsidi2_<s>ext_falsedep): New define_insn to model a DImode + extended ctz:SI that has preceding xor to break false dependency. + +2022-07-18 Roger Sayle <roger@nextmovesoftware.com> + + * config/i386/predicates.md (x86_64_const_vector_operand): + Check the operand's mode matches the specified mode argument. + +2022-07-18 Roger Sayle <roger@nextmovesoftware.com> + + * config/i386/sse.md (kunpckhi): Add UNSPEC_MASKOP unspec. + (kunpcksi): Likewise, add UNSPEC_MASKOP unspec. + (kunpckdi): Likewise, add UNSPEC_MASKOP unspec. + (vec_pack_trunc_qi): Update to specify the now required + UNSPEC_MASKOP unspec. + (vec_pack_trunc_<mode>): Likewise. + 2022-07-16 Takayuki 'January June' Suwa <jjsuwa_sys3175@yahoo.co.jp> * config/xtensa/xtensa.md diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 2ac5479..a394c7a 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20220718 +20220719 diff --git a/gcc/builtins.cc b/gcc/builtins.cc index 35b9197..0d13197 100644 --- a/gcc/builtins.cc +++ b/gcc/builtins.cc @@ -1360,7 +1360,7 @@ expand_builtin_prefetch (tree exp) rtx get_memory_rtx (tree exp, tree len) { - tree orig_exp = exp; + tree orig_exp = exp, base; rtx addr, mem; /* When EXP is not resolved SAVE_EXPR, MEM_ATTRS can be still derived @@ -1391,10 +1391,11 @@ get_memory_rtx (tree exp, tree len) if (is_gimple_mem_ref_addr (TREE_OPERAND (exp, 0))) set_mem_attributes (mem, exp, 0); else if (TREE_CODE (TREE_OPERAND (exp, 0)) == ADDR_EXPR - && (exp = get_base_address (TREE_OPERAND (TREE_OPERAND (exp, 0), - 0)))) + && (base = get_base_address (TREE_OPERAND (TREE_OPERAND (exp, 0), + 0)))) { - exp = build_fold_addr_expr (exp); + unsigned int align = get_pointer_alignment (TREE_OPERAND (exp, 0)); + exp = build_fold_addr_expr (base); exp = fold_build2 (MEM_REF, build_array_type (char_type_node, build_range_type (sizetype, @@ -1402,6 +1403,10 @@ get_memory_rtx (tree exp, tree len) NULL)), exp, build_int_cst (ptr_type_node, 0)); set_mem_attributes (mem, exp, 0); + /* Since we stripped parts make sure the offset is unknown and the + alignment is computed from the original address. */ + clear_mem_offset (mem); + set_mem_align (mem, align); } set_mem_alias_set (mem, 0); return mem; @@ -8625,7 +8630,7 @@ fold_builtin_frexp (location_t loc, tree arg0, tree arg1, tree rettype) if (TYPE_MAIN_VARIANT (TREE_TYPE (arg1)) == integer_type_node) { const REAL_VALUE_TYPE *const value = TREE_REAL_CST_PTR (arg0); - tree frac, exp; + tree frac, exp, res; switch (value->cl) { @@ -8656,7 +8661,9 @@ fold_builtin_frexp (location_t loc, tree arg0, tree arg1, tree rettype) /* Create the COMPOUND_EXPR (*arg1 = trunc, frac). 
*/ arg1 = fold_build2_loc (loc, MODIFY_EXPR, rettype, arg1, exp); TREE_SIDE_EFFECTS (arg1) = 1; - return fold_build2_loc (loc, COMPOUND_EXPR, rettype, arg1, frac); + res = fold_build2_loc (loc, COMPOUND_EXPR, rettype, arg1, frac); + suppress_warning (res, OPT_Wunused_value); + return res; } return NULL_TREE; @@ -8682,6 +8689,7 @@ fold_builtin_modf (location_t loc, tree arg0, tree arg1, tree rettype) { const REAL_VALUE_TYPE *const value = TREE_REAL_CST_PTR (arg0); REAL_VALUE_TYPE trunc, frac; + tree res; switch (value->cl) { @@ -8711,8 +8719,10 @@ fold_builtin_modf (location_t loc, tree arg0, tree arg1, tree rettype) arg1 = fold_build2_loc (loc, MODIFY_EXPR, rettype, arg1, build_real (rettype, trunc)); TREE_SIDE_EFFECTS (arg1) = 1; - return fold_build2_loc (loc, COMPOUND_EXPR, rettype, arg1, - build_real (rettype, frac)); + res = fold_build2_loc (loc, COMPOUND_EXPR, rettype, arg1, + build_real (rettype, frac)); + suppress_warning (res, OPT_Wunused_value); + return res; } return NULL_TREE; @@ -10673,8 +10683,10 @@ do_mpfr_remquo (tree arg0, tree arg1, tree arg_quo) integer_quo)); TREE_SIDE_EFFECTS (result_quo) = 1; /* Combine the quo assignment with the rem. */ - result = non_lvalue (fold_build2 (COMPOUND_EXPR, type, - result_quo, result_rem)); + result = fold_build2 (COMPOUND_EXPR, type, + result_quo, result_rem); + suppress_warning (result, OPT_Wunused_value); + result = non_lvalue (result); } } } diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 1a514c1..4b486ae 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -23498,7 +23498,7 @@ aarch64_evpc_trn (struct expand_vec_perm_d *d) { HOST_WIDE_INT odd; poly_uint64 nelt = d->perm.length (); - rtx out, in0, in1, x; + rtx out, in0, in1; machine_mode vmode = d->vmode; if (GET_MODE_UNIT_SIZE (vmode) > 8) @@ -23522,7 +23522,7 @@ aarch64_evpc_trn (struct expand_vec_perm_d *d) at the head of aarch64-sve.md for details. */ if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD) { - x = in0, in0 = in1, in1 = x; + std::swap (in0, in1); odd = !odd; } out = d->target; @@ -23592,7 +23592,7 @@ static bool aarch64_evpc_uzp (struct expand_vec_perm_d *d) { HOST_WIDE_INT odd; - rtx out, in0, in1, x; + rtx out, in0, in1; machine_mode vmode = d->vmode; if (GET_MODE_UNIT_SIZE (vmode) > 8) @@ -23615,7 +23615,7 @@ aarch64_evpc_uzp (struct expand_vec_perm_d *d) at the head of aarch64-sve.md for details. */ if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD) { - x = in0, in0 = in1, in1 = x; + std::swap (in0, in1); odd = !odd; } out = d->target; @@ -23631,7 +23631,7 @@ aarch64_evpc_zip (struct expand_vec_perm_d *d) { unsigned int high; poly_uint64 nelt = d->perm.length (); - rtx out, in0, in1, x; + rtx out, in0, in1; machine_mode vmode = d->vmode; if (GET_MODE_UNIT_SIZE (vmode) > 8) @@ -23656,7 +23656,7 @@ aarch64_evpc_zip (struct expand_vec_perm_d *d) at the head of aarch64-sve.md for details. 
*/ if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD) { - x = in0, in0 = in1, in1 = x; + std::swap (in0, in1); high = !high; } out = d->target; diff --git a/gcc/config/arc/arc-arch.h b/gcc/config/arc/arc-arch.h index 4c728a8..83b156e 100644 --- a/gcc/config/arc/arc-arch.h +++ b/gcc/config/arc/arc-arch.h @@ -77,7 +77,8 @@ enum arc_tune_attr ARC_TUNE_CORE_3, ARC_TUNE_ARCHS4X, ARC_TUNE_ARCHS4XD, - ARC_TUNE_ARCHS4XD_SLOW + ARC_TUNE_ARCHS4XD_SLOW, + ARC_TUNE_ARCHS4X_REL31A }; /* Extra options for a processor template to hold any CPU specific diff --git a/gcc/config/arc/arc-cpus.def b/gcc/config/arc/arc-cpus.def index baf61db..5668b0f 100644 --- a/gcc/config/arc/arc-cpus.def +++ b/gcc/config/arc/arc-cpus.def @@ -64,6 +64,7 @@ ARC_CPU (hs38, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64, NONE, NONE) ARC_CPU (hs38_linux, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64|FL_FPU_FPUD_ALL, NONE, NONE) ARC_CPU (hs4x, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64, NONE, ARCHS4X) ARC_CPU (hs4xd, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64, NONE, ARCHS4XD) +ARC_CPU (hs4x_rel31, hs, FL_MPYOPT_2|FL_DIVREM|FL_LL64, NONE, ARCHS4X_REL31A) ARC_CPU (arc600, 6xx, FL_BS, NONE, ARC600) ARC_CPU (arc600_norm, 6xx, FL_BS|FL_NORM, NONE, ARC600) diff --git a/gcc/config/arc/arc-tables.opt b/gcc/config/arc/arc-tables.opt index 8cc5135..0a0d354 100644 --- a/gcc/config/arc/arc-tables.opt +++ b/gcc/config/arc/arc-tables.opt @@ -70,6 +70,9 @@ EnumValue Enum(processor_type) String(hs4xd) Value(PROCESSOR_hs4xd) EnumValue +Enum(processor_type) String(hs4x_rel31) Value(PROCESSOR_hs4x_rel31) + +EnumValue Enum(processor_type) String(arc600) Value(PROCESSOR_arc600) EnumValue diff --git a/gcc/config/arc/arc.cc b/gcc/config/arc/arc.cc index fbc17e6..064790b 100644 --- a/gcc/config/arc/arc.cc +++ b/gcc/config/arc/arc.cc @@ -646,8 +646,8 @@ arc_sched_issue_rate (void) { switch (arc_tune) { - case TUNE_ARCHS4X: - case TUNE_ARCHS4XD: + case ARC_TUNE_ARCHS4X: + case ARC_TUNE_ARCHS4XD: return 3; default: break; @@ -1458,6 +1458,12 @@ arc_override_options (void) if (!OPTION_SET_P (unaligned_access) && TARGET_HS) unaligned_access = 1; + if (TARGET_HS && (arc_tune == ARC_TUNE_ARCHS4X_REL31A)) + { + TARGET_CODE_DENSITY_FRAME = 0; + flag_delayed_branch = 0; + } + /* These need to be done at start up. It's convenient to do them here. */ arc_init (); } @@ -3965,7 +3971,7 @@ arc_expand_epilogue (int sibcall_p) if (size) emit_insn (gen_blockage ()); - if (ARC_INTERRUPT_P (fn_type) && restore_fp) + if (ARC_INTERRUPT_P (fn_type)) { /* We need to restore FP before any SP operation in an interrupt. */ @@ -7817,6 +7823,115 @@ arc_store_addr_hazard_p (rtx_insn* producer, rtx_insn* consumer) return arc_store_addr_hazard_internal_p (producer, consumer); } +/* Return length adjustment for INSN. + For ARC600: + A write to a core reg greater or equal to 32 must not be immediately + followed by a use. Anticipate the length requirement to insert a nop + between PRED and SUCC to prevent a hazard. 
*/ + +static int +arc600_corereg_hazard (rtx_insn *pred, rtx_insn *succ) +{ + if (!TARGET_ARC600) + return 0; + if (GET_CODE (PATTERN (pred)) == SEQUENCE) + pred = as_a <rtx_sequence *> (PATTERN (pred))->insn (1); + if (GET_CODE (PATTERN (succ)) == SEQUENCE) + succ = as_a <rtx_sequence *> (PATTERN (succ))->insn (0); + if (recog_memoized (pred) == CODE_FOR_mulsi_600 + || recog_memoized (pred) == CODE_FOR_umul_600 + || recog_memoized (pred) == CODE_FOR_mac_600 + || recog_memoized (pred) == CODE_FOR_mul64_600 + || recog_memoized (pred) == CODE_FOR_mac64_600 + || recog_memoized (pred) == CODE_FOR_umul64_600 + || recog_memoized (pred) == CODE_FOR_umac64_600) + return 0; + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, PATTERN (pred), NONCONST) + { + const_rtx x = *iter; + switch (GET_CODE (x)) + { + case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC: + break; + default: + /* This is also fine for PRE/POST_MODIFY, because they + contain a SET. */ + continue; + } + rtx dest = XEXP (x, 0); + /* Check if this sets a an extension register. N.B. we use 61 for the + condition codes, which is definitely not an extension register. */ + if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61 + /* Check if the same register is used by the PAT. */ + && (refers_to_regno_p + (REGNO (dest), + REGNO (dest) + (GET_MODE_SIZE (GET_MODE (dest)) + 3) / 4U, + PATTERN (succ), 0))) + return 4; + } + return 0; +} + +/* For ARC600: + A write to a core reg greater or equal to 32 must not be immediately + followed by a use. Anticipate the length requirement to insert a nop + between PRED and SUCC to prevent a hazard. */ + +int +arc_hazard (rtx_insn *pred, rtx_insn *succ) +{ + if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ)) + return 0; + + if (TARGET_ARC600) + return arc600_corereg_hazard (pred, succ); + + return 0; +} + +/* When compiling for release 310a, insert a nop before any + conditional jump. */ + +static int +arc_check_release31a (rtx_insn *pred, rtx_insn *succ) +{ + if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ)) + return 0; + + if (!JUMP_P (pred) && !single_set (pred)) + return 0; + + if (!JUMP_P (succ) && !single_set (succ)) + return 0; + + if (TARGET_HS && (arc_tune == ARC_TUNE_ARCHS4X_REL31A)) + switch (get_attr_type (pred)) + { + case TYPE_STORE: + switch (get_attr_type (succ)) + { + case TYPE_BRCC: + case TYPE_BRCC_NO_DELAY_SLOT: + case TYPE_LOOP_END: + return 1; + default: + break; + } + break; + case TYPE_BRCC: + case TYPE_BRCC_NO_DELAY_SLOT: + case TYPE_LOOP_END: + if (get_attr_type (succ) == TYPE_STORE) + return 1; + break; + default: + break; + } + + return 0; +} + /* The same functionality as arc_hazard. It is called in machine reorg before any other optimization. Hence, the NOP size is taken into account when doing branch shortening. */ @@ -7830,10 +7945,8 @@ workaround_arc_anomaly (void) for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) { succ0 = next_real_insn (insn); - if (arc_hazard (insn, succ0)) - { - emit_insn_before (gen_nopv (), succ0); - } + if (arc_hazard (insn, succ0) || arc_check_release31a (insn, succ0)) + emit_insn_before (gen_nopv (), succ0); } if (!TARGET_ARC700) @@ -9324,56 +9437,6 @@ disi_highpart (rtx in) return simplify_gen_subreg (SImode, in, DImode, TARGET_BIG_ENDIAN ? 0 : 4); } -/* Return length adjustment for INSN. - For ARC600: - A write to a core reg greater or equal to 32 must not be immediately - followed by a use. Anticipate the length requirement to insert a nop - between PRED and SUCC to prevent a hazard. 
*/ - -static int -arc600_corereg_hazard (rtx_insn *pred, rtx_insn *succ) -{ - if (!TARGET_ARC600) - return 0; - if (GET_CODE (PATTERN (pred)) == SEQUENCE) - pred = as_a <rtx_sequence *> (PATTERN (pred))->insn (1); - if (GET_CODE (PATTERN (succ)) == SEQUENCE) - succ = as_a <rtx_sequence *> (PATTERN (succ))->insn (0); - if (recog_memoized (pred) == CODE_FOR_mulsi_600 - || recog_memoized (pred) == CODE_FOR_umul_600 - || recog_memoized (pred) == CODE_FOR_mac_600 - || recog_memoized (pred) == CODE_FOR_mul64_600 - || recog_memoized (pred) == CODE_FOR_mac64_600 - || recog_memoized (pred) == CODE_FOR_umul64_600 - || recog_memoized (pred) == CODE_FOR_umac64_600) - return 0; - subrtx_iterator::array_type array; - FOR_EACH_SUBRTX (iter, array, PATTERN (pred), NONCONST) - { - const_rtx x = *iter; - switch (GET_CODE (x)) - { - case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC: - break; - default: - /* This is also fine for PRE/POST_MODIFY, because they - contain a SET. */ - continue; - } - rtx dest = XEXP (x, 0); - /* Check if this sets an extension register. N.B. we use 61 for the - condition codes, which is definitely not an extension register. */ - if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61 - /* Check if the same register is used by the PAT. */ - && (refers_to_regno_p - (REGNO (dest), - REGNO (dest) + (GET_MODE_SIZE (GET_MODE (dest)) + 3) / 4U, - PATTERN (succ), 0))) - return 4; - } - return 0; -} - /* Given a rtx, check if it is an assembly instruction or not. */ static int @@ -9408,23 +9471,6 @@ arc_asm_insn_p (rtx x) return 0; } -/* For ARC600: - A write to a core reg greater or equal to 32 must not be immediately - followed by a use. Anticipate the length requirement to insert a nop - between PRED and SUCC to prevent a hazard. */ - -int -arc_hazard (rtx_insn *pred, rtx_insn *succ) -{ - if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ)) - return 0; - - if (TARGET_ARC600) - return arc600_corereg_hazard (pred, succ); - - return 0; -} - /* Return length adjustment for INSN. */ int diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index 39b3580..7170445 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -645,22 +645,21 @@ ;; is made that makes conditional execution required. 
(define_attr "tune" "none,arc600,arc7xx,arc700_4_2_std,arc700_4_2_xmac, \ -core_3, archs4x, archs4xd, archs4xd_slow" +archs4x, archs4xd" (const - (cond [(symbol_ref "arc_tune == TUNE_ARC600") + (cond [(symbol_ref "arc_tune == ARC_TUNE_ARC600") (const_string "arc600") (symbol_ref "arc_tune == ARC_TUNE_ARC7XX") (const_string "arc7xx") - (symbol_ref "arc_tune == TUNE_ARC700_4_2_STD") + (symbol_ref "arc_tune == ARC_TUNE_ARC700_4_2_STD") (const_string "arc700_4_2_std") - (symbol_ref "arc_tune == TUNE_ARC700_4_2_XMAC") + (symbol_ref "arc_tune == ARC_TUNE_ARC700_4_2_XMAC") (const_string "arc700_4_2_xmac") - (symbol_ref "arc_tune == ARC_TUNE_CORE_3") - (const_string "core_3") - (symbol_ref "arc_tune == TUNE_ARCHS4X") + (ior (symbol_ref "arc_tune == ARC_TUNE_ARCHS4X") + (symbol_ref "arc_tune == ARC_TUNE_ARCHS4X_REL31A")) (const_string "archs4x") - (ior (symbol_ref "arc_tune == TUNE_ARCHS4XD") - (symbol_ref "arc_tune == TUNE_ARCHS4XD_SLOW")) + (ior (symbol_ref "arc_tune == ARC_TUNE_ARCHS4XD") + (symbol_ref "arc_tune == ARC_TUNE_ARCHS4XD_SLOW")) (const_string "archs4xd")] (const_string "none")))) @@ -671,13 +670,22 @@ core_3, archs4x, archs4xd, archs4xd_slow" (define_attr "tune_dspmpy" "none, slow, fast" (const - (cond [(ior (symbol_ref "arc_tune == TUNE_ARCHS4X") - (symbol_ref "arc_tune == TUNE_ARCHS4XD")) + (cond [(ior (symbol_ref "arc_tune == ARC_TUNE_ARCHS4X") + (symbol_ref "arc_tune == ARC_TUNE_ARCHS4XD")) (const_string "fast") - (symbol_ref "arc_tune == TUNE_ARCHS4XD_SLOW") + (symbol_ref "arc_tune == ARC_TUNE_ARCHS4XD_SLOW") (const_string "slow")] (const_string "none")))) +(define_attr "tune_store" "none, normal, rel31a" + (const + (cond [(ior (symbol_ref "arc_tune == ARC_TUNE_ARCHS4X") + (symbol_ref "arc_tune == ARC_TUNE_ARCHS4XD")) + (const_string "normal") + (symbol_ref "arc_tune == ARC_TUNE_ARCHS4X_REL31A") + (const_string "rel31a")] + (const_string "none")))) + ;; Move instructions. (define_expand "movqi" [(set (match_operand:QI 0 "move_dest_operand" "") diff --git a/gcc/config/arc/arc.opt b/gcc/config/arc/arc.opt index eb85f49..0add5a2 100644 --- a/gcc/config/arc/arc.opt +++ b/gcc/config/arc/arc.opt @@ -276,6 +276,9 @@ Enum(arc_tune_attr) String(arc750d) Value(ARC_TUNE_ARC700_4_2_XMAC) EnumValue Enum(arc_tune_attr) String(core3) Value(ARC_TUNE_CORE_3) +EnumValue +Enum(arc_tune_attr) String(release31a) Value(ARC_TUNE_ARCHS4X_REL31A) + mindexed-loads Target Var(TARGET_INDEXED_LOADS) Init(TARGET_INDEXED_LOADS_DEFAULT) Enable the use of indexed loads. 
diff --git a/gcc/config/arc/arcHS4x.md b/gcc/config/arc/arcHS4x.md index 5136eba..1009833 100644 --- a/gcc/config/arc/arcHS4x.md +++ b/gcc/config/arc/arcHS4x.md @@ -27,14 +27,21 @@ (define_cpu_unit "hs4x_mult" "ARCHS4x") (define_cpu_unit "hs4x_x1, hs4x_x2" "ARCHS4x") (define_cpu_unit "hs4x_y1, hs4x_y2" "ARCHS4x") +(define_cpu_unit "hs4x_brcc0, hs4x_brcc1" "ARCHS4x") (define_insn_reservation "hs4x_brj_op" 1 (and (match_test "TARGET_HS") (eq_attr "tune" "archs4x, archs4xd") (eq_attr "type" "call, call_no_delay_slot, uncond_branch, jump, \ -branch, brcc,brcc_no_delay_slot, sfunc")) +branch, sfunc")) "hs4x_issue0") +(define_insn_reservation "hs4x_brcc_op" 1 + (and (match_test "TARGET_HS") + (eq_attr "tune" "archs4x, archs4xd") + (eq_attr "type" "brcc,brcc_no_delay_slot,loop_end")) + "hs4x_issue0 + hs4x_brcc0 + hs4x_brcc1") + (define_insn_reservation "hs4x_data_load_op" 4 (and (match_test "TARGET_HS") (eq_attr "tune" "archs4x, archs4xd") @@ -43,10 +50,16 @@ branch, brcc,brcc_no_delay_slot, sfunc")) (define_insn_reservation "hs4x_data_store_op" 1 (and (match_test "TARGET_HS") - (eq_attr "tune" "archs4x, archs4xd") + (eq_attr "tune_store" "normal") (eq_attr "type" "store")) "hs4x_issue1 + hs4x_ld_st") +(define_insn_reservation "hs4x_data_store_1_op" 2 + (and (match_test "TARGET_HS") + (eq_attr "tune_store" "rel31a") + (eq_attr "type" "store")) + "hs4x_issue1 + hs4x_ld_st + hs4x_brcc0, hs4x_brcc1") + ;; Advanced ALU (define_insn_reservation "hs4x_adv_alue_op" 4 (and (match_test "TARGET_HS") diff --git a/gcc/config/arc/t-multilib b/gcc/config/arc/t-multilib index 8d97ad1..921945e 100644 --- a/gcc/config/arc/t-multilib +++ b/gcc/config/arc/t-multilib @@ -21,9 +21,9 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -MULTILIB_OPTIONS = mcpu=em/mcpu=em_mini/mcpu=arcem/mcpu=em4/mcpu=em4_dmips/mcpu=em4_fpus/mcpu=em4_fpuda/mcpu=quarkse_em/mcpu=hs/mcpu=archs/mcpu=hs34/mcpu=hs38/mcpu=hs38_linux/mcpu=hs4x/mcpu=hs4xd/mcpu=arc600/mcpu=arc600_norm/mcpu=arc600_mul64/mcpu=arc600_mul32x16/mcpu=arc601/mcpu=arc601_norm/mcpu=arc601_mul64/mcpu=arc601_mul32x16/mcpu=arc700/mcpu=nps400 +MULTILIB_OPTIONS = mcpu=em/mcpu=em_mini/mcpu=arcem/mcpu=em4/mcpu=em4_dmips/mcpu=em4_fpus/mcpu=em4_fpuda/mcpu=quarkse_em/mcpu=hs/mcpu=archs/mcpu=hs34/mcpu=hs38/mcpu=hs38_linux/mcpu=hs4x/mcpu=hs4xd/mcpu=hs4x_rel31/mcpu=arc600/mcpu=arc600_norm/mcpu=arc600_mul64/mcpu=arc600_mul32x16/mcpu=arc601/mcpu=arc601_norm/mcpu=arc601_mul64/mcpu=arc601_mul32x16/mcpu=arc700/mcpu=nps400 -MULTILIB_DIRNAMES = em em_mini arcem em4 em4_dmips em4_fpus em4_fpuda quarkse_em hs archs hs34 hs38 hs38_linux hs4x hs4xd arc600 arc600_norm arc600_mul64 arc600_mul32x16 arc601 arc601_norm arc601_mul64 arc601_mul32x16 arc700 nps400 +MULTILIB_DIRNAMES = em em_mini arcem em4 em4_dmips em4_fpus em4_fpuda quarkse_em hs archs hs34 hs38 hs38_linux hs4x hs4xd hs4x_rel31 arc600 arc600_norm arc600_mul64 arc600_mul32x16 arc601 arc601_norm arc601_mul64 arc601_mul32x16 arc700 nps400 # Aliases: MULTILIB_MATCHES = mcpu?arc600=mcpu?ARC600 diff --git a/gcc/config/arm/arm-builtins.cc b/gcc/config/arm/arm-builtins.cc index d917137..8f8155c 100644 --- a/gcc/config/arm/arm-builtins.cc +++ b/gcc/config/arm/arm-builtins.cc @@ -4026,129 +4026,6 @@ arm_expand_builtin (tree exp, return NULL_RTX; } -tree -arm_builtin_vectorized_function (unsigned int fn, tree type_out, tree type_in) -{ - machine_mode in_mode, out_mode; - int in_n, out_n; - bool out_unsigned_p = TYPE_UNSIGNED (type_out); - - /* Can't provide any vectorized builtins when we can't use NEON. 
*/ - if (!TARGET_NEON) - return NULL_TREE; - - if (TREE_CODE (type_out) != VECTOR_TYPE - || TREE_CODE (type_in) != VECTOR_TYPE) - return NULL_TREE; - - out_mode = TYPE_MODE (TREE_TYPE (type_out)); - out_n = TYPE_VECTOR_SUBPARTS (type_out); - in_mode = TYPE_MODE (TREE_TYPE (type_in)); - in_n = TYPE_VECTOR_SUBPARTS (type_in); - -/* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the - decl of the vectorized builtin for the appropriate vector mode. - NULL_TREE is returned if no such builtin is available. */ -#undef ARM_CHECK_BUILTIN_MODE -#define ARM_CHECK_BUILTIN_MODE(C) \ - (TARGET_VFP5 \ - && flag_unsafe_math_optimizations \ - && ARM_CHECK_BUILTIN_MODE_1 (C)) - -#undef ARM_CHECK_BUILTIN_MODE_1 -#define ARM_CHECK_BUILTIN_MODE_1(C) \ - (out_mode == SFmode && out_n == C \ - && in_mode == SFmode && in_n == C) - -#undef ARM_FIND_VRINT_VARIANT -#define ARM_FIND_VRINT_VARIANT(N) \ - (ARM_CHECK_BUILTIN_MODE (2) \ - ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \ - : (ARM_CHECK_BUILTIN_MODE (4) \ - ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \ - : NULL_TREE)) - - switch (fn) - { - CASE_CFN_FLOOR: - return ARM_FIND_VRINT_VARIANT (vrintm); - CASE_CFN_CEIL: - return ARM_FIND_VRINT_VARIANT (vrintp); - CASE_CFN_TRUNC: - return ARM_FIND_VRINT_VARIANT (vrintz); - CASE_CFN_ROUND: - return ARM_FIND_VRINT_VARIANT (vrinta); -#undef ARM_CHECK_BUILTIN_MODE_1 -#define ARM_CHECK_BUILTIN_MODE_1(C) \ - (out_mode == SImode && out_n == C \ - && in_mode == SFmode && in_n == C) - -#define ARM_FIND_VCVT_VARIANT(N) \ - (ARM_CHECK_BUILTIN_MODE (2) \ - ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sfv2si, false) \ - : (ARM_CHECK_BUILTIN_MODE (4) \ - ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sfv4si, false) \ - : NULL_TREE)) - -#define ARM_FIND_VCVTU_VARIANT(N) \ - (ARM_CHECK_BUILTIN_MODE (2) \ - ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv2sfv2si, false) \ - : (ARM_CHECK_BUILTIN_MODE (4) \ - ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv4sfv4si, false) \ - : NULL_TREE)) - CASE_CFN_LROUND: - return (out_unsigned_p - ? ARM_FIND_VCVTU_VARIANT (vcvta) - : ARM_FIND_VCVT_VARIANT (vcvta)); - CASE_CFN_LCEIL: - return (out_unsigned_p - ? ARM_FIND_VCVTU_VARIANT (vcvtp) - : ARM_FIND_VCVT_VARIANT (vcvtp)); - CASE_CFN_LFLOOR: - return (out_unsigned_p - ? 
ARM_FIND_VCVTU_VARIANT (vcvtm) - : ARM_FIND_VCVT_VARIANT (vcvtm)); -#undef ARM_CHECK_BUILTIN_MODE -#define ARM_CHECK_BUILTIN_MODE(C, N) \ - (out_mode == N##mode && out_n == C \ - && in_mode == N##mode && in_n == C) - case CFN_BUILT_IN_BSWAP16: - if (ARM_CHECK_BUILTIN_MODE (4, HI)) - return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi, false); - else if (ARM_CHECK_BUILTIN_MODE (8, HI)) - return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi, false); - else - return NULL_TREE; - case CFN_BUILT_IN_BSWAP32: - if (ARM_CHECK_BUILTIN_MODE (2, SI)) - return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si, false); - else if (ARM_CHECK_BUILTIN_MODE (4, SI)) - return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si, false); - else - return NULL_TREE; - case CFN_BUILT_IN_BSWAP64: - if (ARM_CHECK_BUILTIN_MODE (2, DI)) - return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di, false); - else - return NULL_TREE; - CASE_CFN_COPYSIGN: - if (ARM_CHECK_BUILTIN_MODE (2, SF)) - return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv2sf, false); - else if (ARM_CHECK_BUILTIN_MODE (4, SF)) - return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv4sf, false); - else - return NULL_TREE; - - default: - return NULL_TREE; - } - return NULL_TREE; -} -#undef ARM_FIND_VCVT_VARIANT -#undef ARM_FIND_VCVTU_VARIANT -#undef ARM_CHECK_BUILTIN_MODE -#undef ARM_FIND_VRINT_VARIANT - void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) { diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 9d14209..f8aabbd 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -103,7 +103,6 @@ extern void neon_pairwise_reduce (rtx, rtx, machine_mode, rtx (*) (rtx, rtx, rtx)); extern rtx mve_bool_vec_to_const (rtx const_vec); extern rtx neon_make_constant (rtx, bool generate = true); -extern tree arm_builtin_vectorized_function (unsigned int, tree, tree); extern void neon_expand_vector_init (rtx, rtx); extern void neon_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT, const_tree); extern void arm_const_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT); diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index 33fb98d..eca99c9 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -739,10 +739,6 @@ static const struct attribute_spec arm_attribute_table[] = #undef TARGET_VECTORIZE_BUILTINS #define TARGET_VECTORIZE_BUILTINS -#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION -#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \ - arm_builtin_vectorized_function - #undef TARGET_VECTOR_ALIGNMENT #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index 445b2bf..2e642cc 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -264,7 +264,6 @@ VAR1 (UNOP, vcvtv4hf, v4sf) VAR10 (TERNOP, vbsl, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) VAR2 (TERNOP, vbsl, v8hf, v4hf) -VAR2 (UNOP, copysignf, v2sf, v4sf) VAR2 (UNOP, vrintn, v2sf, v4sf) VAR2 (UNOP, vrinta, v2sf, v4sf) VAR2 (UNOP, vrintp, v2sf, v4sf) diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 37cf797..29062cd 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -1150,6 +1150,13 @@ (UNSPEC_VRINTA "unconditional") (UNSPEC_VRINTM "unconditional") (UNSPEC_VRINTR "nocond") (UNSPEC_VRINTX "nocond")]) +(define_int_attr nvrint_pattern [(UNSPEC_NVRINTZ "btrunc") + (UNSPEC_NVRINTP "ceil") + (UNSPEC_NVRINTA "round") + (UNSPEC_NVRINTM "floor") + (UNSPEC_NVRINTX 
"rint") + (UNSPEC_NVRINTN "roundeven")]) + (define_int_attr nvrint_variant [(UNSPEC_NVRINTZ "z") (UNSPEC_NVRINTP "p") (UNSPEC_NVRINTA "a") (UNSPEC_NVRINTM "m") (UNSPEC_NVRINTX "x") (UNSPEC_NVRINTN "n")]) diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 275bcc1..e1dae28 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -635,6 +635,13 @@ [(set_attr "type" "neon_fp_mla_s<q>")] ) +(define_expand "<NEON_VRINT:nvrint_pattern><VCVTF:mode>2" + [(set (match_operand:VCVTF 0 "s_register_operand") + (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand")] + NEON_VRINT))] + "TARGET_NEON && TARGET_VFP5 && flag_unsafe_math_optimizations" +) + (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>" [(set (match_operand:VCVTF 0 "s_register_operand" "=w") (unspec:VCVTF [(match_operand:VCVTF 1 @@ -645,6 +652,14 @@ [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")] ) +(define_expand "l<NEON_VCVT:nvrint_pattern><su_optab><VCVTF:mode><v_cmp_result>2" + [(set (match_operand:<V_cmp_result> 0 "register_operand") + (FIXUORS:<V_cmp_result> + (unspec:VCVTF [(match_operand:VCVTF 1 "register_operand")] + NEON_VCVT)))] + "TARGET_NEON && TARGET_VFP5 && flag_unsafe_math_optimizations" +) + (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>" [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w") (FIXUORS:<V_cmp_result> (unspec:VCVTF @@ -3059,7 +3074,7 @@ "TARGET_I8MM" ) -(define_expand "neon_copysignf<mode>" +(define_expand "copysign<mode>3" [(match_operand:VCVTF 0 "register_operand") (match_operand:VCVTF 1 "register_operand") (match_operand:VCVTF 2 "register_operand")] diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc index 9433745..a851a7a 100644 --- a/gcc/config/xtensa/xtensa.cc +++ b/gcc/config/xtensa/xtensa.cc @@ -4073,7 +4073,7 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, case SET: if (xtensa_simm12b (INTVAL (x))) { - *total = 4; + *total = speed ? COSTS_N_INSNS (1) : 0; return true; } break; diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index e513316..e39f522 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -11380,7 +11380,7 @@ keyword after the declarator. It is up to you to make sure that the assembler names you choose do not conflict with any other assembler symbols, or reference registers. -@subsubheading Assembler names for data: +@subsubheading Assembler names for data This sample shows how to specify the assembler name for data: @@ -11402,7 +11402,7 @@ since such variables do not have assembler names. If you are trying to put the variable in a particular register, see @ref{Explicit Register Variables}. -@subsubheading Assembler names for functions: +@subsubheading Assembler names for functions To specify the assembler name for functions, write a declaration for the function before its definition and put @code{asm} there, like this: diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 7594245..ed629ca 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -1182,10 +1182,10 @@ See RS/6000 and PowerPC Options. 
-mcmodel=medlow -mcmodel=medany @gol -mexplicit-relocs -mno-explicit-relocs @gol -mrelax -mno-relax @gol --mriscv-attribute -mmo-riscv-attribute @gol +-mriscv-attribute -mno-riscv-attribute @gol -malign-data=@var{type} @gol -mbig-endian -mlittle-endian @gol --mstack-protector-guard=@var{guard} -mstack-protector-guard-reg=@var{reg} @gol +-mstack-protector-guard=@var{guard} -mstack-protector-guard-reg=@var{reg} @gol -mstack-protector-guard-offset=@var{offset}} @emph{RL78 Options} @@ -20016,6 +20016,15 @@ Compile for ARC HS38 CPU. @item hs38_linux Compile for ARC HS38 CPU with all hardware extensions on. +@item hs4x +Compile for ARC HS4x CPU. + +@item hs4xd +Compile for ARC HS4xD CPU. + +@item hs4x_rel31 +Compile for ARC HS4x CPU release 3.10a. + @item arc600_norm Compile for ARC 600 CPU with @code{norm} instructions enabled. @@ -20625,6 +20634,13 @@ Tune for ARC725D CPU. @item ARC750D Tune for ARC750D CPU. +@item core3 +Tune for ARCv2 core3 type CPU. This option enable usage of +@code{dbnz} instruction. + +@item release31a +Tune for ARC4x release 3.10a. + @end table @item -mmultcost=@var{num} @@ -28316,12 +28332,14 @@ limit optimization. @item -mrelax @itemx -mno-relax +@opindex mrelax Take advantage of linker relaxations to reduce the number of instructions required to materialize symbol addresses. The default is to take advantage of linker relaxations. -@item -memit-attribute -@itemx -mno-emit-attribute +@item -mriscv-attribute +@itemx -mno-riscv-attribute +@opindex mriscv-attribute Emit (do not emit) RISC-V attribute to record extra information into ELF objects. This feature requires at least binutils 2.32. diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc index e3920c8..3ac39c1 100644 --- a/gcc/dwarf2out.cc +++ b/gcc/dwarf2out.cc @@ -6069,7 +6069,11 @@ dwarf2out_register_external_die (tree decl, const char *sym, if (!external_die_map) external_die_map = hash_map<tree, sym_off_pair>::create_ggc (1000); - gcc_checking_assert (!external_die_map->get (decl)); + /* When we do tree merging during WPA we can end up re-using GC memory + as there's currently no way to unregister external DIEs. Ideally + we'd register them only after merging finished but allowing override + here is easiest. See PR106334. */ + gcc_checking_assert (flag_wpa || !external_die_map->get (decl)); sym_off_pair p = { IDENTIFIER_POINTER (get_identifier (sym)), off }; external_die_map->put (decl, p); } diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index cc8ea71..36913da 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,24 @@ +2022-07-18 Richard Biener <rguenther@suse.de> + + * gcc.dg/tree-ssa/ldist-24.c: XFAIL. + * gcc.dg/tree-ssa/ldist-36.c: Adjust expected outcome. + +2022-07-18 Richard Sandiford <richard.sandiford@arm.com> + + PR target/106253 + * gcc.target/arm/vect_unary_1.c: New test. + * gcc.target/arm/vect_binary_1.c: Likewise. + +2022-07-18 Claudiu Zissulescu <claziss@gmail.com> + + * gcc.target/arc/interrupt-13.c: New file. + +2022-07-18 Roger Sayle <roger@nextmovesoftware.com> + + PR target/106231 + * gcc.target/i386/pr106231-1.c: New test case. + * gcc.target/i386/pr106231-2.c: New test case. + 2022-07-15 H.J. 
Lu <hjl.tools@gmail.com> PR target/85620 diff --git a/gcc/testsuite/gcc.dg/pr106264.c b/gcc/testsuite/gcc.dg/pr106264.c new file mode 100644 index 0000000..6b4af49 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr106264.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -Wall" } */ +double frexp (double, int*); +double modf (double, double*); +double remquo (double, double, int*); + +int f (void) +{ + int y; + frexp (1.0, &y); + return y; +} + +double g (void) +{ + double y; + modf (1.0, &y); + return y; +} + +int h (void) +{ + int y; + remquo (1.0, 1.0, &y); + return y; +} + diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-24.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-24.c index 75f7b8f..2403a24 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-24.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-24.c @@ -20,5 +20,6 @@ void foo () } } -/* { dg-final { scan-tree-dump "generated memcpy" "ldist" } } */ -/* { dg-final { scan-tree-dump "generated memset zero" "ldist" } } */ +/* The cost modeling does not consider WAR as beneficial to split. */ +/* { dg-final { scan-tree-dump "generated memcpy" "ldist" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "generated memset zero" "ldist" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-36.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-36.c index 07393f0..6d56006 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-36.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-36.c @@ -25,4 +25,5 @@ foo (struct st * restrict p) } } -/* { dg-final { scan-tree-dump-times "Loop nest . distributed: split to 0 loops and 3 library" 1 "ldist" } } */ +/* The cost modeling doesn't consider splitting a WAR re-use profitable. */ +/* { dg-final { scan-tree-dump-times "Loop nest . distributed: split to 1 loops and 1 library" 1 "ldist" } } */ diff --git a/gcc/testsuite/gcc.target/arc/interrupt-13.c b/gcc/testsuite/gcc.target/arc/interrupt-13.c new file mode 100644 index 0000000..0ed8451 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc/interrupt-13.c @@ -0,0 +1,15 @@ +/* { dg-options "-O2" } */ + +extern int foo (int *); + +void __attribute__((interrupt("ilink"))) +irq (void) +{ + struct { + int x0; + int x1; + } a = {1 ,2}; + foo ((int *)&a); +} + +/* { dg-final { scan-assembler "add_s\\s+sp,sp,8.*pop_s\\s+r0" } } */ diff --git a/gcc/testsuite/gcc.target/arm/vect_binary_1.c b/gcc/testsuite/gcc.target/arm/vect_binary_1.c new file mode 100644 index 0000000..c1fc905 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/vect_binary_1.c @@ -0,0 +1,50 @@ +/* { dg-do compile { target { arm*-*-* } } } */ +/* { dg-require-effective-target arm_hard_ok } */ +/* { dg-require-effective-target arm_v8_neon_ok } */ +/* { dg-add-options arm_v8_neon } */ +/* { dg-additional-options "-O3 -mfloat-abi=hard" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#include <stdint.h> + +#define TEST2(OUT, NAME, IN) \ +OUT __attribute__((vector_size(sizeof(OUT) * 2))) \ +test2_##OUT##_##NAME##_##IN (float dummy, \ + IN __attribute__((vector_size(sizeof(IN) * 2))) y, \ + IN __attribute__((vector_size(sizeof(IN) * 2))) z) \ +{ \ + OUT __attribute__((vector_size(sizeof(OUT) * 2))) x; \ + x[0] = __builtin_##NAME (y[0], z[0]); \ + x[1] = __builtin_##NAME (y[1], z[1]); \ + return x; \ +} + +#define TEST4(OUT, NAME, IN) \ +OUT __attribute__((vector_size(sizeof(OUT) * 4))) \ +test4_##OUT##_##NAME##_##IN (float dummy, \ + IN __attribute__((vector_size(sizeof(OUT) * 4))) y, \ + IN __attribute__((vector_size(sizeof(OUT) * 4))) z) \ +{ \ + OUT __attribute__((vector_size(sizeof(OUT) * 4))) x; \ + 
x[0] = __builtin_##NAME (y[0], z[0]); \ + x[1] = __builtin_##NAME (y[1], z[1]); \ + x[2] = __builtin_##NAME (y[2], z[2]); \ + x[3] = __builtin_##NAME (y[3], z[3]); \ + return x; \ +} + +/* +** test2_float_copysignf_float: { target arm_little_endian } +** vmov.i32 d0, #(0x80000000|2147483648)(\s+.*) +** vbsl d0, d2, d1 +** bx lr +*/ +TEST2 (float, copysignf, float) + +/* +** test4_float_copysignf_float: { target arm_little_endian } +** vmov.i32 q0, #(0x80000000|2147483648)(\s+.*) +** vbsl q0, q2, q1 +** bx lr +*/ +TEST4 (float, copysignf, float) diff --git a/gcc/testsuite/gcc.target/arm/vect_unary_1.c b/gcc/testsuite/gcc.target/arm/vect_unary_1.c new file mode 100644 index 0000000..4677180 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/vect_unary_1.c @@ -0,0 +1,224 @@ +/* { dg-do compile { target { arm*-*-* } } } */ +/* { dg-require-effective-target arm_hard_ok } */ +/* { dg-require-effective-target arm_v8_neon_ok } */ +/* { dg-add-options arm_v8_neon } */ +/* { dg-additional-options "-Ofast -mfloat-abi=hard" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#include <stdint.h> + +#define TEST2(OUT, NAME, IN) \ +OUT __attribute__((vector_size(sizeof(OUT) * 2))) \ +test2_##OUT##_##NAME##_##IN (float dummy, \ + IN __attribute__((vector_size(sizeof(IN) * 2))) y) \ +{ \ + OUT __attribute__((vector_size(sizeof(OUT) * 2))) x; \ + x[0] = __builtin_##NAME (y[0]); \ + x[1] = __builtin_##NAME (y[1]); \ + return x; \ +} + +#define TEST4(OUT, NAME, IN) \ +OUT __attribute__((vector_size(sizeof(OUT) * 4))) \ +test4_##OUT##_##NAME##_##IN (float dummy, \ + IN __attribute__((vector_size(sizeof(OUT) * 4))) y) \ +{ \ + OUT __attribute__((vector_size(sizeof(OUT) * 4))) x; \ + x[0] = __builtin_##NAME (y[0]); \ + x[1] = __builtin_##NAME (y[1]); \ + x[2] = __builtin_##NAME (y[2]); \ + x[3] = __builtin_##NAME (y[3]); \ + return x; \ +} + +#define TEST8(OUT, NAME, IN) \ +OUT __attribute__((vector_size(sizeof(OUT) * 8))) \ +test8_##OUT##_##NAME##_##IN (float dummy, \ + IN __attribute__((vector_size(sizeof(OUT) * 8))) y) \ +{ \ + OUT __attribute__((vector_size(sizeof(OUT) * 8))) x; \ + x[0] = __builtin_##NAME (y[0]); \ + x[1] = __builtin_##NAME (y[1]); \ + x[2] = __builtin_##NAME (y[2]); \ + x[3] = __builtin_##NAME (y[3]); \ + x[4] = __builtin_##NAME (y[4]); \ + x[5] = __builtin_##NAME (y[5]); \ + x[6] = __builtin_##NAME (y[6]); \ + x[7] = __builtin_##NAME (y[7]); \ + return x; \ +} + +/* +** test2_float_truncf_float: +** vrintz.f32 d0, d1 +** bx lr +*/ +TEST2 (float, truncf, float) + +/* +** test4_float_truncf_float: +** vrintz.f32 q0, q1 +** bx lr +*/ +TEST4 (float, truncf, float) + +/* +** test2_float_roundf_float: +** vrinta.f32 d0, d1 +** bx lr +*/ +TEST2 (float, roundf, float) + +/* +** test4_float_roundf_float: +** vrinta.f32 q0, q1 +** bx lr +*/ +TEST4 (float, roundf, float) + +/* +** test2_float_floorf_float: +** vrintm.f32 d0, d1 +** bx lr +*/ +TEST2 (float, floorf, float) + +/* +** test4_float_floorf_float: +** vrintm.f32 q0, q1 +** bx lr +*/ +TEST4 (float, floorf, float) + +/* +** test2_float_ceilf_float: +** vrintp.f32 d0, d1 +** bx lr +*/ +TEST2 (float, ceilf, float) + +/* +** test4_float_ceilf_float: +** vrintp.f32 q0, q1 +** bx lr +*/ +TEST4 (float, ceilf, float) + +/* +** test2_float_rintf_float: +** vrintx.f32 d0, d1 +** bx lr +*/ +TEST2 (float, rintf, float) + +/* +** test4_float_rintf_float: +** vrintx.f32 q0, q1 +** bx lr +*/ +TEST4 (float, rintf, float) + +/* +** test2_float_roundevenf_float: +** vrintn.f32 d0, d1 +** bx lr +*/ +TEST2 (float, roundevenf, float) + +/* +** 
test4_float_roundevenf_float: +** vrintn.f32 q0, q1 +** bx lr +*/ +TEST4 (float, roundevenf, float) + +/* +** test2_int_roundf_float: +** vcvta.s32.f32 d0, d1 +** bx lr +*/ +TEST2 (int, roundf, float) + +/* +** test4_int_roundf_float: +** vcvta.s32.f32 q0, q1 +** bx lr +*/ +TEST4 (int, roundf, float) + +/* +** test2_int_floorf_float: +** vcvtm.s32.f32 d0, d1 +** bx lr +*/ +TEST2 (int, floorf, float) + +/* +** test4_int_floorf_float: +** vcvtm.s32.f32 q0, q1 +** bx lr +*/ +TEST4 (int, floorf, float) + +/* +** test2_int_ceilf_float: +** vcvtp.s32.f32 d0, d1 +** bx lr +*/ +TEST2 (int, ceilf, float) + +/* +** test4_int_ceilf_float: +** vcvtp.s32.f32 q0, q1 +** bx lr +*/ +TEST4 (int, ceilf, float) + +/* +** test2_int_clz_int: +** vclz.i32 d0, d1 +** bx lr +*/ +TEST2 (int, clz, int) + +/* +** test4_int_clz_int: +** vclz.i32 q0, q1 +** bx lr +*/ +TEST4 (int, clz, int) + +/* +** test4_int16_t_bswap16_int16_t: { target arm_little_endian } +** vrev16.8 d0, d1 +** bx lr +*/ +TEST4 (int16_t, bswap16, int16_t) + +/* +** test8_int16_t_bswap16_int16_t: { target arm_little_endian } +** vrev16.8 q0, q1 +** bx lr +*/ +TEST8 (int16_t, bswap16, int16_t) + +/* +** test2_int_bswap32_int: { target arm_little_endian } +** vrev32.8 d0, d1 +** bx lr +*/ +TEST2 (int, bswap32, int) + +/* +** test4_int_bswap32_int: { target arm_little_endian } +** vrev32.8 q0, q1 +** bx lr +*/ +TEST4 (int, bswap32, int) + +/* +** test2_int64_t_bswap64_int64_t: { target arm_little_endian } +** vrev64.8 q0, q1 +** bx lr +*/ +TEST2 (int64_t, bswap64, int64_t) diff --git a/gcc/testsuite/gfortran.dg/pr106331.f90 b/gcc/testsuite/gfortran.dg/pr106331.f90 new file mode 100644 index 0000000..3873863 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr106331.f90 @@ -0,0 +1,7 @@ +! { dg-do run } +! { dg-options "-Og" } + +PROGRAM main + CHARACTER(LEN=24) :: a(2) + a = '' +END PROGRAM diff --git a/gcc/tree-loop-distribution.cc b/gcc/tree-loop-distribution.cc index ed7f432..0ee441c 100644 --- a/gcc/tree-loop-distribution.cc +++ b/gcc/tree-loop-distribution.cc @@ -942,7 +942,7 @@ stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple *stmt) /* Return a copy of LOOP placed before LOOP. */ static class loop * -copy_loop_before (class loop *loop) +copy_loop_before (class loop *loop, bool redirect_lc_phi_defs) { class loop *res; edge preheader = loop_preheader_edge (loop); @@ -950,6 +950,24 @@ copy_loop_before (class loop *loop) initialize_original_copy_tables (); res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader); gcc_assert (res != NULL); + + /* When a not last partition is supposed to keep the LC PHIs computed + adjust their definitions. 
*/ + if (redirect_lc_phi_defs) + { + edge exit = single_exit (loop); + for (gphi_iterator si = gsi_start_phis (exit->dest); !gsi_end_p (si); + gsi_next (&si)) + { + gphi *phi = si.phi (); + if (virtual_operand_p (gimple_phi_result (phi))) + continue; + use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit); + tree new_def = get_current_def (USE_FROM_PTR (use_p)); + SET_USE (use_p, new_def); + } + } + free_original_copy_tables (); delete_update_ssa (); @@ -977,7 +995,7 @@ create_bb_after_loop (class loop *loop) static void generate_loops_for_partition (class loop *loop, partition *partition, - bool copy_p) + bool copy_p, bool keep_lc_phis_p) { unsigned i; basic_block *bbs; @@ -985,7 +1003,7 @@ generate_loops_for_partition (class loop *loop, partition *partition, if (copy_p) { int orig_loop_num = loop->orig_loop_num; - loop = copy_loop_before (loop); + loop = copy_loop_before (loop, keep_lc_phis_p); gcc_assert (loop != NULL); loop->orig_loop_num = orig_loop_num; create_preheader (loop, CP_SIMPLE_PREHEADERS); @@ -1336,7 +1354,8 @@ destroy_loop (class loop *loop) static bool generate_code_for_partition (class loop *loop, - partition *partition, bool copy_p) + partition *partition, bool copy_p, + bool keep_lc_phis_p) { switch (partition->kind) { @@ -1345,7 +1364,8 @@ generate_code_for_partition (class loop *loop, /* Reductions all have to be in the last partition. */ gcc_assert (!partition_reduction_p (partition) || !copy_p); - generate_loops_for_partition (loop, partition, copy_p); + generate_loops_for_partition (loop, partition, copy_p, + keep_lc_phis_p); return false; case PKIND_MEMSET: @@ -3013,6 +3033,7 @@ loop_distribution::distribute_loop (class loop *loop, bool any_builtin = false; bool reduction_in_all = false; + int reduction_partition_num = -1; FOR_EACH_VEC_ELT (partitions, i, partition) { reduction_in_all @@ -3069,10 +3090,7 @@ loop_distribution::distribute_loop (class loop *loop, for (i = 0; partitions.iterate (i, &into); ++i) { bool changed = false; - if (partition_builtin_p (into) || into->kind == PKIND_PARTIAL_MEMSET) - continue; - for (int j = i + 1; - partitions.iterate (j, &partition); ++j) + for (int j = i + 1; partitions.iterate (j, &partition); ++j) { if (share_memory_accesses (rdg, into, partition)) { @@ -3092,10 +3110,13 @@ loop_distribution::distribute_loop (class loop *loop, } /* Put a non-builtin partition last if we need to preserve a reduction. - ??? This is a workaround that makes sort_partitions_by_post_order do - the correct thing while in reality it should sort each component - separately and then put the component with a reduction or a non-builtin - last. */ + In most cases this helps to keep a normal partition last avoiding to + spill a reduction result across builtin calls. + ??? The proper way would be to use dependences to see whether we + can move builtin partitions earlier during merge_dep_scc_partitions + and its sort_partitions_by_post_order. Especially when the + dependence graph is composed of multiple independent subgraphs the + heuristic does not work reliably. */ if (reduction_in_all && partition_builtin_p (partitions.last())) FOR_EACH_VEC_ELT (partitions, i, partition) @@ -3126,19 +3147,20 @@ loop_distribution::distribute_loop (class loop *loop, finalize_partitions (loop, &partitions, &alias_ddrs); - /* If there is a reduction in all partitions make sure the last one - is not classified for builtin code generation. */ + /* If there is a reduction in all partitions make sure the last + non-builtin partition provides the LC PHI defs. 
*/ if (reduction_in_all) { - partition = partitions.last (); - if (only_patterns_p - && partition_builtin_p (partition) - && !partition_builtin_p (partitions[0])) + FOR_EACH_VEC_ELT (partitions, i, partition) + if (!partition_builtin_p (partition)) + reduction_partition_num = i; + if (reduction_partition_num == -1) { - nbp = 0; - goto ldist_done; + /* If all partitions are builtin, force the last one to + be code generated as normal partition. */ + partition = partitions.last (); + partition->kind = PKIND_NORMAL; } - partition->kind = PKIND_NORMAL; } nbp = partitions.length (); @@ -3164,7 +3186,8 @@ loop_distribution::distribute_loop (class loop *loop, { if (partition_builtin_p (partition)) (*nb_calls)++; - *destroy_p |= generate_code_for_partition (loop, partition, i < nbp - 1); + *destroy_p |= generate_code_for_partition (loop, partition, i < nbp - 1, + i == reduction_partition_num); } ldist_done: diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc index fdc4bc8..d04cf4b 100644 --- a/gcc/tree-ssa-forwprop.cc +++ b/gcc/tree-ssa-forwprop.cc @@ -2661,7 +2661,7 @@ simplify_permutation (gimple_stmt_iterator *gsi) /* Shuffle of a constructor. */ bool ret = false; - tree res_type = TREE_TYPE (arg0); + tree res_type = TREE_TYPE (gimple_assign_lhs (stmt)); tree opt = fold_ternary (VEC_PERM_EXPR, res_type, arg0, arg1, op2); if (!opt || (TREE_CODE (opt) != CONSTRUCTOR && TREE_CODE (opt) != VECTOR_CST)) diff --git a/gcc/value-relation.cc b/gcc/value-relation.cc index 13ce441..bd34425 100644 --- a/gcc/value-relation.cc +++ b/gcc/value-relation.cc @@ -967,8 +967,12 @@ dom_oracle::register_relation (basic_block bb, relation_kind k, tree op1, equiv_oracle::register_relation (bb, k, op1, op2); else { + // if neither op1 nor op2 are in a relation before this is registered, + // there will be no transitive. + bool check = bitmap_bit_p (m_relation_set, SSA_NAME_VERSION (op1)) + || bitmap_bit_p (m_relation_set, SSA_NAME_VERSION (op2)); relation_chain *ptr = set_one_relation (bb, k, op1, op2); - if (ptr) + if (ptr && check) register_transitives (bb, *ptr); } } @@ -1010,13 +1014,16 @@ dom_oracle::set_one_relation (basic_block bb, relation_kind k, tree op1, // Check into whether we can simply replace the relation rather than // intersecting it. THis may help with some optimistic iterative // updating algorithms. - ptr->intersect (vr); + bool new_rel = ptr->intersect (vr); if (dump_file && (dump_flags & TDF_DETAILS)) { fprintf (dump_file, " to produce "); ptr->dump (dump_file); - fprintf (dump_file, "\n"); + fprintf (dump_file, " %s.\n", new_rel ? "Updated" : "No Change"); } + // If there was no change, return no record.. + if (!new_rel) + return NULL; } else { diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog index f4591e1..fcae8df 100644 --- a/libgcc/ChangeLog +++ b/libgcc/ChangeLog @@ -1,3 +1,9 @@ +2022-07-18 Claudiu Zissulescu <claziss@synopsys.com> + + * config/arc/lib2funcs.c (udivmodsi4): Update AND mask. + * config/arc/lib1funcs.S (umodsi3): Don't use it for RF16 + configurations. + 2022-06-25 Jeff Law <jeffreyalaw@gmail.com> * config.host: Removed tilegx and tilepro entries. 
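In the libgcc hunks below, the normalisation guard in udivmodsi4 is corrected: with 32-bit nint32_t operands the old test den & (1LL << 63) can never be true, so den could be shifted past its top bit and wrap around before the loop stopped; testing bit 31 halts normalisation once the top bit of the 32-bit value is set. A self-contained sketch of the same shift-and-subtract algorithm over plain uint32_t, reconstructed from the visible hunk rather than copied from libgcc:

#include <cstdint>

/* Shift-and-subtract unsigned 32-bit division, returning the quotient
   or the remainder, mirroring the structure of udivmodsi4.  */
static uint32_t
udivmod32 (uint32_t num, uint32_t den, bool modwanted)
{
  uint32_t bit = 1;
  uint32_t res = 0;

  /* Normalise: shift den up under num.  The bit-31 guard (the fix in
     this commit) stops before den's top bit would be shifted out of
     the 32-bit type.  */
  while (den < num && bit && !(den & (UINT32_C (1) << 31)))
    {
      den <<= 1;
      bit <<= 1;
    }
  /* Walk back down, accumulating quotient bits.  */
  while (bit)
    {
      if (num >= den)
	{
	  num -= den;
	  res |= bit;
	}
      bit >>= 1;
      den >>= 1;
    }
  return modwanted ? num : res;
}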
diff --git a/libgcc/config/arc/lib1funcs.S b/libgcc/config/arc/lib1funcs.S index 14fd1d2..b063612 100644 --- a/libgcc/config/arc/lib1funcs.S +++ b/libgcc/config/arc/lib1funcs.S @@ -936,6 +936,7 @@ SYM(__divsi3): #endif /* L_divsi3 */ +#ifndef __ARC_RF16__ #ifdef L_umodsi3 .section .text .align 4 @@ -950,6 +951,7 @@ SYM(__umodsi3): ENDFUNC(__umodsi3) #endif /* L_umodsi3 */ +#endif /* !__ARC_RF16__ */ #ifdef L_modsi3 .section .text diff --git a/libgcc/config/arc/lib2funcs.c b/libgcc/config/arc/lib2funcs.c index 70727b5..8cba451 100644 --- a/libgcc/config/arc/lib2funcs.c +++ b/libgcc/config/arc/lib2funcs.c @@ -59,7 +59,7 @@ udivmodsi4 (nint32_t num, nint32_t den, word_t modwanted) nint32_t bit = 1; nint32_t res = 0; - while (den < num && bit && !(den & (1LL << 63))) + while (den < num && bit && !(den & (1L << 31))) { den <<= 1; bit <<= 1; diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index 1980646..eb1486f 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,13 @@ +2022-07-18 François Dumont <fdumont@gcc.gnu.org> + + * include/bits/stl_algo.h + (__merge_adaptive): Adapt to merge only when buffer is large enough.. + (__merge_adaptive_resize): New, adapt merge when buffer is too small. + (__inplace_merge): Adapt, use latter. + (__stable_sort_adaptive): Adapt to sort only when buffer is large enough. + (__stable_sort_adaptive_resize): New, adapt sort when buffer is too small. + (__stable_sort): Adapt, use latter. + 2022-07-15 Marek Polacek <polacek@redhat.com> PR c++/104477 diff --git a/libstdc++-v3/configure b/libstdc++-v3/configure index 19ca336..9e2ee86 100755 --- a/libstdc++-v3/configure +++ b/libstdc++-v3/configure @@ -29254,8 +29254,6 @@ else $as_echo "#define HAVE_LINK 1" >>confdefs.h - $as_echo "#define HAVE_POLL 1" >>confdefs.h - $as_echo "#define HAVE_QUICK_EXIT 1" >>confdefs.h $as_echo "#define HAVE_READLINK 1" >>confdefs.h diff --git a/libstdc++-v3/configure.ac b/libstdc++-v3/configure.ac index 79512a3..b351622 100644 --- a/libstdc++-v3/configure.ac +++ b/libstdc++-v3/configure.ac @@ -366,7 +366,6 @@ else AC_DEFINE(HAVE_ALIGNED_ALLOC) AC_DEFINE(HAVE_AT_QUICK_EXIT) AC_DEFINE(HAVE_LINK) - AC_DEFINE(HAVE_POLL) AC_DEFINE(HAVE_QUICK_EXIT) AC_DEFINE(HAVE_READLINK) AC_DEFINE(HAVE_SETENV) diff --git a/libstdc++-v3/include/bits/stl_algo.h b/libstdc++-v3/include/bits/stl_algo.h index 1d8ed4e..c607805 100644 --- a/libstdc++-v3/include/bits/stl_algo.h +++ b/libstdc++-v3/include/bits/stl_algo.h @@ -2390,28 +2390,42 @@ _GLIBCXX_END_INLINE_ABI_NAMESPACE(_V2) } /// This is a helper function for the merge routines. 
- template<typename _BidirectionalIterator, typename _Distance, + template<typename _BidirectionalIterator, typename _Distance, typename _Pointer, typename _Compare> void __merge_adaptive(_BidirectionalIterator __first, _BidirectionalIterator __middle, _BidirectionalIterator __last, _Distance __len1, _Distance __len2, - _Pointer __buffer, _Distance __buffer_size, - _Compare __comp) + _Pointer __buffer, _Compare __comp) { - if (__len1 <= __len2 && __len1 <= __buffer_size) + if (__len1 <= __len2) { _Pointer __buffer_end = _GLIBCXX_MOVE3(__first, __middle, __buffer); std::__move_merge_adaptive(__buffer, __buffer_end, __middle, __last, __first, __comp); } - else if (__len2 <= __buffer_size) + else { _Pointer __buffer_end = _GLIBCXX_MOVE3(__middle, __last, __buffer); std::__move_merge_adaptive_backward(__first, __middle, __buffer, __buffer_end, __last, __comp); } + } + + template<typename _BidirectionalIterator, typename _Distance, + typename _Pointer, typename _Compare> + void + __merge_adaptive_resize(_BidirectionalIterator __first, + _BidirectionalIterator __middle, + _BidirectionalIterator __last, + _Distance __len1, _Distance __len2, + _Pointer __buffer, _Distance __buffer_size, + _Compare __comp) + { + if (__len1 <= __buffer_size || __len2 <= __buffer_size) + std::__merge_adaptive(__first, __middle, __last, + __len1, __len2, __buffer, __comp); else { _BidirectionalIterator __first_cut = __first; @@ -2439,14 +2453,14 @@ _GLIBCXX_END_INLINE_ABI_NAMESPACE(_V2) _BidirectionalIterator __new_middle = std::__rotate_adaptive(__first_cut, __middle, __second_cut, - __len1 - __len11, __len22, __buffer, - __buffer_size); - std::__merge_adaptive(__first, __first_cut, __new_middle, __len11, - __len22, __buffer, __buffer_size, __comp); - std::__merge_adaptive(__new_middle, __second_cut, __last, - __len1 - __len11, - __len2 - __len22, __buffer, - __buffer_size, __comp); + __len1 - __len11, __len22, + __buffer, __buffer_size); + std::__merge_adaptive_resize(__first, __first_cut, __new_middle, + __len11, __len22, + __buffer, __buffer_size, __comp); + std::__merge_adaptive_resize(__new_middle, __second_cut, __last, + __len1 - __len11, __len2 - __len22, + __buffer, __buffer_size, __comp); } } @@ -2524,11 +2538,14 @@ _GLIBCXX_END_INLINE_ABI_NAMESPACE(_V2) // [first,middle) and [middle,last). 
_TmpBuf __buf(__first, std::min(__len1, __len2)); - if (__buf.begin() == 0) + if (__builtin_expect(__buf.size() == __buf.requested_size(), true)) + std::__merge_adaptive + (__first, __middle, __last, __len1, __len2, __buf.begin(), __comp); + else if (__builtin_expect(__buf.begin() == 0, false)) std::__merge_without_buffer (__first, __middle, __last, __len1, __len2, __comp); else - std::__merge_adaptive + std::__merge_adaptive_resize (__first, __middle, __last, __len1, __len2, __buf.begin(), _DistanceType(__buf.size()), __comp); } @@ -2709,34 +2726,46 @@ _GLIBCXX_END_INLINE_ABI_NAMESPACE(_V2) } } - template<typename _RandomAccessIterator, typename _Pointer, - typename _Distance, typename _Compare> + template<typename _RandomAccessIterator, typename _Pointer, typename _Compare> void __stable_sort_adaptive(_RandomAccessIterator __first, + _RandomAccessIterator __middle, _RandomAccessIterator __last, - _Pointer __buffer, _Distance __buffer_size, - _Compare __comp) + _Pointer __buffer, _Compare __comp) + { + std::__merge_sort_with_buffer(__first, __middle, __buffer, __comp); + std::__merge_sort_with_buffer(__middle, __last, __buffer, __comp); + + std::__merge_adaptive(__first, __middle, __last, + __middle - __first, __last - __middle, + __buffer, __comp); + } + + template<typename _RandomAccessIterator, typename _Pointer, + typename _Distance, typename _Compare> + void + __stable_sort_adaptive_resize(_RandomAccessIterator __first, + _RandomAccessIterator __last, + _Pointer __buffer, _Distance __buffer_size, + _Compare __comp) { const _Distance __len = (__last - __first + 1) / 2; const _RandomAccessIterator __middle = __first + __len; if (__len > __buffer_size) { - std::__stable_sort_adaptive(__first, __middle, __buffer, - __buffer_size, __comp); - std::__stable_sort_adaptive(__middle, __last, __buffer, - __buffer_size, __comp); + std::__stable_sort_adaptive_resize(__first, __middle, __buffer, + __buffer_size, __comp); + std::__stable_sort_adaptive_resize(__middle, __last, __buffer, + __buffer_size, __comp); + std::__merge_adaptive_resize(__first, __middle, __last, + _Distance(__middle - __first), + _Distance(__last - __middle), + __buffer, __buffer_size, + __comp); } else - { - std::__merge_sort_with_buffer(__first, __middle, __buffer, __comp); - std::__merge_sort_with_buffer(__middle, __last, __buffer, __comp); - } - - std::__merge_adaptive(__first, __middle, __last, - _Distance(__middle - __first), - _Distance(__last - __middle), - __buffer, __buffer_size, - __comp); + std::__stable_sort_adaptive(__first, __middle, __last, + __buffer, __comp); } /// This is a helper function for the stable sorting routines. @@ -4996,11 +5025,14 @@ _GLIBCXX_BEGIN_NAMESPACE_ALGO // so the buffer only needs to fit half the range at once. _TmpBuf __buf(__first, (__last - __first + 1) / 2); - if (__buf.begin() == 0) + if (__builtin_expect(__buf.requested_size() == __buf.size(), true)) + std::__stable_sort_adaptive(__first, __first + __buf.size(), __last, + __buf.begin(), __comp); + else if (__builtin_expect(__buf.begin() == 0, false)) std::__inplace_stable_sort(__first, __last, __comp); else - std::__stable_sort_adaptive(__first, __last, __buf.begin(), - _DistanceType(__buf.size()), __comp); + std::__stable_sort_adaptive_resize(__first, __last, __buf.begin(), + _DistanceType(__buf.size()), __comp); } /** |
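The stl_algo.h changes above split each adaptive helper in two: __merge_adaptive and __stable_sort_adaptive now handle only the case where the temporary buffer is full-sized (and so drop the buffer-size parameter), while the new *_resize variants keep subdividing the range until the pieces fit an undersized buffer; __inplace_merge and __stable_sort dispatch on __buf.size() == __buf.requested_size(), with __builtin_expect marking the full-buffer path as the likely one. A toy model of that dispatch, with fitted_sort and buffered_sort as illustrative stand-ins and std::inplace_merge standing in for the buffered merge:

#include <algorithm>
#include <cstddef>

/* Stand-in for __stable_sort_adaptive: both halves fit the buffer, so
   no further subdivision is needed (the real code merge-sorts each
   half through the scratch buffer).  */
template <typename It>
void
fitted_sort (It first, It middle, It last)
{
  std::stable_sort (first, middle);
  std::stable_sort (middle, last);
  std::inplace_merge (first, middle, last);
}

/* Stand-in for __stable_sort_adaptive_resize: halve the range until
   the pieces fit the undersized buffer, then take the fast path.
   Assumes buffer_size >= 1, as in libstdc++, where a failed allocation
   takes the bufferless __inplace_stable_sort path instead.  */
template <typename It>
void
buffered_sort (It first, It last, std::ptrdiff_t buffer_size)
{
  std::ptrdiff_t len = (last - first + 1) / 2;
  It middle = first + len;
  if (len > buffer_size)
    {
      buffered_sort (first, middle, buffer_size);
      buffered_sort (middle, last, buffer_size);
      std::inplace_merge (first, middle, last);
    }
  else
    fitted_sort (first, middle, last);
}

With a full-size buffer, len never exceeds buffer_size on the first call and the recursion is skipped entirely, which is the common case the patch moves onto the likely branch.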