diff options
Diffstat (limited to 'gcc')
34 files changed, 531 insertions, 272 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index ac8a2c6..137e388 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,43 @@ +2025-05-01 Jason Merrill <jason@redhat.com> + + * doc/invoke.texi: Add -fabi-version detail. + * common.opt: Likewise. + +2025-05-01 Andrew Pinski <quic_apinski@quicinc.com> + + * tree-ssa-phiopt.cc (phiopt_early_allow): Only allow a sequence + with one statement for MIN/MAX and the op was MIN/MAX. + +2025-05-01 Ayan Shafqat <ayan.x.shafqat@gmail.com> + + * config/aarch64/arm_acle.h (__sqrt, __sqrtf): New function. + +2025-05-01 Ayan Shafqat <ayan.x.shafqat@gmail.com> + + * config/aarch64/aarch64-simd-builtins.def: Change + BUILTIN_VHSDF_DF to BUILTIN_VHSDF_HSDF. + +2025-05-01 Jason Merrill <jason@redhat.com> + + PR c++/119162 + * fold-const.cc (maybe_nonzero_address): Return 1 for non-symtab + vars if folding_cxx_constexpr. + +2025-05-01 Richard Biener <rguenther@suse.de> + + * tree-vect-stmts.cc (vectorizable_conversion): Remove non-SLP + paths. + +2025-05-01 Richard Biener <rguenther@suse.de> + + * tree-vect-slp.cc (vect_remove_slp_scalar_calls): Look + at the original stmt. + +2025-05-01 Jakub Jelinek <jakub@redhat.com> + + * combine.cc (try_combine): Sets which satisfy set_noop_p can go + to i2 unless i3 is a jump and the other set is not. + 2025-04-30 Andrew Pinski <quic_apinski@quicinc.com> PR target/120042 diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index c95ab32..8632515 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20250501 +20250502 diff --git a/gcc/c/ChangeLog b/gcc/c/ChangeLog index 4c8fde7..08e747e 100644 --- a/gcc/c/ChangeLog +++ b/gcc/c/ChangeLog @@ -1,3 +1,17 @@ +2025-05-01 Christopher Bazley <chris.bazley@arm.com> + + PR c/119317 + * c-decl.cc (c_get_loop_names): Do not prematurely + end the search for a label that names a loop or + switch statement upon encountering a DEBUG_BEGIN_STMT. + Instead, ignore any instances of DEBUG_BEGIN_STMT. 
+ +2025-05-01 Florian Weimer <fweimer@redhat.com> + + PR c/119950 + * c-typeck.cc (convert_arguments): Check for built-in + function declaration before warning. + 2025-04-28 David Malcolm <dmalcolm@redhat.com> * c-decl.cc: Drop include of "make-unique.h". diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc index c7a13bf..05fb129 100644 --- a/gcc/c/c-typeck.cc +++ b/gcc/c/c-typeck.cc @@ -4337,7 +4337,7 @@ convert_arguments (location_t loc, vec<location_t> arg_loc, tree fntype, } if (!typetail && parmnum == 0 && !TYPE_NO_NAMED_ARGS_STDARG_P (fntype) - && !fndecl_built_in_p (fundecl)) + && !(fundecl && fndecl_built_in_p (fundecl))) { auto_diagnostic_group d; bool warned; diff --git a/gcc/ccmp.cc b/gcc/ccmp.cc index 67efe7d..e49bafa 100644 --- a/gcc/ccmp.cc +++ b/gcc/ccmp.cc @@ -133,23 +133,22 @@ ccmp_candidate_p (gimple *g, bool outer = false) /* Extract the comparison we want to do from the tree. */ void -get_compare_parts (tree t, int *up, rtx_code *rcode, +get_compare_parts (tree t, rtx_code *rcode, tree *rhs1, tree *rhs2) { tree_code code; gimple *g = get_gimple_for_ssa_name (t); if (g && is_gimple_assign (g)) { - *up = TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (g))); + int up = TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (g))); code = gimple_assign_rhs_code (g); - *rcode = get_rtx_code (code, *up); + *rcode = get_rtx_code (code, up); *rhs1 = gimple_assign_rhs1 (g); *rhs2 = gimple_assign_rhs2 (g); } else { /* If g is not a comparison operator create a compare to zero. 
*/ - *up = 1; *rcode = NE; *rhs1 = t; *rhs2 = build_zero_cst (TREE_TYPE (t)); @@ -167,10 +166,9 @@ expand_ccmp_next (tree op, tree_code code, rtx prev, rtx_insn **prep_seq, rtx_insn **gen_seq) { rtx_code rcode; - int unsignedp; tree rhs1, rhs2; - get_compare_parts(op, &unsignedp, &rcode, &rhs1, &rhs2); + get_compare_parts (op, &rcode, &rhs1, &rhs2); return targetm.gen_ccmp_next (prep_seq, gen_seq, prev, rcode, rhs1, rhs2, get_rtx_code (code, 0)); } @@ -204,7 +202,6 @@ expand_ccmp_expr_1 (gimple *g, rtx_insn **prep_seq, rtx_insn **gen_seq) { if (ccmp_tree_comparison_p (op1, bb)) { - int unsignedp0, unsignedp1; rtx_code rcode0, rcode1; tree logical_op0_rhs1, logical_op0_rhs2; tree logical_op1_rhs1, logical_op1_rhs2; @@ -214,10 +211,10 @@ expand_ccmp_expr_1 (gimple *g, rtx_insn **prep_seq, rtx_insn **gen_seq) unsigned cost1 = MAX_COST; unsigned cost2 = MAX_COST; - get_compare_parts (op0, &unsignedp0, &rcode0, + get_compare_parts (op0, &rcode0, &logical_op0_rhs1, &logical_op0_rhs2); - get_compare_parts (op1, &unsignedp1, &rcode1, + get_compare_parts (op1, &rcode1, &logical_op1_rhs1, &logical_op1_rhs2); rtx_insn *prep_seq_1, *gen_seq_1; diff --git a/gcc/common.opt b/gcc/common.opt index 8a5b69d..0e50305 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1056,6 +1056,7 @@ Driver Undocumented ; Default in G++ 15. ; ; 21: Fix noexcept lambda capture pruning. +; Fix C++20 layout of base with all explicitly defaulted constructors. ; Default in G++ 16. 
; ; Additional positive integers will be assigned as new versions of diff --git a/gcc/common.opt.urls b/gcc/common.opt.urls index 0077511..c108560 100644 --- a/gcc/common.opt.urls +++ b/gcc/common.opt.urls @@ -502,6 +502,9 @@ UrlSuffix(gcc/Optimize-Options.html#index-fcse-follow-jumps) fcse-skip-blocks UrlSuffix(gcc/Optimize-Options.html#index-fcse-skip-blocks) +fcx-method= +UrlSuffix(gcc/Optimize-Options.html#index-fcx-method) + fcx-limited-range UrlSuffix(gcc/Optimize-Options.html#index-fcx-limited-range) diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index d4af370..7bf12ff 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -2969,10 +2969,11 @@ { poly_int64 val; if (poly_int_rtx_p (operands[2], &val) - && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1)) + && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1) + && !val.is_constant ()) { - /* The last element can be extracted with a LASTB and a false - predicate. */ + /* For VLA, extract the last element with a LASTB and a false + predicate. */ rtx sel = aarch64_pfalse_reg (<VPRED>mode); emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1])); DONE; diff --git a/gcc/config/arc/arc.cc b/gcc/config/arc/arc.cc index be4bd61..3b4b038 100644 --- a/gcc/config/arc/arc.cc +++ b/gcc/config/arc/arc.cc @@ -720,8 +720,6 @@ static rtx arc_legitimize_address_0 (rtx, rtx, machine_mode mode); #define TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P \ arc_no_speculation_in_delay_slots_p -#undef TARGET_LRA_P -#define TARGET_LRA_P hook_bool_void_true #define TARGET_REGISTER_PRIORITY arc_register_priority /* Stores with scaled offsets have different displacement ranges. 
*/ #define TARGET_DIFFERENT_ADDR_DISPLACEMENT_P hook_bool_void_true diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index 91ce801..687bb4e 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -7908,8 +7908,6 @@ gcn_dwarf_register_span (rtx rtl) #define TARGET_LEGITIMATE_CONSTANT_P gcn_legitimate_constant_p #undef TARGET_LIBC_HAS_FUNCTION #define TARGET_LIBC_HAS_FUNCTION gcn_libc_has_function -#undef TARGET_LRA_P -#define TARGET_LRA_P hook_bool_void_true #undef TARGET_MACHINE_DEPENDENT_REORG #define TARGET_MACHINE_DEPENDENT_REORG gcn_md_reorg #undef TARGET_MEMORY_MOVE_COST diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 2f84033..cb348cb 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -18084,9 +18084,14 @@ ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type) if (cum->decl && !TREE_PUBLIC (cum->decl)) return; - const_tree ctx = get_ultimate_context (cum->decl); - if (ctx != NULL_TREE - && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx)) + tree decl = cum->decl; + if (!decl) + /* If we don't know the target, look at the current TU. */ + decl = current_function_decl; + + const_tree ctx = get_ultimate_context (decl); + if (ctx == NULL_TREE + || !TRANSLATION_UNIT_WARN_EMPTY_P (ctx)) return; /* If the actual size of the type is zero, then there is no change diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index a74c74d..d50293c 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,30 @@ +2025-05-01 Patrick Palka <ppalka@redhat.com> + + * constexpr.cc (explain_invalid_constexpr_fn): In the + DECL_CONSTRUCTOR_P branch pass the non-genericized body to + require_potential_constant_expression. + +2025-05-01 Patrick Palka <ppalka@redhat.com> + + PR c++/119034 + PR c++/68942 + * pt.cc (tsubst_expr) <case CALL_EXPR>: Revert PR68942 fix. + * semantics.cc (finish_call_expr): Ensure the callee of an + ADL-enabled call is wrapped in an OVERLOAD. 
+ +2025-05-01 Jason Merrill <jason@redhat.com> + + * Make-lang.in: Don't pass the full path to gperf. + * std-name-hint.h: Regenerate. + +2025-05-01 Jason Merrill <jason@redhat.com> + + PR c++/119162 + * constexpr.cc (find_deleted_heap_var): Remove. + (cxx_eval_call_expression): Don't call it. Don't set TREE_STATIC on + heap vars. + (cxx_eval_outermost_constant_expr): Don't mess with varpool. + 2025-04-30 Nathaniel Shead <nathanieloshead@gmail.com> PR c++/120023 diff --git a/gcc/cp/class.cc b/gcc/cp/class.cc index 2b694b9..6767ac1 100644 --- a/gcc/cp/class.cc +++ b/gcc/cp/class.cc @@ -6413,9 +6413,7 @@ check_bases_and_members (tree t) Again, other conditions for being an aggregate are checked elsewhere. */ CLASSTYPE_NON_AGGREGATE (t) - |= ((cxx_dialect < cxx20 - ? type_has_user_provided_or_explicit_constructor (t) - : TYPE_HAS_USER_CONSTRUCTOR (t)) + |= (type_has_user_provided_or_explicit_constructor (t) || TYPE_POLYMORPHIC_P (t)); /* This is the C++98/03 definition of POD; it changed in C++0x, but we retain the old definition internally for ABI reasons. */ @@ -6437,6 +6435,20 @@ check_bases_and_members (tree t) CLASSTYPE_NON_LAYOUT_POD_P (t) = true; } + /* P1008: Prohibit aggregates with user-declared constructors. */ + if (cxx_dialect >= cxx20 && TYPE_HAS_USER_CONSTRUCTOR (t)) + { + CLASSTYPE_NON_AGGREGATE (t) = true; + if (!CLASSTYPE_NON_LAYOUT_POD_P (t)) + { + /* c++/120012: The C++20 aggregate change affected layout. */ + if (!abi_version_at_least (21)) + CLASSTYPE_NON_LAYOUT_POD_P (t) = true; + if (abi_version_crosses (21)) + CLASSTYPE_NON_AGGREGATE_POD (t) = true; + } + } + /* If the only explicitly declared default constructor is user-provided, set TYPE_HAS_COMPLEX_DFLT. 
*/ if (!TYPE_HAS_COMPLEX_DFLT (t) @@ -6809,7 +6821,8 @@ end_of_class (tree t, eoc_mode mode) static void check_non_pod_aggregate (tree field) { - if (!abi_version_crosses (17) || cxx_dialect < cxx14) + if ((!abi_version_crosses (17) || cxx_dialect < cxx14) + && (!abi_version_crosses (21) || cxx_dialect < cxx20)) return; if (TREE_CODE (field) != FIELD_DECL || (!DECL_FIELD_IS_BASE (field) @@ -6822,7 +6835,8 @@ check_non_pod_aggregate (tree field) tree type = TREE_TYPE (field); if (TYPE_IDENTIFIER (type) == as_base_identifier) type = TYPE_CONTEXT (type); - if (!CLASS_TYPE_P (type) || !CLASSTYPE_NON_POD_AGGREGATE (type)) + if (!CLASS_TYPE_P (type) || (!CLASSTYPE_NON_POD_AGGREGATE (type) + && !CLASSTYPE_NON_AGGREGATE_POD (type))) return; tree size = end_of_class (type, (DECL_FIELD_IS_BASE (field) ? eoc_nvsize : eoc_nv_or_dsize)); @@ -6831,13 +6845,31 @@ check_non_pod_aggregate (tree field) { location_t loc = DECL_SOURCE_LOCATION (next); if (DECL_FIELD_IS_BASE (next)) - warning_at (loc, OPT_Wabi,"offset of %qT base class for " - "%<-std=c++14%> and up changes in " - "%<-fabi-version=17%> (GCC 12)", TREE_TYPE (next)); + { + if (abi_version_crosses (17) + && CLASSTYPE_NON_POD_AGGREGATE (type)) + warning_at (loc, OPT_Wabi,"offset of %qT base class for " + "%<-std=c++14%> and up changes in " + "%<-fabi-version=17%> (GCC 12)", TREE_TYPE (next)); + else if (abi_version_crosses (21) + && CLASSTYPE_NON_AGGREGATE_POD (type)) + warning_at (loc, OPT_Wabi,"offset of %qT base class for " + "%<-std=c++20%> and up changes in " + "%<-fabi-version=21%> (GCC 16)", TREE_TYPE (next)); + } else - warning_at (loc, OPT_Wabi, "offset of %qD for " - "%<-std=c++14%> and up changes in " - "%<-fabi-version=17%> (GCC 12)", next); + { + if (abi_version_crosses (17) + && CLASSTYPE_NON_POD_AGGREGATE (type)) + warning_at (loc, OPT_Wabi, "offset of %qD for " + "%<-std=c++14%> and up changes in " + "%<-fabi-version=17%> (GCC 12)", next); + else if (abi_version_crosses (21) + && CLASSTYPE_NON_AGGREGATE_POD 
(type)) + warning_at (loc, OPT_Wabi, "offset of %qD for " + "%<-std=c++20%> and up changes in " + "%<-fabi-version=21%> (GCC 16)", next); + } } } diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 856202c..af51d67 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -2491,6 +2491,7 @@ struct GTY(()) lang_type { unsigned unique_obj_representations_set : 1; bool erroneous : 1; bool non_pod_aggregate : 1; + bool non_aggregate_pod : 1; /* When adding a flag here, consider whether or not it ought to apply to a template instance if it applies to the template. If @@ -2499,7 +2500,7 @@ struct GTY(()) lang_type { /* There are some bits left to fill out a 32-bit word. Keep track of this by updating the size of this bitfield whenever you add or remove a flag. */ - unsigned dummy : 3; + unsigned dummy : 2; tree primary_base; vec<tree_pair_s, va_gc> *vcall_indices; @@ -2826,6 +2827,11 @@ struct GTY(()) lang_type { with a hash_set only filled in when abi_version_crosses (17). */ #define CLASSTYPE_NON_POD_AGGREGATE(NODE) \ (LANG_TYPE_CLASS_CHECK (NODE)->non_pod_aggregate) + +/* True if this class is layout-POD though it's not an aggregate in C++20 and + above (c++/120012). This could also be a hash_set. */ +#define CLASSTYPE_NON_AGGREGATE_POD(NODE) \ + (LANG_TYPE_CLASS_CHECK (NODE)->non_aggregate_pod) /* Additional macros for inheritance information. */ diff --git a/gcc/cp/init.cc b/gcc/cp/init.cc index 062a493..80a37a1 100644 --- a/gcc/cp/init.cc +++ b/gcc/cp/init.cc @@ -4747,7 +4747,8 @@ build_vec_init (tree base, tree maxindex, tree init, itself. But that breaks when gimplify_target_expr adds a clobber cleanup that runs before the build_vec_init cleanup. */ if (cleanup_flags) - vec_safe_push (*cleanup_flags, build_tree_list (iterator, maxindex)); + vec_safe_push (*cleanup_flags, + build_tree_list (rval, build_zero_cst (ptype))); } /* Should we try to create a constant initializer? 
*/ diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index e7a9a03..32bc457 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -3015,7 +3015,8 @@ Version 20, which first appeared in G++ 15, fixes manglings of lambdas in static data member initializers. Version 21, which first appeared in G++ 16, fixes unnecessary captures -in noexcept lambdas (c++/119764). +in noexcept lambdas (c++/119764) and layout of a base class +with all explicitly defaulted constructors (c++/120012). See also @option{-Wabi}. diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index e6ecc8d..9b2a48d 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,10 @@ +2025-05-01 Paul Thomas <pault@gcc.gnu.org> + + PR fortran/119948 + * resolve.cc (gfc_impure_variable): The result of a module + procedure with an interface declaration is not impure even if + the current namespace is not the same as the symbol's. + 2025-04-25 Harald Anlauf <anlauf@gmx.de> PR fortran/102900 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index aa5d296..354a2ad 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,37 @@ +2025-05-01 Christopher Bazley <chris.bazley@arm.com> + + PR c/119317 + * gcc.dg/c2y-named-loops-8.c: New test. + +2025-05-01 Florian Weimer <fweimer@redhat.com> + + * gcc.dg/Wdeprecated-non-prototype-5.c: New test. + +2025-05-01 Patrick Palka <ppalka@redhat.com> + + * g++.dg/cpp23/constexpr-nonlit19.C: New test. + +2025-05-01 Patrick Palka <ppalka@redhat.com> + + PR c++/119034 + PR c++/68942 + * g++.dg/template/koenig13.C: New test. + +2025-05-01 Paul Thomas <pault@gcc.gnu.org> + + PR fortran/119948 + * gfortran.dg/pr119948.f90: New test. + +2025-05-01 Ayan Shafqat <ayan.x.shafqat@gmail.com> + + * gcc.target/aarch64/acle/acle_sqrt.c: New test. + +2025-05-01 Richard Biener <rguenther@suse.de> + + PR tree-optimization/120003 + * gcc.dg/tree-ssa/ssa-dom-thread-7.c: Adjust aarch64 expected + thread2 number of threads. 
+ 2025-04-30 Nathaniel Shead <nathanieloshead@gmail.com> PR c++/120023 diff --git a/gcc/testsuite/g++.dg/abi/base-defaulted1.C b/gcc/testsuite/g++.dg/abi/base-defaulted1.C new file mode 100644 index 0000000..aaada72 --- /dev/null +++ b/gcc/testsuite/g++.dg/abi/base-defaulted1.C @@ -0,0 +1,19 @@ +// PR c++/120012 +// { dg-do compile { target c++11 } } +// { dg-additional-options "-fabi-version=21 -Wabi=20" } + +struct A +{ + A(const A&) = default; + A(A&&) = default; + A& operator=(A&&) = default; + unsigned int a; + unsigned char b; +}; +struct B: A +{ + unsigned char c; // { dg-warning "offset" "" { target c++20 } } +}; + +static_assert(sizeof(A) == (2 * sizeof(unsigned int)), ""); +static_assert(sizeof(B) == (3 * sizeof(unsigned int)), ""); diff --git a/gcc/testsuite/g++.dg/abi/base-defaulted1a.C b/gcc/testsuite/g++.dg/abi/base-defaulted1a.C new file mode 100644 index 0000000..d61eb39 --- /dev/null +++ b/gcc/testsuite/g++.dg/abi/base-defaulted1a.C @@ -0,0 +1,23 @@ +// PR c++/120012 +// { dg-do compile { target c++11 } } +// { dg-additional-options "-fabi-version=20 -Wabi" } + +struct A +{ + A(const A&) = default; + A(A&&) = default; + A& operator=(A&&) = default; + unsigned int a; + unsigned char b; +}; +struct B: A +{ + unsigned char c; // { dg-warning "offset" "" { target c++20 } } +}; + +static_assert(sizeof(A) == (2 * sizeof(unsigned int)), ""); +#if __cplusplus >= 202002L +static_assert(sizeof(B) == (2 * sizeof(unsigned int)), ""); +#else +static_assert(sizeof(B) == (3 * sizeof(unsigned int)), ""); +#endif diff --git a/gcc/testsuite/g++.dg/abi/pr60336-8a.C b/gcc/testsuite/g++.dg/abi/pr60336-8a.C new file mode 100644 index 0000000..a051843 --- /dev/null +++ b/gcc/testsuite/g++.dg/abi/pr60336-8a.C @@ -0,0 +1,15 @@ +// { dg-do compile } +// { dg-options "-O2 -Wabi=12" } + +struct dummy { struct{} a[7][3]; }; + +extern void test1 (struct dummy, ...); +extern void (*test2) (struct dummy, ...); + +void +foo () +{ + struct dummy a0; + test1 (a0, 42); + test2 (a0, 
42); +} diff --git a/gcc/testsuite/gcc.dg/Wdeprecated-non-prototype-6.c b/gcc/testsuite/gcc.dg/Wdeprecated-non-prototype-6.c new file mode 100644 index 0000000..08f2995 --- /dev/null +++ b/gcc/testsuite/gcc.dg/Wdeprecated-non-prototype-6.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-std=gnu17 -Wdeprecated-non-prototype" } */ + +void (*f1) (); +void (*f2) (); +void (*f3) (...); + +void +g () +{ + f1 (); + f2 (1); /* { dg-warning "does not allow arguments for function" } */ + f3 (1); +} diff --git a/gcc/testsuite/gcc.dg/tree-ssa/switch-5.c b/gcc/testsuite/gcc.dg/tree-ssa/switch-5.c new file mode 100644 index 0000000..b05742c --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/switch-5.c @@ -0,0 +1,60 @@ +/* { dg-do compile { target { { x86_64-*-* aarch64-*-* ia64-*-* powerpc64-*-* } && lp64 } } } */ +/* { dg-options "-O2 -fdump-tree-switchlower1" } */ + +int f0(); +int f1(); +int f2(); +int f3(); +int f4(); + +int foo(int a) +{ + switch (a) + { + case 0: + case 2: + case 4: + case 6: + return f0(); + case 8: + return f1(); + case 10: + case 14: + case 16: + case 18: + return f2(); + case 12: + return f3(); + case 20: + return f4(); + } + return -1; +} + +/* { dg-final { scan-tree-dump ";; GIMPLE switch case clusters: BT:0-8 BT:10-20" "switchlower1" } } */ + +int bar(int a) +{ + switch (a) + { + case 20: + case 18: + case 16: + case 14: + return f0(); + case 12: + return f1(); + case 10: + case 6: + case 4: + case 2: + return f2(); + case 8: + return f3(); + case 0: + return f4(); + } + return -1; +} + +/* { dg-final { scan-tree-dump ";; GIMPLE switch case clusters: BT:0-10 BT:12-20" "switchlower1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/switch-6.c b/gcc/testsuite/gcc.dg/tree-ssa/switch-6.c new file mode 100644 index 0000000..bbbc874 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/switch-6.c @@ -0,0 +1,51 @@ +/* { dg-do compile { target { { x86_64-*-* aarch64-*-* ia64-*-* powerpc64-*-* } && lp64 } } } */ +/* { dg-options "-O2 
-fdump-tree-switchlower1 -fno-jump-tables" } */ +/* Test that bit-test switch lowering can create cluster of size 64 (there was + an off-by-one error causing it to only do 63 before). */ + +int f(); + +int foo(int a) +{ + switch (a) + { + case 0: + case 3: + case 5: + case 7: + case 9: + case 11: + case 13: + case 15: + case 17: + case 19: + case 21: + case 23: + case 25: + case 27: + case 29: + case 31: + case 33: + case 35: + case 37: + case 39: + case 41: + case 43: + case 45: + case 47: + case 49: + case 51: + case 53: + case 55: + case 57: + case 59: + case 61: + case 63: + return f(); + default: + return -1; + } +} + +/* { dg-final { scan-tree-dump ";; GIMPLE switch case clusters: BT:0-63" "switchlower1" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/extract_1.c b/gcc/testsuite/gcc.target/aarch64/sve/extract_1.c index 5d5edf2..b5ca3b3 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/extract_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/extract_1.c @@ -56,40 +56,37 @@ typedef _Float16 vnx8hf __attribute__((vector_size (32))); TEST_ALL (EXTRACT) -/* { dg-final { scan-assembler-times {\tfmov\tx[0-9]+, d[0-9]+\n} 2 { target aarch64_little_endian } } } */ -/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[0\]\n} 1 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tfmov\tx[0-9]+, d[0-9]+\n} 3 { target aarch64_little_endian } } } */ +/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[0\]\n} 2 { target aarch64_big_endian } } } */ /* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-not {\tdup\td[0-9]+, v[0-9]+\.d\[0\]\n} } } */ /* { dg-final { scan-assembler-times {\tdup\td[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[2\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tlastb\td[0-9]+, 
p[0-7], z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[3\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tfmov\tw[0-9]+, s[0-9]+\n} 2 { target aarch64_little_endian } } } */ -/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[0\]\n} 1 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tfmov\tw[0-9]+, s[0-9]+\n} 3 { target aarch64_little_endian } } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[0\]\n} 2 { target aarch64_big_endian } } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */ /* { dg-final { scan-assembler-not {\tdup\ts[0-9]+, v[0-9]+\.s\[0\]\n} } } */ /* { dg-final { scan-assembler-times {\tdup\ts[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tdup\ts[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[4\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tlastb\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[7\]\n} 2 } } */ /* Also used to move the result of a non-Advanced SIMD extract. 
*/ -/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[0\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[0\]\n} 3 } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */ /* { dg-final { scan-assembler-not {\tdup\th[0-9]+, v[0-9]+\.h\[0\]\n} } } */ /* { dg-final { scan-assembler-times {\tdup\th[0-9]+, v[0-9]+\.h\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tdup\th[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[8\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tlastb\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[15\]\n} 2 } } */ /* Also used to move the result of a non-Advanced SIMD extract. */ -/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[0\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[0\]\n} 3 } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[15\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[16\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[31\]\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/extract_2.c b/gcc/testsuite/gcc.target/aarch64/sve/extract_2.c index 0e6ec83..a3886b2 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/extract_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/extract_2.c @@ -56,40 +56,37 @@ typedef _Float16 vnx16hf __attribute__((vector_size (64))); TEST_ALL (EXTRACT) -/* { dg-final { scan-assembler-times {\tfmov\tx[0-9]+, 
d[0-9]+\n} 2 { target aarch64_little_endian } } } */ -/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[0\]\n} 1 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tfmov\tx[0-9]+, d[0-9]+\n} 3 { target aarch64_little_endian } } } */ +/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[0\]\n} 2 { target aarch64_big_endian } } } */ /* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-not {\tdup\td[0-9]+, v[0-9]+\.d\[0\]\n} } } */ /* { dg-final { scan-assembler-times {\tdup\td[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[2\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tlastb\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[7\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tfmov\tw[0-9]+, s[0-9]+\n} 2 { target aarch64_little_endian } } } */ -/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[0\]\n} 1 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tfmov\tw[0-9]+, s[0-9]+\n} 3 { target aarch64_little_endian } } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[0\]\n} 2 { target aarch64_big_endian } } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */ /* { dg-final { scan-assembler-not {\tdup\ts[0-9]+, v[0-9]+\.s\[0\]\n} } } */ /* { dg-final { scan-assembler-times {\tdup\ts[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tdup\ts[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[4\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.s\n} 1 
} } */ -/* { dg-final { scan-assembler-times {\tlastb\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[15\]\n} 2 } } */ /* Also used to move the result of a non-Advanced SIMD extract. */ -/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[0\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[0\]\n} 3 } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */ /* { dg-final { scan-assembler-not {\tdup\th[0-9]+, v[0-9]+\.h\[0\]\n} } } */ /* { dg-final { scan-assembler-times {\tdup\th[0-9]+, v[0-9]+\.h\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tdup\th[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[8\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tlastb\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[31\]\n} 2 } } */ /* Also used to move the result of a non-Advanced SIMD extract. 
*/ -/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[0\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[0\]\n} 3 } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[15\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[16\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[63\]\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/extract_3.c b/gcc/testsuite/gcc.target/aarch64/sve/extract_3.c index 0d7a2fa..c22b8a9 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/extract_3.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/extract_3.c @@ -77,18 +77,16 @@ typedef _Float16 vnx32hf __attribute__((vector_size (128))); TEST_ALL (EXTRACT) -/* { dg-final { scan-assembler-times {\tfmov\tx[0-9]+, d[0-9]+\n} 5 { target aarch64_little_endian } } } */ -/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[0\]\n} 1 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tfmov\tx[0-9]+, d[0-9]+\n} 6 { target aarch64_little_endian } } } */ +/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[0\]\n} 2 { target aarch64_big_endian } } } */ /* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-not {\tdup\td[0-9]+, v[0-9]+\.d\[0\]\n} } } */ /* { dg-final { scan-assembler-times {\tdup\td[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[2\]\n} 2 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[7\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tlastb\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ -/* { 
dg-final { scan-assembler-times {\tfmov\tw[0-9]+, s[0-9]+\n} 5 { target aarch64_little_endian } } } */ -/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[0\]\n} 1 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tfmov\tw[0-9]+, s[0-9]+\n} 6 { target aarch64_little_endian } } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[0\]\n} 2 { target aarch64_big_endian } } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */ /* { dg-final { scan-assembler-not {\tdup\ts[0-9]+, v[0-9]+\.s\[0\]\n} } } */ @@ -96,11 +94,9 @@ TEST_ALL (EXTRACT) /* { dg-final { scan-assembler-times {\tdup\ts[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[4\]\n} 2 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[15\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tlastb\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ /* Also used to move the result of a non-Advanced SIMD extract. 
*/ -/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[0\]\n} 5 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[0\]\n} 6 } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */ /* { dg-final { scan-assembler-not {\tdup\th[0-9]+, v[0-9]+\.h\[0\]\n} } } */ @@ -108,19 +104,20 @@ TEST_ALL (EXTRACT) /* { dg-final { scan-assembler-times {\tdup\th[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[8\]\n} 2 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[31\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tlastb\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ /* Also used to move the result of a non-Advanced SIMD extract. */ -/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[0\]\n} 5 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[0\]\n} 6 } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[15\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[16\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[63\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ /* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #64\n} 7 } } */ /* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #72\n} 2 } } */ /* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #84\n} 2 } } */ /* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #94\n} 2 } } */ /* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, 
z[0-9]+\.b, z[0-9]+\.b, #100\n} 1 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #120\n} 2 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #124\n} 2 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #126\n} 2 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #127\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/extract_4.c b/gcc/testsuite/gcc.target/aarch64/sve/extract_4.c index a706291..0fa9175 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/extract_4.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/extract_4.c @@ -84,18 +84,16 @@ typedef _Float16 v128hf __attribute__((vector_size (256))); TEST_ALL (EXTRACT) -/* { dg-final { scan-assembler-times {\tfmov\tx[0-9]+, d[0-9]+\n} 6 { target aarch64_little_endian } } } */ -/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[0\]\n} 1 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tfmov\tx[0-9]+, d[0-9]+\n} 7 { target aarch64_little_endian } } } */ +/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[0\]\n} 2 { target aarch64_big_endian } } } */ /* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-not {\tdup\td[0-9]+, v[0-9]+\.d\[0\]\n} } } */ /* { dg-final { scan-assembler-times {\tdup\td[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[2\]\n} 2 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[7\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tlastb\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tfmov\tw[0-9]+, s[0-9]+\n} 6 { target aarch64_little_endian } } } */ -/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, 
v[0-9]+\.s\[0\]\n} 1 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tfmov\tw[0-9]+, s[0-9]+\n} 7 { target aarch64_little_endian } } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[0\]\n} 2 { target aarch64_big_endian } } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */ /* { dg-final { scan-assembler-not {\tdup\ts[0-9]+, v[0-9]+\.s\[0\]\n} } } */ @@ -103,11 +101,9 @@ TEST_ALL (EXTRACT) /* { dg-final { scan-assembler-times {\tdup\ts[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[4\]\n} 2 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[15\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tlastb\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ /* Also used to move the result of a non-Advanced SIMD extract. 
*/ -/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[0\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[0\]\n} 7 } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */ /* { dg-final { scan-assembler-not {\tdup\th[0-9]+, v[0-9]+\.h\[0\]\n} } } */ @@ -115,16 +111,13 @@ TEST_ALL (EXTRACT) /* { dg-final { scan-assembler-times {\tdup\th[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[8\]\n} 2 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[31\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tlastb\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ /* Also used to move the result of a non-Advanced SIMD extract. */ -/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[0\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[0\]\n} 7 } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[15\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[16\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[63\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ /* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #64\n} 7 } } */ /* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #72\n} 2 } } */ @@ -135,3 +128,7 @@ TEST_ALL (EXTRACT) /* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #124\n} 2 } } */ /* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #126\n} 2 } } */ /* { dg-final { 
scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #127\n} 1 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #248\n} 2 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #252\n} 2 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #254\n} 2 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #255\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/extract_last_128.c b/gcc/testsuite/gcc.target/aarch64/sve/extract_last_128.c new file mode 100644 index 0000000..2684fb6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/extract_last_128.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -msve-vector-bits=128" } */ + +#include <arm_sve.h> + +#define TEST(TYPE, TY) \ + TYPE extract_last_##TY (sv##TYPE x) \ + { \ + svbool_t pg = svpfalse (); \ + return svlastb_##TY (pg, x); \ + } + +TEST(bfloat16_t, bf16) +TEST(float16_t, f16) +TEST(float32_t, f32) +TEST(float64_t, f64) +TEST(int8_t, s8) +TEST(int16_t, s16) +TEST(int32_t, s32) +TEST(int64_t, s64) +TEST(uint8_t, u8) +TEST(uint16_t, u16) +TEST(uint32_t, u32) +TEST(uint64_t, u64) + +/* { dg-final { scan-assembler-times {\tdup\th0, v0\.h\[7\]} 2 } } */ +/* { dg-final { scan-assembler-times {\tdup\ts0, v0\.s\[3\]} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\td0, v0\.d\[1\]} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw0, v0\.h\[7\]} 2 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw0, v0\.b\[15\]} 2 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw0, v0\.s\[3\]} 2 } } */ +/* { dg-final { scan-assembler-times {\tumov\tx0, v0\.d\[1\]} 2 } } */ +/* { dg-final { scan-assembler-not "lastb" } } */
\ No newline at end of file diff --git a/gcc/tree-ssanames.cc b/gcc/tree-ssanames.cc index d7865f2..de7b9b7 100644 --- a/gcc/tree-ssanames.cc +++ b/gcc/tree-ssanames.cc @@ -576,7 +576,7 @@ get_known_nonzero_bits_1 (const_tree name) if (tmp.undefined_p ()) return wi::shwi (0, precision); irange_bitmask bm = tmp.get_bitmask (); - return bm.value () & ~bm.mask (); + return wi::bit_and_not (bm.value (), bm.mask ()); } /* Return a wide_int with known non-zero bits in SSA_NAME diff --git a/gcc/tree-switch-conversion.cc b/gcc/tree-switch-conversion.cc index 39a8a89..dea217a 100644 --- a/gcc/tree-switch-conversion.cc +++ b/gcc/tree-switch-conversion.cc @@ -1773,122 +1773,108 @@ jump_table_cluster::is_beneficial (const vec<cluster *> &, return end - start + 1 >= case_values_threshold (); } -/* Find bit tests of given CLUSTERS, where all members of the vector are of - type simple_cluster. Use a fast algorithm that might not find the optimal - solution (minimal number of clusters on the output). New clusters are - returned. - - You should call find_bit_tests () instead of calling this function - directly. */ +/* Find bit tests of given CLUSTERS, where all members of the vector + are of type simple_cluster. MAX_C is the approx max number of cases per + label. New clusters are returned. */ vec<cluster *> -bit_test_cluster::find_bit_tests_fast (vec<cluster *> &clusters) +bit_test_cluster::find_bit_tests (vec<cluster *> &clusters, int max_c) { - unsigned l = clusters.length (); - vec<cluster *> output; + if (!is_enabled () || max_c == 1) + return clusters.copy (); - output.create (l); + /* Dynamic programming algorithm. - /* Look at sliding BITS_PER_WORD sized windows in the switch value space - and determine if they are suitable for a bit test cluster. Worst case - this can examine every value BITS_PER_WORD-1 times. 
*/ - unsigned k; - for (unsigned i = 0; i < l; i += k) - { - hash_set<basic_block> targets; - cluster *start_cluster = clusters[i]; + In: List of simple clusters + Out: List of simple clusters and bit test clusters such that each bit test + cluster can_be_handled() and is_beneficial() - /* Find the biggest k such that clusters i to i+k-1 can be turned into a - one big bit test cluster. */ - k = 0; - while (i + k < l) - { - cluster *end_cluster = clusters[i + k]; + Tries to merge consecutive clusters into bigger (bit test) ones. Tries to + end up with as few clusters as possible. */ - /* Does value range fit into the BITS_PER_WORD window? */ - HOST_WIDE_INT w = cluster::get_range (start_cluster->get_low (), - end_cluster->get_high ()); - if (w == 0 || w > BITS_PER_WORD) - break; + unsigned l = clusters.length (); + auto_vec<min_cluster_item> min; + min.reserve (l + 1); - /* Check for max # of targets. */ - if (targets.elements () == m_max_case_bit_tests - && !targets.contains (end_cluster->m_case_bb)) - break; + gcc_checking_assert (l > 0); + gcc_checking_assert (l <= INT_MAX); - targets.add (end_cluster->m_case_bb); - k++; - } + int bits_in_word = GET_MODE_BITSIZE (word_mode); - if (is_beneficial (k, targets.elements ())) - { - output.safe_push (new bit_test_cluster (clusters, i, i + k - 1, - i == 0 && k == l)); - } - else - { - output.safe_push (clusters[i]); - /* ??? Might be able to skip more. */ - k = 1; - } - } + /* First phase: Compute the minimum number of clusters for each prefix of the + input list incrementally - return output; -} - -/* Find bit tests of given CLUSTERS, where all members of the vector - are of type simple_cluster. Use a slow (quadratic) algorithm that always - finds the optimal solution (minimal number of clusters on the output). New - clusters are returned. 
+ min[i] = (count, j, _) means that the prefix ending with the (i-1)-th + element can be made to contain as few as count clusters and that in such + clustering the last cluster is made up of input clusters [j, i-1] + (inclusive). */ + min.quick_push (min_cluster_item (0, 0, INT_MAX)); + min.quick_push (min_cluster_item (1, 0, INT_MAX)); + for (int i = 2; i <= (int) l; i++) + { + auto_vec<unsigned, m_max_case_bit_tests> unique_labels; - You should call find_bit_tests () instead of calling this function - directly. */ + /* Since each cluster contains at least one case number and one bit test + cluster can cover at most bits_in_word case numbers, we don't need to + look farther than bits_in_word clusters back. */ + for (int j = i - 1; j >= 0 && j >= i - bits_in_word; j--) + { + /* Consider creating a bit test cluster from input clusters [j, i-1] + (inclusive) */ -vec<cluster *> -bit_test_cluster::find_bit_tests_slow (vec<cluster *> &clusters) -{ - unsigned l = clusters.length (); - auto_vec<min_cluster_item> min; - min.reserve (l + 1); + simple_cluster *sc = static_cast<simple_cluster *> (clusters[j]); + unsigned label = sc->m_case_bb->index; + if (!unique_labels.contains (label)) + { + if (unique_labels.length () >= m_max_case_bit_tests) + /* is_beneficial() will be false for this and the following + iterations. */ + break; + unique_labels.quick_push (label); + } - min.quick_push (min_cluster_item (0, 0, 0)); + unsigned new_count = min[j].m_count + 1; - for (unsigned i = 1; i <= l; i++) - { - /* Set minimal # of clusters with i-th item to infinite. 
*/ - min.quick_push (min_cluster_item (INT_MAX, INT_MAX, INT_MAX)); + if (j == i - 1) + { + min.quick_push (min_cluster_item (new_count, j, INT_MAX)); + continue; + } - for (unsigned j = 0; j < i; j++) - { - if (min[j].m_count + 1 < min[i].m_count - && can_be_handled (clusters, j, i - 1)) - min[i] = min_cluster_item (min[j].m_count + 1, j, INT_MAX); + unsigned HOST_WIDE_INT range + = get_range (clusters[j]->get_low (), clusters[i-1]->get_high ()); + if (new_count < min[i].m_count + && can_be_handled (range, unique_labels.length ()) + && is_beneficial (i - j, unique_labels.length ())) + min[i] = min_cluster_item (new_count, j, INT_MAX); } - - gcc_checking_assert (min[i].m_count != INT_MAX); } - /* No result. */ if (min[l].m_count == l) + /* No bit test clustering opportunities. */ return clusters.copy (); vec<cluster *> output; output.create (4); - /* Find and build the clusters. */ + /* Second phase: Find and build the bit test clusters by traversing min + array backwards. */ for (unsigned end = l;;) { - int start = min[end].m_start; + unsigned start = min[end].m_start; + gcc_checking_assert (start < end); - if (is_beneficial (clusters, start, end - 1)) + /* This cluster will be made out of input clusters [start, end - 1]. */ + + if (start == end - 1) + /* Let the cluster be a simple cluster. */ + output.safe_push (clusters[start]); + else { - bool entire = start == 0 && end == clusters.length (); + bool entire = start == 0 && end == l; output.safe_push (new bit_test_cluster (clusters, start, end - 1, entire)); } - else - for (int i = end - 1; i >= start; i--) - output.safe_push (clusters[i]); end = start; @@ -1900,25 +1886,6 @@ bit_test_cluster::find_bit_tests_slow (vec<cluster *> &clusters) return output; } -/* Find bit tests of given CLUSTERS, where all members of the vector - are of type simple_cluster. MAX_C is the approx max number of cases per - label. New clusters are returned. 
*/ - -vec<cluster *> -bit_test_cluster::find_bit_tests (vec<cluster *> &clusters, int max_c) -{ - if (!is_enabled () || max_c == 1) - return clusters.copy (); - - unsigned l = clusters.length (); - - /* Note: l + 1 is the number of cases of the switch. */ - if (l + 1 > (unsigned) param_switch_lower_slow_alg_max_cases) - return find_bit_tests_fast (clusters); - else - return find_bit_tests_slow (clusters); -} - /* Return true when RANGE of case values with UNIQ labels can build a bit test. */ @@ -1930,84 +1897,25 @@ bit_test_cluster::can_be_handled (unsigned HOST_WIDE_INT range, if (range == 0) return false; - if (range >= GET_MODE_BITSIZE (word_mode)) + if (range > GET_MODE_BITSIZE (word_mode)) return false; return uniq <= m_max_case_bit_tests; } -/* Return true when cluster starting at START and ending at END (inclusive) - can build a bit test. */ - -bool -bit_test_cluster::can_be_handled (const vec<cluster *> &clusters, - unsigned start, unsigned end) -{ - auto_vec<int, m_max_case_bit_tests> dest_bbs; - /* For algorithm correctness, bit test for a single case must return - true. We bail out in is_beneficial if it's called just for - a single case. */ - if (start == end) - return true; - - unsigned HOST_WIDE_INT range = get_range (clusters[start]->get_low (), - clusters[end]->get_high ()); - - /* Make a guess first. */ - if (!can_be_handled (range, m_max_case_bit_tests)) - return false; - - for (unsigned i = start; i <= end; i++) - { - simple_cluster *sc = static_cast<simple_cluster *> (clusters[i]); - /* m_max_case_bit_tests is very small integer, thus the operation - is constant. */ - if (!dest_bbs.contains (sc->m_case_bb->index)) - { - if (dest_bbs.length () >= m_max_case_bit_tests) - return false; - dest_bbs.quick_push (sc->m_case_bb->index); - } - } - - return true; -} - /* Return true when COUNT of cases of UNIQ labels is beneficial for bit test transformation. 
*/ bool bit_test_cluster::is_beneficial (unsigned count, unsigned uniq) { + /* NOTE: When modifying this, keep in mind the value of + m_max_case_bit_tests. */ return (((uniq == 1 && count >= 3) || (uniq == 2 && count >= 5) || (uniq == 3 && count >= 6))); } -/* Return true if cluster starting at START and ending at END (inclusive) - is profitable transformation. */ - -bool -bit_test_cluster::is_beneficial (const vec<cluster *> &clusters, - unsigned start, unsigned end) -{ - /* Single case bail out. */ - if (start == end) - return false; - - auto_bitmap dest_bbs; - - for (unsigned i = start; i <= end; i++) - { - simple_cluster *sc = static_cast<simple_cluster *> (clusters[i]); - bitmap_set_bit (dest_bbs, sc->m_case_bb->index); - } - - unsigned uniq = bitmap_count_bits (dest_bbs); - unsigned count = end - start + 1; - return is_beneficial (count, uniq); -} - /* Comparison function for qsort to order bit tests by decreasing probability of execution. */ @@ -2349,13 +2257,6 @@ switch_decision_tree::analyze_switch_statement () reset_out_edges_aux (m_switch); - if (l > (unsigned) param_switch_lower_slow_alg_max_cases) - warning_at (gimple_location (m_switch), OPT_Wdisabled_optimization, - "Using faster switch lowering algorithms. " - "Number of switch cases (%d) exceeds " - "%<--param=switch-lower-slow-alg-max-cases=%d%> limit.", - l, param_switch_lower_slow_alg_max_cases); - /* Find bit-test clusters. */ vec<cluster *> output = bit_test_cluster::find_bit_tests (clusters, max_c); diff --git a/gcc/tree-switch-conversion.h b/gcc/tree-switch-conversion.h index 2ed7e1c..b2b6ddc 100644 --- a/gcc/tree-switch-conversion.h +++ b/gcc/tree-switch-conversion.h @@ -423,20 +423,10 @@ public: can build a bit test. */ static bool can_be_handled (unsigned HOST_WIDE_INT range, unsigned uniq); - /* Return true when cluster starting at START and ending at END (inclusive) - can build a bit test. 
*/ - static bool can_be_handled (const vec<cluster *> &clusters, unsigned start, - unsigned end); - /* Return true when COUNT of cases of UNIQ labels is beneficial for bit test transformation. */ static bool is_beneficial (unsigned count, unsigned uniq); - /* Return true if cluster starting at START and ending at END (inclusive) - is profitable transformation. */ - static bool is_beneficial (const vec<cluster *> &clusters, unsigned start, - unsigned end); - /* Split the basic block at the statement pointed to by GSIP, and insert a branch to the target basic block of E_TRUE conditional on tree expression COND. diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc index 80c2d31..3c68361 100644 --- a/gcc/tree-vect-generic.cc +++ b/gcc/tree-vect-generic.cc @@ -1754,7 +1754,7 @@ expand_vector_conversion (gimple_stmt_iterator *gsi) else if (ret_elt_bits > arg_elt_bits) modifier = WIDEN; - auto_vec<std::pair<tree, tree_code> > converts; + auto_vec<std::pair<tree, tree_code>, 2> converts; if (supportable_indirect_convert_operation (code, ret_type, arg_type, converts)) { diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 42b6059..537ae6c 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -5706,7 +5706,7 @@ vectorizable_conversion (vec_info *vinfo, scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type); scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type); opt_scalar_mode rhs_mode_iter; - auto_vec<std::pair<tree, tree_code> > converts; + auto_vec<std::pair<tree, tree_code>, 2> converts; /* Supportable by target? */ switch (modifier) |