123 files changed, 4163 insertions, 139 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b2def9f..e725926 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,138 @@ +2025-10-17 David Faust <david.faust@oracle.com> + + PR target/122139 + * config/bpf/bpf.cc (bpf_expand_setmem): Duplicate byte value + across to new mode when using larger modes for store. + +2025-10-17 Tamar Christina <tamar.christina@arm.com> + Jennifer Schmitz <jschmitz@nvidia.com> + + PR target/121604 + * config/aarch64/aarch64-sve-builtins-shapes.cc (apply_predication): + Store gp_index. + (struct pmov_to_vector_lane_def): Mark instruction as has no GP. + * config/aarch64/aarch64-sve-builtins.h (function_instance::gp_value, + function_instance::inactive_values, function_instance::gp_index, + function_shape::has_gp_argument_p): New. + * config/aarch64/aarch64-sve-builtins.cc (gimple_folder::fold_pfalse): + Simplify code and use GP helpers. + +2025-10-17 Richard Biener <rguenther@suse.de> + + PR tree-optimization/122308 + * gimple-loop-jam.cc (tree_loop_unroll_and_jam): Do LIM + after applying unroll-and-jam. + +2025-10-17 Josef Melcr <jmelcr02@gmail.com> + + * Makefile.in: Add attr-callback.o to OBJS. + * builtin-attrs.def (ATTR_CALLBACK): Callback attr identifier. + (DEF_CALLBACK_ATTRIBUTE): Macro for callback attr creation. + (GOMP): Attr for libgomp functions. + (ATTR_CALLBACK_GOMP_LIST): ATTR_NOTHROW_LIST with GOMP callback + attr added. + * cgraph.cc (cgraph_add_edge_to_call_site_hash): Always hash the + callback-carrying edge. + (cgraph_node::get_edge): Always return the callback-carrying + edge. + (cgraph_edge::set_call_stmt): Add cascade for callback edges. + (symbol_table::create_edge): Allow callback edges to share call + stmts, initialize new flags. + (cgraph_edge::make_callback): New method, derives a new callback + edge. + (cgraph_edge::get_callback_carrying_edge): New method. + (cgraph_edge::first_callback_edge): Likewise. + (cgraph_edge::next_callback_edge): Likewise. + (cgraph_edge::purge_callback_edges): Likewise. + (cgraph_edge::redirect_callee): When redirecting a callback + edge, redirect its ref as well. + (cgraph_edge::redirect_call_stmt_to_callee): Add callback edge + redirection logic, set update_derived_edges to true when + redirecting the carrying edge. + (cgraph_node::remove_callers): Add cascade for callback edges. + (cgraph_edge::dump_edge_flags): Print callback flags. + (cgraph_node::verify_node): Add sanity checks for callback + edges. + * cgraph.h: Add new 1 bit flags and 16 bit callback_id to + cgraph_edge class. + * cgraphclones.cc (cgraph_edge::clone): Copy over callback data. + * cif-code.def (CALLBACK_EDGE): Add CIF_CALLBACK_EDGE code. + * ipa-cp.cc (purge_useless_callback_edges): New function, + deletes callback edges when necessary. + (ipcp_decision_stage): Call purge_useless_callback_edges. + * ipa-fnsummary.cc (ipa_call_summary_t::duplicate): Add + an exception for callback edges. + (analyze_function_body): Copy over summary from carrying to + callback edge. + * ipa-inline-analysis.cc (do_estimate_growth_1): Skip callback + edges when estimating growth. + * ipa-inline-transform.cc (inline_transform): Add redirection + cascade for callback edges. + * ipa-param-manipulation.cc + (drop_decl_attribute_if_params_changed_p): New function. + (ipa_param_adjustments::build_new_function_type): Add + args_modified out param. + (ipa_param_adjustments::adjust_decl): Drop callback attrs when + modifying args. + * ipa-param-manipulation.h: Adjust decl of + build_new_function_type.
+ * ipa-prop.cc (ipa_duplicate_jump_function): Add decl. + (init_callback_edge_summary): New function. + (ipa_compute_jump_functions_for_edge): Add callback edge + creation logic. + * lto-cgraph.cc (lto_output_edge): Stream out callback data. + (input_edge): Input callback data. + * omp-builtins.def (BUILT_IN_GOMP_PARALLEL_LOOP_STATIC): Use new + attr list. + (BUILT_IN_GOMP_PARALLEL_LOOP_GUIDED): Likewise. + (BUILT_IN_GOMP_PARALLEL_LOOP_NONMONOTONIC_DYNAMIC): Likewise. + (BUILT_IN_GOMP_PARALLEL_LOOP_NONMONOTONIC_RUNTIME): Likewise. + (BUILT_IN_GOMP_PARALLEL): Likewise. + (BUILT_IN_GOMP_PARALLEL_SECTIONS): Likewise. + (BUILT_IN_GOMP_TEAMS_REG): Likewise. + * tree-core.h (ECF_CB_1_2): New constant for callback(1,2). + * tree-inline.cc (copy_bb): Copy callback edges when copying the + carrying edge. + (redirect_all_calls): Redirect callback edges. + * tree.cc (set_call_expr_flags): Create callback attr according + to the ECF_CB flag. + * attr-callback.cc: New file. + * attr-callback.h: New file. + +2025-10-17 Richard Biener <rguenther@suse.de> + + PR tree-optimization/122301 + * tree-vect-patterns.cc (vect_recog_over_widening_pattern): + Fix reduction guard. + (vect_mark_pattern_stmts): Fix reduction def check. + +2025-10-17 Avinash Jayakar <avinashd@linux.ibm.com> + + PR tree-optimization/104116 + * tree-vect-patterns.cc (add_code_for_floorceilround_divmod): patt recog + for {FLOOR,ROUND,CEIL}_{DIV,MOD}_EXPR. + (vect_recog_divmod_pattern): Call add_code_for_floorceilround_divmod + after computing div/mod for each control path. + +2025-10-17 Andrew Pinski <andrew.pinski@oss.qualcomm.com> + + PR tree-optimization/122296 + * match.pd (`(a != b) | ((a|b) != 0)`): Reuse both + the ior and zero instead of recreating them. + (`(a == b) & ((a|b) == 0)`): Likewise + +2025-10-17 Andrew Pinski <andrew.pinski@oss.qualcomm.com> + + PR tree-optimization/122296 + * match.pd (`(a == b) | ((a|b) != 0)`): Fix true value. + +2025-10-17 Hu, Lin1 <lin1.hu@intel.com> + + PR target/122119 + * config/i386/amxmovrsintrin.h + (_tile_loaddrs_internal): Use __PTRDIFF_TYPE__ instead of long. + (_tile_loaddrst1_internal): Ditto. + 2025-10-16 David Malcolm <dmalcolm@redhat.com> * Makefile.in (OBJS-libcommon): Add diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 9456ec9..13c94d5 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20251017 +20251018 diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 8d9528c..5c24a9a 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1853,6 +1853,7 @@ OBJS = \ web.o \ wide-int.o \ wide-int-print.o \ + attr-callback.o \ $(out_object_file) \ $(ANALYZER_OBJS) \ $(EXTRA_OBJS) \ diff --git a/gcc/ada/ChangeLog b/gcc/ada/ChangeLog index 08d7148..24d03d4 100644 --- a/gcc/ada/ChangeLog +++ b/gcc/ada/ChangeLog @@ -1,3 +1,16 @@ +2025-10-17 Eric Botcazou <ebotcazou@adacore.com> + + PR ada/122295 + * sem_ch12.adb (Analyze_Package_Instantiation): Force Style_Check + to False only after possibly installing the parent. + * aspects.adb (UAD_Pragma_Map): Fix style violation. + * inline.adb (To_Pending_Instantiations): Likewise. + * lib.ads (Unit_Names): Likewise. + * repinfo.adb (Relevant_Entities): Likewise. + * sem_ch7.adb (Subprogram_Table): Likewise. + (Traversed_Table): Likewise. + * sem_util.adb (Interval_Sorting): Likewise. 
+ 2025-10-07 Eric Botcazou <ebotcazou@adacore.com> Revert: diff --git a/gcc/ada/aspects.adb b/gcc/ada/aspects.adb index 44b7494..c9eaea1 100644 --- a/gcc/ada/aspects.adb +++ b/gcc/ada/aspects.adb @@ -578,7 +578,7 @@ package body Aspects is return UAD_Pragma_Map_Header is (UAD_Pragma_Map_Header (Chars mod UAD_Pragma_Map_Size)); - package UAD_Pragma_Map is new GNAT.Htable.Simple_Htable + package UAD_Pragma_Map is new GNAT.HTable.Simple_HTable (Header_Num => UAD_Pragma_Map_Header, Key => Name_Id, Element => Opt_N_Pragma_Id, diff --git a/gcc/ada/inline.adb b/gcc/ada/inline.adb index a592494..9e60fa8 100644 --- a/gcc/ada/inline.adb +++ b/gcc/ada/inline.adb @@ -151,7 +151,7 @@ package body Inline is function Node_Hash (Id : Node_Id) return Node_Header_Num; -- Simple hash function for Node_Ids - package To_Pending_Instantiations is new GNAT.Htable.Simple_HTable + package To_Pending_Instantiations is new GNAT.HTable.Simple_HTable (Header_Num => Node_Header_Num, Element => Int, No_Element => -1, diff --git a/gcc/ada/lib.ads b/gcc/ada/lib.ads index 928f6f8..f5c6571 100644 --- a/gcc/ada/lib.ads +++ b/gcc/ada/lib.ads @@ -901,7 +901,7 @@ private function Unit_Name_Hash (Id : Unit_Name_Type) return Unit_Name_Header_Num; -- Simple hash function for Unit_Name_Types - package Unit_Names is new GNAT.Htable.Simple_HTable + package Unit_Names is new GNAT.HTable.Simple_HTable (Header_Num => Unit_Name_Header_Num, Element => Unit_Number_Type, No_Element => No_Unit, diff --git a/gcc/ada/repinfo.adb b/gcc/ada/repinfo.adb index e236e4e..41afbb7 100644 --- a/gcc/ada/repinfo.adb +++ b/gcc/ada/repinfo.adb @@ -119,7 +119,7 @@ package body Repinfo is function Entity_Hash (Id : Entity_Id) return Entity_Header_Num; -- Simple hash function for Entity_Ids - package Relevant_Entities is new GNAT.Htable.Simple_HTable + package Relevant_Entities is new GNAT.HTable.Simple_HTable (Header_Num => Entity_Header_Num, Element => Boolean, No_Element => False, diff --git a/gcc/ada/sem_ch12.adb b/gcc/ada/sem_ch12.adb index de9cff1..3575b04 100644 --- a/gcc/ada/sem_ch12.adb +++ b/gcc/ada/sem_ch12.adb @@ -4990,14 +4990,6 @@ package body Sem_Ch12 is Preanalyze_Actuals (N, Act_Decl_Id); - -- Turn off style checking in instances. If the check is enabled on the - -- generic unit, a warning in an instance would just be noise. If not - -- enabled on the generic, then a warning in an instance is just wrong. - -- This must be done after analyzing the actuals, which do come from - -- source and are subject to style checking. - - Style_Check := False; - Init_Env; Env_Installed := True; @@ -5016,6 +5008,14 @@ package body Sem_Ch12 is Check_Generic_Child_Unit (Gen_Id, Parent_Installed); end if; + -- Turn off style checking in instances. If the check is enabled on the + -- generic unit, a warning in an instance would just be noise. If not + -- enabled on the generic, then a warning in an instance is just wrong. + -- This must be done after analyzing the actuals and possibly installing + -- the parent, which come from source and are subject to style checking. 
+ + Style_Check := False; + Gen_Unit := Entity (Gen_Id); -- A package instantiation is Ghost when it is subject to pragma Ghost diff --git a/gcc/ada/sem_ch7.adb b/gcc/ada/sem_ch7.adb index 1d838e2..90219ac 100644 --- a/gcc/ada/sem_ch7.adb +++ b/gcc/ada/sem_ch7.adb @@ -206,7 +206,7 @@ package body Sem_Ch7 is function Node_Hash (Id : Entity_Id) return Entity_Header_Num; -- Simple hash function for Entity_Ids - package Subprogram_Table is new GNAT.Htable.Simple_HTable + package Subprogram_Table is new GNAT.HTable.Simple_HTable (Header_Num => Entity_Header_Num, Element => Boolean, No_Element => False, @@ -216,7 +216,7 @@ package body Sem_Ch7 is -- Hash table to record which subprograms are referenced. It is declared -- at library level to avoid elaborating it for every call to Analyze. - package Traversed_Table is new GNAT.Htable.Simple_HTable + package Traversed_Table is new GNAT.HTable.Simple_HTable (Header_Num => Entity_Header_Num, Element => Boolean, No_Element => False, diff --git a/gcc/ada/sem_util.adb b/gcc/ada/sem_util.adb index 9e2083b..7f864d6 100644 --- a/gcc/ada/sem_util.adb +++ b/gcc/ada/sem_util.adb @@ -31148,7 +31148,7 @@ package body Sem_Util is ---------------------- package Interval_Sorting is - new Gnat.Heap_Sort_G (Move_Interval, Lt_Interval); + new GNAT.Heap_Sort_G (Move_Interval, Lt_Interval); ------------- -- Is_Null -- diff --git a/gcc/attr-callback.cc b/gcc/attr-callback.cc new file mode 100644 index 0000000..83d2754 --- /dev/null +++ b/gcc/attr-callback.cc @@ -0,0 +1,367 @@ +/* Callback attribute handling + Copyright (C) 2025 Free Software Foundation, Inc. + Contributed by Josef Melcr <jmelcr@gcc.gnu.org> + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "tree.h" +#include "gimple.h" +#include "alloc-pool.h" +#include "cgraph.h" +#include "diagnostic.h" +#include "builtins.h" +#include "options.h" +#include "gimple-range.h" +#include "attribs.h" +#include "attr-callback.h" + +/* Returns a callback attribute with callback index FN_IDX, and ARG_COUNT + arguments specified by VA_ARGS. */ +tree +callback_build_attr (unsigned fn_idx, unsigned arg_count...) +{ + va_list args; + va_start (args, arg_count); + + tree cblist = NULL_TREE; + tree *pp = &cblist; + unsigned i; + for (i = 0; i < arg_count; i++) + { + int num = va_arg (args, int); + tree tnum = build_int_cst (integer_type_node, num); + *pp = build_tree_list (NULL, tnum PASS_MEM_STAT); + pp = &TREE_CHAIN (*pp); + } + cblist + = tree_cons (NULL_TREE, build_int_cst (integer_type_node, fn_idx), cblist); + tree attr + = tree_cons (get_identifier (CALLBACK_ATTR_IDENT), cblist, NULL_TREE); + return attr; +} + +/* Returns TRUE if a function should be treated as if it had a callback + attribute despite the DECL not having it. 
STMT can be passed NULL + if the call statement is not available at the time, for example WPA, but it + should be called with the statement itself whenever possible. */ +bool +callback_is_special_cased (tree decl, gcall *stmt) +{ + if (fndecl_built_in_p (decl, BUILT_IN_GOMP_TASK)) + { + if (stmt) + return gimple_call_arg (stmt, 2) == null_pointer_node; + return true; + } + return false; +} + +/* Returns an attribute for a special cased function. */ +tree +callback_special_case_attr (tree decl) +{ + if (fndecl_built_in_p (decl, BUILT_IN_GOMP_TASK)) + return callback_build_attr (1, 1, 2); + gcc_unreachable (); +} + +/* Given an instance of callback attribute, return the 0-based + index of the called function in question. */ +int +callback_get_fn_index (tree cb_attr) +{ + tree args = TREE_VALUE (cb_attr); + int idx = TREE_INT_CST_LOW (TREE_VALUE (args)) - 1; + return idx; +} + +/* For a given callback pair, retrieves the callback attribute used + to create E from the callee of CARRYING. */ +tree +callback_fetch_attr_by_edge (cgraph_edge *e, cgraph_edge *carrying) +{ + gcc_checking_assert (e->call_stmt == carrying->call_stmt + && e->lto_stmt_uid == carrying->lto_stmt_uid); + + if (callback_is_special_cased (carrying->callee->decl, e->call_stmt)) + return callback_special_case_attr (carrying->callee->decl); + + tree cb_attr = lookup_attribute (CALLBACK_ATTR_IDENT, + DECL_ATTRIBUTES (carrying->callee->decl)); + gcc_checking_assert (cb_attr); + tree res = NULL_TREE; + for (; cb_attr; + cb_attr = lookup_attribute (CALLBACK_ATTR_IDENT, TREE_CHAIN (cb_attr))) + { + unsigned id = callback_get_fn_index (cb_attr); + if (id == e->callback_id) + { + res = cb_attr; + break; + } + } + gcc_checking_assert (res != NULL_TREE); + return res; +} + +/* Given an instance of callback attribute, return the 0-base indices + of arguments passed to the callback. For a callback function taking + n parameters, returns a vector of n indices of their values in the parameter + list of it's caller. Indices with unknown positions contain -1. */ +auto_vec<int> +callback_get_arg_mapping (cgraph_edge *e, cgraph_edge *carrying) +{ + tree attr = callback_fetch_attr_by_edge (e, carrying); + gcc_checking_assert (attr); + tree args = TREE_VALUE (attr); + auto_vec<int> res; + tree it; + + /* Skip over the first argument, which denotes + which argument is the called function. */ + for (it = TREE_CHAIN (args); it != NULL_TREE; it = TREE_CHAIN (it)) + { + int idx = TREE_INT_CST_LOW (TREE_VALUE (it)); + /* Subtract 1 to account for 1-based indexing. If the value is unknown, + use constant -1 instead. */ + idx = idx == CB_UNKNOWN_POS ? -1 : idx - 1; + res.safe_push (idx); + } + + return res; +} + +/* For a callback pair, returns the 0-based index of the address of + E's callee in the argument list of CARRYING's callee decl. */ +int +callback_fetch_fn_position (cgraph_edge *e, cgraph_edge *carrying) +{ + tree attr = callback_fetch_attr_by_edge (e, carrying); + return callback_get_fn_index (attr); +} + +/* Returns the element at index idx in the list or NULL_TREE if + the list isn't long enough. NULL_TREE is used as the endpoint. */ +static tree +get_nth_list_elem (tree list, unsigned idx) +{ + tree res = NULL_TREE; + unsigned i = 0; + tree it; + for (it = list; it != NULL_TREE; it = TREE_CHAIN (it), i++) + { + if (i == idx) + { + res = TREE_VALUE (it); + break; + } + } + return res; +} + +/* Handle a "callback" attribute; arguments as in + struct attribute_spec.handler. 
*/ +tree +handle_callback_attribute (tree *node, tree name, tree args, + int ARG_UNUSED (flags), bool *no_add_attrs) +{ + tree decl = *node; + if (TREE_CODE (decl) != FUNCTION_DECL) + { + error_at (DECL_SOURCE_LOCATION (decl), + "%qE attribute can only be used on functions", name); + *no_add_attrs = true; + } + + tree cb_fn_idx_node = TREE_VALUE (args); + if (TREE_CODE (cb_fn_idx_node) != INTEGER_CST) + { + error_at (DECL_SOURCE_LOCATION (decl), + "argument specifying callback function position is not an " + "integer constant"); + *no_add_attrs = true; + return NULL_TREE; + } + /* We have to use the function type for validation, as + DECL_ARGUMENTS returns NULL at this point. */ + int callback_fn_idx = TREE_INT_CST_LOW (cb_fn_idx_node); + tree decl_type_args = TYPE_ARG_TYPES (TREE_TYPE (decl)); + tree it; + int decl_nargs = list_length (decl_type_args); + for (it = decl_type_args; it != NULL_TREE; it = TREE_CHAIN (it)) + if (it == void_list_node) + { + --decl_nargs; + break; + } + if (callback_fn_idx == CB_UNKNOWN_POS) + { + error_at (DECL_SOURCE_LOCATION (decl), + "callback function position cannot be marked as unknown"); + *no_add_attrs = true; + return NULL_TREE; + } + --callback_fn_idx; + if (callback_fn_idx >= decl_nargs) + { + error_at (DECL_SOURCE_LOCATION (decl), + "callback function position out of range"); + *no_add_attrs = true; + return NULL_TREE; + } + + /* Search for the type of the callback function + in parameters of the original function. */ + tree cfn = get_nth_list_elem (decl_type_args, callback_fn_idx); + if (cfn == NULL_TREE) + { + error_at (DECL_SOURCE_LOCATION (decl), + "could not retrieve callback function from arguments"); + *no_add_attrs = true; + return NULL_TREE; + } + tree cfn_pointee_type = TREE_TYPE (cfn); + if (TREE_CODE (cfn) != POINTER_TYPE + || TREE_CODE (cfn_pointee_type) != FUNCTION_TYPE) + { + error_at (DECL_SOURCE_LOCATION (decl), + "argument no. %d is not an address of a function", + callback_fn_idx + 1); + *no_add_attrs = true; + return NULL_TREE; + } + + tree type_args = TYPE_ARG_TYPES (cfn_pointee_type); + /* Compare the length of the list of argument indices + and the real number of parameters the callback takes. */ + unsigned cfn_nargs = list_length (TREE_CHAIN (args)); + unsigned type_nargs = list_length (type_args); + for (it = type_args; it != NULL_TREE; it = TREE_CHAIN (it)) + if (it == void_list_node) + { + --type_nargs; + break; + } + if (cfn_nargs != type_nargs) + { + error_at (DECL_SOURCE_LOCATION (decl), + "argument number mismatch, %d expected, got %d", type_nargs, + cfn_nargs); + *no_add_attrs = true; + return NULL_TREE; + } + + unsigned curr = 0; + tree cfn_it; + /* Validate type compatibility of the arguments passed + from caller function to callback. "it" is used to step + through the parameters of the caller, "cfn_it" is + stepping through the parameters of the callback. */ + for (it = type_args, cfn_it = TREE_CHAIN (args); curr < type_nargs; + it = TREE_CHAIN (it), cfn_it = TREE_CHAIN (cfn_it), curr++) + { + if (TREE_CODE (TREE_VALUE (cfn_it)) != INTEGER_CST) + { + error_at (DECL_SOURCE_LOCATION (decl), + "argument no. %d is not an integer constant", curr + 1); + *no_add_attrs = true; + continue; + } + + int arg_idx = TREE_INT_CST_LOW (TREE_VALUE (cfn_it)); + + /* No need to check for type compatibility, + if we don't know what we are passing. */ + if (arg_idx == CB_UNKNOWN_POS) + continue; + + arg_idx -= 1; + /* Report an error if the position is out of bounds, + but we can still check the rest of the arguments. 
*/ + if (arg_idx >= decl_nargs) + { + error_at (DECL_SOURCE_LOCATION (decl), + "callback argument index %d is out of range", arg_idx + 1); + *no_add_attrs = true; + continue; + } + + tree arg_type = get_nth_list_elem (decl_type_args, arg_idx); + tree expected_type = TREE_VALUE (it); + /* Check the type of the value we are about to pass ("arg_type") + for compatibility with the actual type the callback function + expects ("expected_type"). */ + if (!types_compatible_p (expected_type, arg_type)) + { + error_at (DECL_SOURCE_LOCATION (decl), + "argument type at index %d is not compatible with callback " + "argument type at index %d", + arg_idx + 1, curr + 1); + *no_add_attrs = true; + continue; + } + } + + /* Check that the decl does not already have a callback attribute describing + the same argument. */ + it = lookup_attribute (CALLBACK_ATTR_IDENT, DECL_ATTRIBUTES (decl)); + for (; it; it = lookup_attribute (CALLBACK_ATTR_IDENT, TREE_CHAIN (it))) + if (callback_get_fn_index (it) == callback_fn_idx) + { + error_at (DECL_SOURCE_LOCATION (decl), + "function declaration has multiple callback attributes " + "describing argument no. %d", + callback_fn_idx + 1); + *no_add_attrs = true; + break; + } + + return NULL_TREE; +} + +/* Returns TRUE if E is considered useful in the callgraph, FALSE otherwise. If + this predicate returns FALSE, then E wasn't used to optimize its callee and + can be safely removed from the callgraph. */ +bool +callback_edge_useful_p (cgraph_edge *e) +{ + gcc_checking_assert (e->callback); + /* If the edge is not pointing towards a clone, it is no longer useful as its + entire purpose is to produce clones of callbacks. */ + if (!e->callee->clone_of) + return false; + return true; +} + +/* Returns the number of arguments the callback function described by ATTR + takes. */ + +size_t +callback_num_args (tree attr) +{ + tree args = TREE_VALUE (attr); + size_t res = 0; + tree it; + + for (it = TREE_CHAIN (args); it != NULL_TREE; it = TREE_CHAIN (it), ++res) + ; + return res; +} diff --git a/gcc/attr-callback.h b/gcc/attr-callback.h new file mode 100644 index 0000000..b0b0843 --- /dev/null +++ b/gcc/attr-callback.h @@ -0,0 +1,78 @@ +/* Callback attribute handling + Copyright (C) 2025 Free Software Foundation, Inc. + Contributed by Josef Melcr <jmelcr@gcc.gnu.org> + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#ifndef ATTR_CALLBACK_H +#define ATTR_CALLBACK_H + +enum callback_position +{ + /* Value used when an argument of a callback function + is unknown or when multiple values may be used. */ + CB_UNKNOWN_POS = 0 +}; + +#define CALLBACK_ATTR_IDENT " callback" + +/* Returns a callback attribute with callback index FN_IDX, and ARG_COUNT + arguments specified by VA_ARGS. */ +tree callback_build_attr (unsigned fn_idx, unsigned arg_count...); + +/* Returns TRUE if a function should be treated as if it had a callback + attribute despite the DECL not having it. 
STMT can be passed NULL + if the call statement is not available at the time, for example WPA, but it + should be called with the statement itself whenever possible. */ +bool callback_is_special_cased (tree decl, gcall *stmt); + +/* Returns an attribute for a special cased function. */ +tree callback_special_case_attr (tree decl); + +/* Given an instance of callback attribute, return the 0-based + index of the called function in question. */ +int callback_get_fn_index (tree cb_attr); + +/* For a given callback pair, retrieves the callback attribute used + to create E from the callee of CARRYING. */ +tree callback_fetch_attr_by_edge (cgraph_edge *e, cgraph_edge *carrying); + +/* Given an instance of callback attribute, return the 0-base indices + of arguments passed to the callback. For a callback function taking + n parameters, returns a vector of n indices of their values in the parameter + list of it's caller. Indices with unknown positions contain -1. */ +auto_vec<int> callback_get_arg_mapping (cgraph_edge *e, cgraph_edge *carrying); + +/* For a callback pair, returns the 0-based index of the address of + E's callee in the argument list of CARRYING's callee decl. */ +int callback_fetch_fn_position (cgraph_edge *e, cgraph_edge *carrying); + +/* Handle a "callback" attribute; arguments as in + struct attribute_spec.handler. */ +tree handle_callback_attribute (tree *node, tree name, tree args, int flags, + bool *no_add_attrs); + +/* Returns TRUE if E is considered useful in the callgraph, FALSE otherwise. If + this predicate returns FALSE, then E wasn't used to optimize its callee and + can be safely removed from the callgraph. */ +bool callback_edge_useful_p (cgraph_edge *e); + +/* Returns the number of arguments the callback function described by ATTR + takes. */ +size_t callback_num_args (tree attr); + +#endif /* ATTR_CALLBACK_H */ diff --git a/gcc/builtin-attrs.def b/gcc/builtin-attrs.def index 2b82fc2..dedb841 100644 --- a/gcc/builtin-attrs.def +++ b/gcc/builtin-attrs.def @@ -130,6 +130,7 @@ DEF_ATTR_IDENT (ATTR_TM_TMPURE, "transaction_pure") DEF_ATTR_IDENT (ATTR_RETURNS_TWICE, "returns_twice") DEF_ATTR_IDENT (ATTR_RETURNS_NONNULL, "returns_nonnull") DEF_ATTR_IDENT (ATTR_WARN_UNUSED_RESULT, "warn_unused_result") +DEF_ATTR_IDENT (ATTR_CALLBACK, " callback") DEF_ATTR_TREE_LIST (ATTR_NOVOPS_LIST, ATTR_NOVOPS, ATTR_NULL, ATTR_NULL) @@ -430,6 +431,16 @@ DEF_FORMAT_ATTRIBUTE_NOTHROW(STRFMON,3,3_4) #undef DEF_FORMAT_ATTRIBUTE_NOTHROW #undef DEF_FORMAT_ATTRIBUTE_BOTH +/* Construct callback attributes for GOMP builtins. */ +#define DEF_CALLBACK_ATTRIBUTE(TYPE, CA, VALUES) \ + DEF_ATTR_TREE_LIST (ATTR_CALLBACK_##TYPE##_##CA##_##VALUES, ATTR_CALLBACK,\ + ATTR_##CA, ATTR_LIST_##VALUES) + +DEF_CALLBACK_ATTRIBUTE(GOMP, 1, 2) +DEF_ATTR_TREE_LIST(ATTR_CALLBACK_GOMP_LIST, ATTR_CALLBACK, + ATTR_CALLBACK_GOMP_1_2, ATTR_NOTHROW_LIST) +#undef DEF_CALLBACK_ATTRIBUTE + /* Transactional memory variants of the above. */ DEF_ATTR_TREE_LIST (ATTR_TM_NOTHROW_LIST, diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index 16cdcf3..30a66de 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,7 @@ +2025-10-17 Josef Melcr <jmelcr02@gmail.com> + + * c-attribs.cc: Define callback attr. 
+ 2025-10-14 Jakub Jelinek <jakub@redhat.com> * c.opt (Wflex-array-member-not-at-end, Wignored-qualifiers, diff --git a/gcc/c-family/c-attribs.cc b/gcc/c-family/c-attribs.cc index cf82cdf..8ca767a 100644 --- a/gcc/c-family/c-attribs.cc +++ b/gcc/c-family/c-attribs.cc @@ -49,6 +49,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-pretty-print.h" #include "gcc-rich-location.h" #include "gcc-urlifier.h" +#include "attr-callback.h" static tree handle_packed_attribute (tree *, tree, tree, int, bool *); static tree handle_nocommon_attribute (tree *, tree, tree, int, bool *); @@ -484,6 +485,8 @@ const struct attribute_spec c_common_gnu_attributes[] = handle_tm_attribute, NULL }, { "transaction_may_cancel_outer", 0, 0, false, true, false, false, handle_tm_attribute, NULL }, + { CALLBACK_ATTR_IDENT, 1, -1, true, false, false, false, + handle_callback_attribute, NULL }, /* ??? These two attributes didn't make the transition from the Intel language document to the multi-vendor language document. */ { "transaction_pure", 0, 0, false, true, false, false, diff --git a/gcc/cgraph.cc b/gcc/cgraph.cc index 07966a6..d1b2e2a 100644 --- a/gcc/cgraph.cc +++ b/gcc/cgraph.cc @@ -69,6 +69,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-nested.h" #include "symtab-thunks.h" #include "symtab-clones.h" +#include "attr-callback.h" /* FIXME: Only for PROP_loops, but cgraph shouldn't have to know about this. */ #include "tree-pass.h" @@ -871,11 +872,22 @@ cgraph_add_edge_to_call_site_hash (cgraph_edge *e) one indirect); always hash the direct one. */ if (e->speculative && e->indirect_unknown_callee) return; + /* We always want to hash the carrying edge of a callback, not the edges + pointing to the callbacks themselves, as their call statement doesn't + exist. */ + if (e->callback) + return; cgraph_edge **slot = e->caller->call_site_hash->find_slot_with_hash (e->call_stmt, cgraph_edge_hasher::hash (e->call_stmt), INSERT); if (*slot) { - gcc_assert (((cgraph_edge *)*slot)->speculative); + cgraph_edge *edge = (cgraph_edge *) *slot; + gcc_assert (edge->speculative || edge->has_callback); + if (edge->has_callback) + /* If the slot is already occupied, then the hashed edge is the + callback-carrying edge, which is desired behavior, so we can safely + return. */ + gcc_checking_assert (edge == e); if (e->callee && (!e->prev_callee || !e->prev_callee->speculative || e->prev_callee->call_stmt != e->call_stmt)) @@ -919,6 +931,13 @@ cgraph_node::get_edge (gimple *call_stmt) n++; } + /* We want to work with the callback-carrying edge whenever possible. When it + comes to callback edges, a call statement might have multiple callback + edges attached to it. These can be easily obtained from the carrying edge + instead. */ + if (e && e->callback) + e = e->get_callback_carrying_edge (); + if (n > 100) { call_site_hash = hash_table<cgraph_edge_hasher>::create_ggc (120); @@ -931,15 +950,16 @@ cgraph_node::get_edge (gimple *call_stmt) return e; } - -/* Change field call_stmt of edge E to NEW_STMT. If UPDATE_SPECULATIVE and E +/* Change field call_stmt of edge E to NEW_STMT. If UPDATE_DERIVED_EDGES and E is any component of speculative edge, then update all components. - Speculations can be resolved in the process and EDGE can be removed and - deallocated. Return the edge that now represents the call. */ + speculations can be resolved in the process and edge can be removed and + deallocated. 
if update_derived_edges and e is a part of a callback pair, + update all associated edges and return their carrying edge. return the edge + that now represents the call. */ cgraph_edge * cgraph_edge::set_call_stmt (cgraph_edge *e, gcall *new_stmt, - bool update_speculative) + bool update_derived_edges) { tree decl; @@ -955,7 +975,7 @@ cgraph_edge::set_call_stmt (cgraph_edge *e, gcall *new_stmt, /* Speculative edges has three component, update all of them when asked to. */ - if (update_speculative && e->speculative + if (update_derived_edges && e->speculative /* If we are about to resolve the speculation by calling make_direct below, do not bother going over all the speculative edges now. */ && !new_direct_callee) @@ -991,6 +1011,27 @@ cgraph_edge::set_call_stmt (cgraph_edge *e, gcall *new_stmt, if (new_direct_callee) e = make_direct (e, new_direct_callee); + /* When updating a callback or a callback-carrying edge, update every edge + involved. */ + if (update_derived_edges && (e->callback || e->has_callback)) + { + cgraph_edge *current, *next, *carrying; + carrying = e->has_callback ? e : e->get_callback_carrying_edge (); + + current = e->first_callback_edge (); + if (current) + { + for (cgraph_edge *d = current; d; d = next) + { + next = d->next_callback_edge (); + cgraph_edge *d2 = set_call_stmt (d, new_stmt, false); + gcc_assert (d2 == d); + } + } + carrying = set_call_stmt (carrying, new_stmt, false); + return carrying; + } + /* Only direct speculative edges go to call_site_hash. */ if (e->caller->call_site_hash && (!e->speculative || !e->indirect_unknown_callee) @@ -1036,7 +1077,7 @@ symbol_table::create_edge (cgraph_node *caller, cgraph_node *callee, construction of call stmt hashtable. */ cgraph_edge *e; gcc_checking_assert (!(e = caller->get_edge (call_stmt)) - || e->speculative); + || e->speculative || e->has_callback || e->callback); gcc_assert (is_gimple_call (call_stmt)); } @@ -1063,6 +1104,9 @@ symbol_table::create_edge (cgraph_node *caller, cgraph_node *callee, edge->indirect_info = NULL; edge->indirect_inlining_edge = 0; edge->speculative = false; + edge->has_callback = false; + edge->callback = false; + edge->callback_id = 0; edge->indirect_unknown_callee = indir_unknown_callee; if (call_stmt && caller->call_site_hash) cgraph_add_edge_to_call_site_hash (edge); @@ -1286,6 +1330,119 @@ cgraph_edge::make_speculative (cgraph_node *n2, profile_count direct_count, return e2; } +/* Create a callback edge calling N2. Callback edges + never get turned into actual calls, they are just used + as clues and allow for optimizing functions which do not + have any callsites during compile time, e.g. functions + passed to standard library functions. + + The edge will be attached to the same call statement as + the callback-carrying edge, which is the instance this method + is called on. + + callback_id is used to pair the returned edge with the attribute that + originated it. + + Return the resulting callback edge. 
*/ + +cgraph_edge * +cgraph_edge::make_callback (cgraph_node *n2, unsigned int callback_id) +{ + cgraph_node *n = caller; + cgraph_edge *e2; + + has_callback = true; + e2 = n->create_edge (n2, call_stmt, count); + if (dump_file) + fprintf ( + dump_file, + "Created callback edge %s -> %s belonging to carrying edge %s -> %s\n", + e2->caller->dump_name (), e2->callee->dump_name (), caller->dump_name (), + callee->dump_name ()); + e2->inline_failed = CIF_CALLBACK_EDGE; + e2->callback = true; + e2->callback_id = callback_id; + if (TREE_NOTHROW (n2->decl)) + e2->can_throw_external = false; + else + e2->can_throw_external = can_throw_external; + e2->lto_stmt_uid = lto_stmt_uid; + n2->mark_address_taken (); + return e2; +} + +/* Returns the callback-carrying edge of the callback edge this method is + called on, or NULL when no such edge can be found. + + An edge is taken to be the callback-carrying edge if it has its has_callback + flag set and the edges share their call statements. */ + +cgraph_edge * +cgraph_edge::get_callback_carrying_edge () +{ + gcc_checking_assert (callback); + cgraph_edge *e; + for (e = caller->callees; e; e = e->next_callee) + { + if (e->has_callback && e->call_stmt == call_stmt + && e->lto_stmt_uid == lto_stmt_uid) + break; + } + return e; +} + +/* Returns the first callback edge in the list of callees of the caller node. + Note that the edges might be in arbitrary order. Must be called on a + callback or callback-carrying edge. */ + +cgraph_edge * +cgraph_edge::first_callback_edge () +{ + gcc_checking_assert (has_callback || callback); + cgraph_edge *e = NULL; + for (e = caller->callees; e; e = e->next_callee) + { + if (e->callback && e->call_stmt == call_stmt + && e->lto_stmt_uid == lto_stmt_uid) + break; + } + return e; +} + +/* Given a callback edge, returns the next callback edge belonging to the same + carrying edge. Must be called on a callback edge, not the callback-carrying + edge. */ + +cgraph_edge * +cgraph_edge::next_callback_edge () +{ + gcc_checking_assert (callback); + cgraph_edge *e = NULL; + for (e = next_callee; e; e = e->next_callee) + { + if (e->callback && e->call_stmt == call_stmt + && e->lto_stmt_uid == lto_stmt_uid) + break; + } + return e; +} + +/* When called on a callback-carrying edge, removes all of its attached callback + edges and sets has_callback to FALSE. */ + +void +cgraph_edge::purge_callback_edges () +{ + gcc_checking_assert (has_callback); + cgraph_edge *e, *next; + for (e = first_callback_edge (); e; e = next) + { + next = e->next_callback_edge (); + cgraph_edge::remove (e); + } + has_callback = false; +} + /* Speculative call consists of an indirect edge and one or more direct edge+ref pairs. @@ -1521,12 +1678,27 @@ void cgraph_edge::redirect_callee (cgraph_node *n) { bool loc = callee->comdat_local_p (); + cgraph_node *old_callee = callee; + /* Remove from callers list of the current callee. */ remove_callee (); /* Insert to callers list of the new callee. */ set_callee (n); + if (callback) + { + /* When redirecting a callback callee, redirect its ref as well.
*/ + ipa_ref *old_ref = caller->find_reference (old_callee, call_stmt, + lto_stmt_uid, IPA_REF_ADDR); + gcc_checking_assert(old_ref); + old_ref->remove_reference (); + ipa_ref *new_ref = caller->create_reference (n, IPA_REF_ADDR, call_stmt); + new_ref->lto_stmt_uid = lto_stmt_uid; + if (!old_callee->referred_to_p ()) + old_callee->address_taken = 0; + } + if (!inline_failed) return; if (!loc && n->comdat_local_p ()) @@ -1643,6 +1815,27 @@ cgraph_edge::redirect_call_stmt_to_callee (cgraph_edge *e, || decl == e->callee->decl) return e->call_stmt; + /* When redirecting a callback edge, all we need to do is replace + the original address with the address of the function we are + redirecting to. */ + if (e->callback) + { + cgraph_edge *carrying = e->get_callback_carrying_edge (); + if (!callback_is_special_cased (carrying->callee->decl, e->call_stmt) + && !lookup_attribute (CALLBACK_ATTR_IDENT, + DECL_ATTRIBUTES (carrying->callee->decl))) + /* Callback attribute is removed if the dispatching function changes + signature, as the indices wouldn't be correct anymore. These edges + will get cleaned up later, ignore their redirection for now. */ + return e->call_stmt; + int fn_idx = callback_fetch_fn_position (e, carrying); + tree previous_arg = gimple_call_arg (e->call_stmt, fn_idx); + location_t loc = EXPR_LOCATION (previous_arg); + tree new_addr = build_fold_addr_expr_loc (loc, e->callee->decl); + gimple_call_set_arg (e->call_stmt, fn_idx, new_addr); + return e->call_stmt; + } + if (decl && ipa_saved_clone_sources) { tree *p = ipa_saved_clone_sources->get (e->callee); @@ -1752,7 +1945,9 @@ cgraph_edge::redirect_call_stmt_to_callee (cgraph_edge *e, maybe_remove_unused_call_args (DECL_STRUCT_FUNCTION (e->caller->decl), new_stmt); - e->caller->set_call_stmt_including_clones (e->call_stmt, new_stmt, false); + /* Update callback edges if setting the carrying edge's statement, or else + their pairing would fall apart. */ + e->caller->set_call_stmt_including_clones (e->call_stmt, new_stmt, e->has_callback); if (symtab->dump_file) { @@ -1944,6 +2139,17 @@ cgraph_node::remove_callers (void) for (e = callers; e; e = f) { f = e->next_caller; + /* When removing a callback-carrying edge, remove all its attached edges + as well. */ + if (e->has_callback) + { + cgraph_edge *cbe, *next_cbe = NULL; + for (cbe = e->first_callback_edge (); cbe; cbe = next_cbe) + { + next_cbe = cbe->next_callback_edge (); + cgraph_edge::remove (cbe); + } + } symtab->call_edge_removal_hooks (e); e->remove_caller (); symtab->free_edge (e); @@ -2253,6 +2459,10 @@ cgraph_edge::dump_edge_flags (FILE *f) { if (speculative) fprintf (f, "(speculative) "); + if (callback) + fprintf (f, "(callback) "); + if (has_callback) + fprintf (f, "(has_callback) "); if (!inline_failed) fprintf (f, "(inlined) "); if (call_stmt_cannot_inline_p) @@ -3866,6 +4076,8 @@ cgraph_node::verify_node (void) if (gimple_has_body_p (e->caller->decl) && !e->caller->inlined_to && !e->speculative + && !e->callback + && !e->has_callback /* Optimized out calls are redirected to __builtin_unreachable. */ && (e->count.nonzero_p () || ! e->callee->decl @@ -4071,7 +4283,12 @@ cgraph_node::verify_node (void) } if (!e->indirect_unknown_callee) { - if (e->verify_corresponds_to_fndecl (decl)) + /* Callback edges violate this assertion + because their call statement doesn't exist, + their associated statement belongs to the + callback-dispatching function. 
*/ + if (!e->callback + && e->verify_corresponds_to_fndecl (decl)) { error ("edge points to wrong declaration:"); debug_tree (e->callee->decl); @@ -4113,7 +4330,58 @@ cgraph_node::verify_node (void) for (e = callees; e; e = e->next_callee) { - if (!e->aux && !e->speculative) + if (!e->callback && e->callback_id) + { + error ("non-callback edge has callback_id set"); + error_found = true; + } + + if (e->callback && e->has_callback) + { + error ("edge has both callback and has_callback set"); + error_found = true; + } + + if (e->callback) + { + if (!e->get_callback_carrying_edge ()) + { + error ("callback edge %s->%s has no callback-carrying edge", + identifier_to_locale (e->caller->name ()), + identifier_to_locale (e->callee->name ())); + error_found = true; + } + } + + if (e->has_callback + && !callback_is_special_cased (e->callee->decl, e->call_stmt)) + { + int ncallbacks = 0; + int nfound_edges = 0; + for (tree cb = lookup_attribute (CALLBACK_ATTR_IDENT, DECL_ATTRIBUTES ( + e->callee->decl)); + cb; cb = lookup_attribute (CALLBACK_ATTR_IDENT, TREE_CHAIN (cb)), + ncallbacks++) + ; + for (cgraph_edge *cbe = callees; cbe; cbe = cbe->next_callee) + { + if (cbe->callback && cbe->call_stmt == e->call_stmt + && cbe->lto_stmt_uid == e->lto_stmt_uid) + { + nfound_edges++; + } + } + if (ncallbacks < nfound_edges) + { + error ("callback edge %s->%s callback edge count mismatch, " + "expected at most %d, found %d", + identifier_to_locale (e->caller->name ()), + identifier_to_locale (e->callee->name ()), ncallbacks, + nfound_edges); + } + } + + if (!e->aux && !e->speculative && !e->callback && !e->has_callback) { error ("edge %s->%s has no corresponding call_stmt", identifier_to_locale (e->caller->name ()), diff --git a/gcc/cgraph.h b/gcc/cgraph.h index b68a8df..069e007 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -1738,12 +1738,14 @@ public: /* Remove EDGE from the cgraph. */ static void remove (cgraph_edge *edge); - /* Change field call_stmt of edge E to NEW_STMT. If UPDATE_SPECULATIVE and E - is any component of speculative edge, then update all components. + /* Change field call_stmt of edge E to NEW_STMT. If UPDATE_DERIVED_EDGES and + E is any component of speculative edge, then update all components. Speculations can be resolved in the process and EDGE can be removed and - deallocated. Return the edge that now represents the call. */ + deallocated. Return the edge that now represents the call. If + UPDATE_DERIVED_EDGES and E is part of a callback pair, update all + associated edges and return the callback-carrying edge. */ static cgraph_edge *set_call_stmt (cgraph_edge *e, gcall *new_stmt, - bool update_speculative = true); + bool update_derived_edges = true); /* Redirect callee of the edge to N. The function does not update underlying call expression. */ @@ -1769,6 +1771,32 @@ public: cgraph_edge *make_speculative (cgraph_node *n2, profile_count direct_count, unsigned int speculative_id = 0); + /* Create a callback edge, representing an indirect call to n2 + passed to a function by argument. Sets has_callback flag of the original + edge. Both edges are attached to the same call statement. Returns the + created callback edge. */ + cgraph_edge *make_callback (cgraph_node *n2, unsigned int callback_hash); + + /* Returns the callback-carrying edge of a callback edge, or NULL if such an + edge cannot be found. An edge is considered callback-carrying if it has its + has_callback flag set and shares its call statement with the edge + this method is called on.
*/ cgraph_edge *get_callback_carrying_edge (); + + /* Returns the first callback edge in the list of callees of the caller node. + Note that the edges might be in arbitrary order. Must be called on a + callback or callback-carrying edge. */ + cgraph_edge *first_callback_edge (); + + /* Given a callback edge, returns the next callback edge belonging to the same + callback-carrying edge. Must be called on a callback edge, not the + callback-carrying edge. */ + cgraph_edge *next_callback_edge (); + + /* When called on a callback-carrying edge, removes all of its attached + callback edges and sets has_callback to FALSE. */ + void purge_callback_edges (); + /* Speculative call consists of an indirect edge and one or more direct edge+ref pairs. Speculative will expand to the following sequence: @@ -1990,6 +2018,23 @@ public: Optimizers may later redirect direct call to clone, so 1) and 3) do not need to necessarily agree with destination. */ unsigned int speculative : 1; + /* Edges with CALLBACK flag represent indirect calls to functions passed + to their callers by argument. This is useful in cases where the body + of these caller functions is not known, e.g. qsort in glibc or + GOMP_parallel in libgomp. These edges are never made into real calls, + but are used instead to optimize these callback functions and later replace + their addresses with their optimized versions. Edges with this flag set + share their call statement with their callback-carrying edge. */ + unsigned int callback : 1; + /* Edges with this flag set have one or more callback edges attached. They + share their call statements with this edge. This flag represents the fact + that the callee of this edge takes a function and its parameters by + argument and calls it at a later time. */ + unsigned int has_callback : 1; + /* Used to pair callback edges and the attributes that originated them + together. Currently the index of the callback argument, retrieved + from the attribute. */ + unsigned int callback_id : 16; /* Set to true when caller is a constructor or destructor of polymorphic type. */ unsigned in_polymorphic_cdtor : 1; diff --git a/gcc/cgraphclones.cc b/gcc/cgraphclones.cc index cc7e78d..49f0e58 100644 --- a/gcc/cgraphclones.cc +++ b/gcc/cgraphclones.cc @@ -144,6 +144,9 @@ cgraph_edge::clone (cgraph_node *n, gcall *call_stmt, unsigned stmt_uid, new_edge->can_throw_external = can_throw_external; new_edge->call_stmt_cannot_inline_p = call_stmt_cannot_inline_p; new_edge->speculative = speculative; + new_edge->callback = callback; + new_edge->has_callback = has_callback; + new_edge->callback_id = callback_id; new_edge->in_polymorphic_cdtor = in_polymorphic_cdtor; /* Update IPA profile. Local profiles need no updating in original. */ diff --git a/gcc/cif-code.def b/gcc/cif-code.def index 8735eaf..a54116f 100644 --- a/gcc/cif-code.def +++ b/gcc/cif-code.def @@ -142,3 +142,8 @@ DEFCIFCODE(EXTERN_LIVE_ONLY_STATIC, CIF_FINAL_ERROR, /* We proved that the call is unreachable. */ DEFCIFCODE(UNREACHABLE, CIF_FINAL_ERROR, N_("unreachable")) + +/* Callback edges cannot be inlined, as the corresponding call + statement does not exist. */ +DEFCIFCODE(CALLBACK_EDGE, CIF_FINAL_ERROR, + N_("callback edges cannot be inlined")) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 0d5b02a..eaa8d57 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -4647,7 +4647,7 @@ ;; <su><addsub>w<q>.
-(define_expand "widen_ssum<mode>3" +(define_expand "widen_ssum<Vdblw><mode>3" [(set (match_operand:<VDBLW> 0 "register_operand") (plus:<VDBLW> (sign_extend:<VDBLW> (match_operand:VQW 1 "register_operand")) @@ -4664,7 +4664,7 @@ } ) -(define_expand "widen_ssum<mode>3" +(define_expand "widen_ssum<Vwide><mode>3" [(set (match_operand:<VWIDE> 0 "register_operand") (plus:<VWIDE> (sign_extend:<VWIDE> (match_operand:VD_BHSI 1 "register_operand")) @@ -4675,7 +4675,7 @@ DONE; }) -(define_expand "widen_usum<mode>3" +(define_expand "widen_usum<Vdblw><mode>3" [(set (match_operand:<VDBLW> 0 "register_operand") (plus:<VDBLW> (zero_extend:<VDBLW> (match_operand:VQW 1 "register_operand")) @@ -4692,7 +4692,7 @@ } ) -(define_expand "widen_usum<mode>3" +(define_expand "widen_usum<Vwide><mode>3" [(set (match_operand:<VWIDE> 0 "register_operand") (plus:<VWIDE> (zero_extend:<VWIDE> (match_operand:VD_BHSI 1 "register_operand")) @@ -4703,6 +4703,38 @@ DONE; }) +(define_expand "widen_ssum<mode><vsi2qi>3" + [(set (match_operand:VS 0 "register_operand") + (plus:VS (sign_extend:VS + (match_operand:<VSI2QI> 1 "register_operand")) + (match_operand:VS 2 "register_operand")))] + "TARGET_DOTPROD" + { + rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode)); + emit_insn (gen_sdot_prod<mode><vsi2qi> (operands[0], operands[1], ones, + operands[2])); + DONE; + } +) + +;; Use dot product to perform double widening sum reductions by +;; changing += a into += (a * 1). i.e. we seed the multiplication with 1. +(define_expand "widen_usum<mode><vsi2qi>3" + [(set (match_operand:VS 0 "register_operand") + (plus:VS (zero_extend:VS + (match_operand:<VSI2QI> 1 "register_operand")) + (match_operand:VS 2 "register_operand")))] + "TARGET_DOTPROD" + { + rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode)); + emit_insn (gen_udot_prod<mode><vsi2qi> (operands[0], operands[1], ones, + operands[2])); + DONE; + } +) + +;; Use dot product to perform double widening sum reductions by +;; changing += a into += (a * 1). i.e. we seed the multiplication with 1. (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w") diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc index 74a3338..b315dc91 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc @@ -21,7 +21,10 @@ #include "system.h" #include "coretypes.h" #include "tm.h" +#include "basic-block.h" #include "tree.h" +#include "function.h" +#include "gimple.h" #include "rtl.h" #include "tm_p.h" #include "memmodel.h" @@ -68,23 +71,36 @@ za_group_is_pure_overload (const function_group_info &group) types in ARGUMENT_TYPES. RETURN_TYPE is the type returned by the function. */ static void -apply_predication (const function_instance &instance, tree return_type, +apply_predication (function_instance &instance, tree return_type, vec<tree> &argument_types) { + /* Initially mark the function as not being predicated. */ + instance.gp_index = -1; + /* There are currently no SME ZA instructions that have both merging and unpredicated forms, so for simplicity, the predicates are always included in the original format string. */ if (instance.pred != PRED_none && instance.pred != PRED_za_m) { argument_types.quick_insert (0, instance.gp_type ()); + instance.gp_index = 0; /* For unary merge operations, the first argument is a vector with the same type as the result. 
For unary_convert_narrowt it also provides the "bottom" half of active elements, and is present for all types of predication. */ auto nargs = argument_types.length () - 1; if (instance.shape->has_merge_argument_p (instance, nargs)) - argument_types.quick_insert (0, return_type); + { + argument_types.quick_insert (0, return_type); + instance.gp_index = 1; + } } + + /* In this case the predicate type we added above is a non-governing + predicate operand (and there is no GP), so update the gp_index value + accordingly. */ + if (!instance.shape->has_gp_argument_p (instance)) + instance.gp_index = -1; } /* Parse and move past an element type in FORMAT and return it as a type @@ -3332,6 +3348,14 @@ struct pmov_to_vector_lane_def : public overloaded_base<0> but it doesn't currently have the necessary information. */ return c.require_immediate_range (1, 1, bytes - 1); } + + /* This function has a predicate argument, and is a merging instruction, but + the predicate is not a GP. */ + bool + has_gp_argument_p (const function_instance &) const override + { + return false; + } }; SHAPE (pmov_to_vector_lane) diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc index 4956e36..b2b03dc8c 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc @@ -3632,24 +3632,22 @@ gimple_folder::redirect_pred_x () gimple * gimple_folder::fold_pfalse () { - if (pred == PRED_none) + tree gp = gp_value (call); + /* If there isn't a GP then we can't do any folding as the instruction isn't + predicated. */ + if (!gp) return nullptr; - tree arg0 = gimple_call_arg (call, 0); + if (pred == PRED_m) { - /* Unary function shapes with _m predication are folded to the - inactive vector (arg0), while other function shapes are folded - to op1 (arg1). */ - tree arg1 = gimple_call_arg (call, 1); - if (is_pfalse (arg1)) - return fold_call_to (arg0); - if (is_pfalse (arg0)) - return fold_call_to (arg1); + tree val = inactive_values (call); + if (is_pfalse (gp)) + return fold_call_to (val); return nullptr; } - if ((pred == PRED_x || pred == PRED_z) && is_pfalse (arg0)) + if ((pred == PRED_x || pred == PRED_z) && is_pfalse (gp)) return fold_call_to (build_zero_cst (TREE_TYPE (lhs))); - if (pred == PRED_implicit && is_pfalse (arg0)) + if (pred == PRED_implicit && is_pfalse (gp)) { unsigned int flags = call_properties (); /* Folding to lhs = {0, ...} is not appropriate for intrinsics with diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h index d6a58b4..6098d8f 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.h +++ b/gcc/config/aarch64/aarch64-sve-builtins.h @@ -403,6 +403,8 @@ public: bool could_trap_p () const; vector_type_index gp_type_index () const; + tree gp_value (gcall *) const; + tree inactive_values (gcall *) const; tree gp_type () const; unsigned int vectors_per_tuple () const; @@ -436,6 +438,7 @@ public: group_suffix_index group_suffix_id; predication_index pred; fpm_mode_index fpm_mode; + int gp_index; }; class registered_function; @@ -801,6 +804,8 @@ public: virtual bool has_merge_argument_p (const function_instance &, unsigned int) const; + virtual bool has_gp_argument_p (const function_instance &) const; + virtual bool explicit_type_suffix_p (unsigned int) const = 0; /* True if the group suffix is present in overloaded names. 
@@ -949,6 +954,33 @@ function_instance::gp_type () const return acle_vector_types[0][gp_type_index ()]; } +/* Return the tree value that should be used as the governing predicate of + this function. If none then return NULL_TREE. */ +inline tree +function_instance::gp_value (gcall *call) const +{ + if (gp_index < 0) + return NULL_TREE; + + return gimple_call_arg (call, gp_index); +} + +/* Return the tree value that should be used for the inactive lanes if this + function is a predicated function with a GP. Otherwise return NULL_TREE. */ +inline tree +function_instance::inactive_values (gcall *call) const +{ + if (gp_index < 0) + return NULL_TREE; + + /* Function is unary with m predicate. */ + if (gp_index == 1) + return gimple_call_arg (call, 0); + + /* Otherwise the inactive values are the next argument. */ + return gimple_call_arg (call, 1); +} + /* If the function operates on tuples of vectors, return the number of vectors in the tuples, otherwise return 1. */ inline unsigned int @@ -1123,6 +1155,14 @@ function_shape::has_merge_argument_p (const function_instance &instance, return nargs == 1 && instance.pred == PRED_m; } +/* Return true if INSTANCE has a predicate argument that can be used as the + governing predicate. */ +inline bool +function_shape::has_gp_argument_p (const function_instance &instance) const +{ + return instance.pred != PRED_none; +} + /* Return the mode of the result of a call. */ inline machine_mode function_expander::result_mode () const diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 8c47d44..550ff0a 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -7722,6 +7722,22 @@ [(set_attr "sve_type" "sve_int_dot")] ) +;; Define double widen_[su]sum as a dot product +;; Use dot product to perform double widening sum reductions by +;; changing += a into += (a * 1). i.e. we seed the multiplication with 1. +(define_expand "widen_<sur>sum<mode><vsi2qi>3" + [(set (match_operand:SVE_FULL_SDI 0 "register_operand") + (plus:SVE_FULL_SDI + (unspec:SVE_FULL_SDI + [(match_operand:<VSI2QI> 1 "register_operand") + (match_dup 3)] + DOTPROD) + (match_operand:SVE_FULL_SDI 2 "register_operand")))] + "TARGET_SVE" +{ + operands[3] = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode)); +}) + ;; ------------------------------------------------------------------------- ;; ---- [INT] Sum of absolute differences ;; ------------------------------------------------------------------------- diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index 69a3767..9109183 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -2377,6 +2377,58 @@ [(set_attr "sve_type" "sve_int_general")] ) +;; Define single step widening for widen_ssum using SADDWB and SADDWT +(define_expand "widen_ssum<mode><Vnarrow>3" + [(set (match_operand:SVE_FULL_HSDI 0 "register_operand") + (unspec:SVE_FULL_HSDI + [(match_operand:SVE_FULL_HSDI 2 "register_operand") + (match_operand:<VNARROW> 1 "register_operand")] + UNSPEC_SADDWB)) + (set (match_dup 0) + (unspec:SVE_FULL_HSDI + [(match_dup 0) + (match_dup 1)] + UNSPEC_SADDWT))] + "TARGET_SVE2" +{ + /* Use dot product to perform double widening sum reductions by + changing += a into += (a * 1). i.e. we seed the multiplication with 1.
*/ + if (TARGET_SVE2p1_OR_SME2 + && <VNARROW>mode == VNx8HImode + && <MODE>mode == VNx4SImode) + { + rtx ones = force_reg (VNx8HImode, CONST1_RTX (VNx8HImode)); + emit_insn (gen_sdot_prodvnx4sivnx8hi (operands[0], operands[1], + ones, operands[2])); + DONE; + } +}) + +;; Define single step widening for widen_usum using UADDWB and UADDWT +(define_expand "widen_usum<mode><Vnarrow>3" + [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w") + (unspec:SVE_FULL_HSDI + [(match_operand:SVE_FULL_HSDI 2 "register_operand" "w") + (match_operand:<VNARROW> 1 "register_operand" "w")] + UNSPEC_UADDWB)) + (set (match_dup 0) + (unspec:SVE_FULL_HSDI + [(match_dup 0) + (match_dup 1)] + UNSPEC_UADDWT))] + "TARGET_SVE2" +{ + if (TARGET_SVE2p1_OR_SME2 + && <VNARROW>mode == VNx8HImode + && <MODE>mode == VNx4SImode) + { + rtx ones = force_reg (VNx8HImode, CONST1_RTX (VNx8HImode)); + emit_insn (gen_udot_prodvnx4sivnx8hi (operands[0], operands[1], + ones, operands[2])); + DONE; + } +}) + ;; ------------------------------------------------------------------------- ;; ---- [INT] Long binary arithmetic ;; ------------------------------------------------------------------------- diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 332e7ff..3757998 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -1901,6 +1901,11 @@ (V4HI "V2SI") (V8HI "V4SI") (V2SI "DI") (V4SI "V2DI")]) +;; Modes with double-width elements. +(define_mode_attr Vdblw [(V8QI "v4hi") (V16QI "v8hi") + (V4HI "v2si") (V8HI "v4si") + (V2SI "di") (V4SI "v2di")]) + (define_mode_attr VQUADW [(V8QI "V4SI") (V16QI "V8SI") (V4HI "V2DI") (V8HI "V4DI")]) @@ -1930,6 +1935,11 @@ (VNx2DI "VNx4SI") (VNx2DF "VNx4SF") (VNx8SI "VNx8HI") (VNx16SI "VNx16QI") (VNx8DI "VNx8HI")]) +(define_mode_attr Vnarrow [(VNx8HI "vnx16qi") + (VNx4SI "vnx8hi") (VNx4SF "vnx8hf") + (VNx2DI "vnx4si") (VNx2DF "vnx4sf") + (VNx8SI "vnx8hi") (VNx16SI "vnx16qi") + (VNx8DI "vnx8hi")]) ;; Suffix mapping Advanced SIMD modes to be expanded as SVE instructions. (define_mode_attr sve_di_suf [(VNx16QI "") (VNx8HI "") (VNx4SI "") (VNx2DI "") @@ -2003,7 +2013,9 @@ (define_mode_attr VWIDE_PRED [(VNx8HF "VNx4BI") (VNx4SF "VNx2BI")]) ;; Widened modes of vector modes, lowercase -(define_mode_attr Vwide [(V2SF "v2df") (V4HF "v4sf") +(define_mode_attr Vwide [(V2SI "v2di") (V4HI "v4si") + (V2SF "v2df") (V4HF "v4sf") + (V8QI "v8hi") (VNx16QI "vnx8hi") (VNx8HI "vnx4si") (VNx4SI "vnx2di") (VNx8HF "vnx4sf") (VNx4SF "vnx2df") diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index eb519e7..dfbe027 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -1788,6 +1788,11 @@ (V4HI "V2SI") (V8HI "V4SI") (V2SI "DI") (V4SI "V2DI")]) +;; Modes with double-width elements. +(define_mode_attr v_double_width [(V8QI "v4hi") (V16QI "v8hi") + (V4HI "v2si") (V8HI "v4si") + (V2SI "di") (V4SI "v2di")]) + ;; Double-sized modes with the same element size. ;; Used for neon_vdup_lane, where the second operand is double-sized ;; even when the first one is quad. 
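For reference, here is a minimal C sketch (not part of the patch; the function names are illustrative) of the kind of widening sum reduction that the widen_ssum/widen_usum patterns above expand: narrow elements are accumulated into a wider accumulator, either with single-step widening adds (e.g. SADDWB/SADDWT) or with a dot product seeded with a multiplier of 1, depending on what the target provides.

/* Illustrative only: 8-bit elements summed into a 32-bit accumulator.
   The vectorizer maps this reduction onto widen_ssum<n><m>3 /
   widen_usum<n><m>3 when the target defines those patterns.  */
int
widen_sum_example (signed char *x, int n)
{
  int sum = 0;
  for (int i = 0; i < n; i++)
    sum += x[i];   /* += a becomes += (a * 1) in the dot-product form.  */
  return sum;
}

unsigned int
widen_usum_example (unsigned char *x, int n)
{
  unsigned int sum = 0;
  for (int i = 0; i < n; i++)
    sum += x[i];
  return sum;
}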
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index c887e7f..4ca1527 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -981,7 +981,7 @@ ;; Widening operations -(define_expand "widen_ssum<mode>3" +(define_expand "widen_ssum<v_double_width><mode>3" [(set (match_operand:<V_double_width> 0 "s_register_operand") (plus:<V_double_width> (sign_extend:<V_double_width> @@ -1040,7 +1040,7 @@ } [(set_attr "type" "neon_add_widen")]) -(define_insn "widen_ssum<mode>3" +(define_insn "widen_ssum<V_widen_l><mode>3" [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") (plus:<V_widen> (sign_extend:<V_widen> @@ -1051,7 +1051,7 @@ [(set_attr "type" "neon_add_widen")] ) -(define_expand "widen_usum<mode>3" +(define_expand "widen_usum<v_double_width><mode>3" [(set (match_operand:<V_double_width> 0 "s_register_operand") (plus:<V_double_width> (zero_extend:<V_double_width> @@ -1110,7 +1110,7 @@ } [(set_attr "type" "neon_add_widen")]) -(define_insn "widen_usum<mode>3" +(define_insn "widen_usum<V_widen_l><mode>3" [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") (plus:<V_widen> (zero_extend:<V_widen> (match_operand:VW 1 "s_register_operand" "%w")) diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc index 2e7474b..a28018b 100644 --- a/gcc/config/bpf/bpf.cc +++ b/gcc/config/bpf/bpf.cc @@ -1427,25 +1427,82 @@ bpf_expand_setmem (rtx *operands) unsigned inc = GET_MODE_SIZE (mode); unsigned offset = 0; + /* If val is a constant, then build a new constant value duplicating + the byte across to the size of stores we might do. + e.g. if val is 0xab and we can store in 4-byte chunks, build + 0xabababab and use that to do the memset. + If val is not a constant, then by constraint it is a QImode register + and we similarly duplicate the byte across. */ + rtx src; + if (CONST_INT_P (val)) + { + unsigned HOST_WIDE_INT tmp = UINTVAL (val) & 0xff; + /* Need src in the proper mode. */ + switch (mode) + { + case DImode: + src = gen_rtx_CONST_INT (DImode, tmp * 0x0101010101010101); + break; + case SImode: + src = gen_rtx_CONST_INT (SImode, tmp * 0x01010101); + break; + case HImode: + src = gen_rtx_CONST_INT (HImode, tmp * 0x0101); + break; + default: + src = val; + break; + } + } + else + { + /* VAL is a subreg:QI (reg:DI N). + Copy that byte to fill the whole register. */ + src = gen_reg_rtx (mode); + emit_move_insn (src, gen_rtx_ZERO_EXTEND (mode, val)); + + /* We can fill the whole register with copies of the byte by multiplying + by 0x010101... + For DImode this requires a tmp reg with lldw, but only if we will + actually do nonzero iterations of stxdw. */ + if (mode < DImode || iters == 0) + emit_move_insn (src, gen_rtx_MULT (mode, src, GEN_INT (0x01010101))); + else + { + rtx tmp = gen_reg_rtx (mode); + emit_move_insn (tmp, GEN_INT (0x0101010101010101)); + emit_move_insn (src, gen_rtx_MULT (mode, src, tmp)); + } + } + for (unsigned int i = 0; i < iters; i++) { - emit_move_insn (adjust_address (dst, mode, offset), val); + emit_move_insn (adjust_address (dst, mode, offset), src); offset += inc; } if (remainder & 4) { - emit_move_insn (adjust_address (dst, SImode, offset), val); + emit_move_insn (adjust_address (dst, SImode, offset), + REG_P (src) + ? simplify_gen_subreg (SImode, src, mode, 0) + : src); offset += 4; remainder -= 4; } if (remainder & 2) { - emit_move_insn (adjust_address (dst, HImode, offset), val); + emit_move_insn (adjust_address (dst, HImode, offset), + REG_P (src) + ? 
simplify_gen_subreg (HImode, src, mode, 0) + : src); offset += 2; remainder -= 2; } if (remainder & 1) - emit_move_insn (adjust_address (dst, QImode, offset), val); + emit_move_insn (adjust_address (dst, QImode, offset), + REG_P (src) + ? simplify_gen_subreg (QImode, src, mode, 0) + : src); return true; } diff --git a/gcc/config/ia64/vect.md b/gcc/config/ia64/vect.md index f17c514..9d9df1d 100644 --- a/gcc/config/ia64/vect.md +++ b/gcc/config/ia64/vect.md @@ -584,7 +584,7 @@ operands[1] = gen_lowpart (DImode, operands[1]); }) -(define_expand "widen_usumv8qi3" +(define_expand "widen_usumv4hiv8qi3" [(match_operand:V4HI 0 "gr_register_operand" "") (match_operand:V8QI 1 "gr_register_operand" "") (match_operand:V4HI 2 "gr_register_operand" "")] @@ -594,7 +594,7 @@ DONE; }) -(define_expand "widen_usumv4hi3" +(define_expand "widen_usumv2siv4hi3" [(match_operand:V2SI 0 "gr_register_operand" "") (match_operand:V4HI 1 "gr_register_operand" "") (match_operand:V2SI 2 "gr_register_operand" "")] @@ -604,7 +604,7 @@ DONE; }) -(define_expand "widen_ssumv8qi3" +(define_expand "widen_ssumv4hiv8qi3" [(match_operand:V4HI 0 "gr_register_operand" "") (match_operand:V8QI 1 "gr_register_operand" "") (match_operand:V4HI 2 "gr_register_operand" "")] @@ -614,7 +614,7 @@ DONE; }) -(define_expand "widen_ssumv4hi3" +(define_expand "widen_ssumv2siv4hi3" [(match_operand:V2SI 0 "gr_register_operand" "") (match_operand:V4HI 1 "gr_register_operand" "") (match_operand:V2SI 2 "gr_register_operand" "")] diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 7edc288..fa33680 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -3772,7 +3772,7 @@ DONE; }) -(define_expand "widen_usum<mode>3" +(define_expand "widen_usumv4si<mode>3" [(set (match_operand:V4SI 0 "register_operand" "=v") (plus:V4SI (match_operand:V4SI 2 "register_operand" "v") (unspec:V4SI [(match_operand:VIshort 1 "register_operand" "v")] @@ -3786,7 +3786,7 @@ DONE; }) -(define_expand "widen_ssumv16qi3" +(define_expand "widen_ssumv4siv16qi3" [(set (match_operand:V4SI 0 "register_operand" "=v") (plus:V4SI (match_operand:V4SI 2 "register_operand" "v") (unspec:V4SI [(match_operand:V16QI 1 "register_operand" "v")] @@ -3800,7 +3800,7 @@ DONE; }) -(define_expand "widen_ssumv8hi3" +(define_expand "widen_ssumv4siv8hi3" [(set (match_operand:V4SI 0 "register_operand" "=v") (plus:V4SI (match_operand:V4SI 2 "register_operand" "v") (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc index bdc7e6a..ed0d69c 100644 --- a/gcc/cp/module.cc +++ b/gcc/cp/module.cc @@ -14761,10 +14761,11 @@ depset::hash::add_namespace_entities (tree ns, bitmap partitions) /* Seed any using-directives so that we emit the relevant namespaces. 
*/ for (tree udir : NAMESPACE_LEVEL (ns)->using_directives) - if (TREE_CODE (udir) == USING_DECL && DECL_MODULE_EXPORT_P (udir)) + if (TREE_CODE (udir) == USING_DECL && DECL_MODULE_PURVIEW_P (udir)) { make_dependency (USING_DECL_DECLS (udir), depset::EK_NAMESPACE); - count++; + if (DECL_MODULE_EXPORT_P (udir)) + count++; } if (count) @@ -17397,14 +17398,16 @@ module_state::write_using_directives (elf_out *to, depset::hash &table, tree parent = parent_dep->get_entity (); for (auto udir : NAMESPACE_LEVEL (parent)->using_directives) { - if (TREE_CODE (udir) != USING_DECL || !DECL_MODULE_EXPORT_P (udir)) + if (TREE_CODE (udir) != USING_DECL || !DECL_MODULE_PURVIEW_P (udir)) continue; + bool exported = DECL_MODULE_EXPORT_P (udir); tree target = USING_DECL_DECLS (udir); depset *target_dep = table.find_dependency (target); gcc_checking_assert (target_dep); dump () && dump ("Writing using-directive in %N for %N", parent, target); + sec.u (exported); write_namespace (sec, parent_dep); write_namespace (sec, target_dep); ++num; @@ -17441,13 +17444,15 @@ module_state::read_using_directives (unsigned num) for (unsigned ix = 0; ix != num; ++ix) { + bool exported = sec.u (); tree parent = read_namespace (sec); tree target = read_namespace (sec); if (sec.get_overrun ()) break; dump () && dump ("Read using-directive in %N for %N", parent, target); - add_using_namespace (parent, target); + if (exported || is_module () || is_partition ()) + add_using_namespace (parent, target); } dump.outdent (); diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 44e1149..97d21b9 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -5847,15 +5847,15 @@ equal or wider than the mode of the absolute difference. The result is placed in operand 0, which is of the same mode as operand 3. @var{m} is the mode of operand 1 and operand 2. -@cindex @code{widen_ssum@var{m}3} instruction pattern -@cindex @code{widen_usum@var{m}3} instruction pattern -@item @samp{widen_ssum@var{m}3} -@itemx @samp{widen_usum@var{m}3} +@cindex @code{widen_ssum@var{n}@var{m}3} instruction pattern +@cindex @code{widen_usum@var{n}@var{m}3} instruction pattern +@item @samp{widen_ssum@var{n}@var{m}3} +@itemx @samp{widen_usum@var{n}@var{m}3} Operands 0 and 2 are of the same mode, which is wider than the mode of operand 1. Add operand 1 to operand 2 and place the widened result in operand 0. (This is used to express accumulation of elements into an accumulator of a wider mode.) -@var{m} is the mode of operand 1. +@var{m} is the mode of operand 1 and @var{n} is the mode of operand 0. @cindex @code{smulhs@var{m}3} instruction pattern @cindex @code{umulhs@var{m}3} instruction pattern diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index b84ce2f..c5eb7f2 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,8 @@ +2025-10-17 Josef Melcr <jmelcr02@gmail.com> + + * f95-lang.cc (ATTR_CALLBACK_GOMP_LIST): New attr list + corresponding to the list in builtin-attrs.def.
+ 2025-10-13 Paul Thomas <pault@gcc.gnu.org> PR fortran/121191 diff --git a/gcc/fortran/f95-lang.cc b/gcc/fortran/f95-lang.cc index bb4ce6d..06ffc67 100644 --- a/gcc/fortran/f95-lang.cc +++ b/gcc/fortran/f95-lang.cc @@ -580,6 +580,7 @@ gfc_builtin_function (tree decl) #define ATTR_COLD_NORETURN_NOTHROW_LEAF_LIST \ (ECF_COLD | ECF_NORETURN | \ ECF_NOTHROW | ECF_LEAF) +#define ATTR_CALLBACK_GOMP_LIST (ECF_CB_1_2 | ATTR_NOTHROW_LIST) #define ATTR_PURE_NOTHROW_LIST (ECF_PURE | ECF_NOTHROW) static void diff --git a/gcc/fortran/resolve.cc b/gcc/fortran/resolve.cc index f419f5c..1c49ccf 100644 --- a/gcc/fortran/resolve.cc +++ b/gcc/fortran/resolve.cc @@ -5060,14 +5060,17 @@ resolve_conditional (gfc_expr *expr) /* TODO: support more data types for conditional expressions */ if (true_expr->ts.type != BT_INTEGER && true_expr->ts.type != BT_LOGICAL - && true_expr->ts.type != BT_REAL && true_expr->ts.type != BT_COMPLEX) + && true_expr->ts.type != BT_REAL && true_expr->ts.type != BT_COMPLEX + && true_expr->ts.type != BT_CHARACTER) { - gfc_error ("Sorry, only integer, logical, real and complex types " - "are currently supported for conditional expressions at %L", - &expr->where); + gfc_error ( + "Sorry, only integer, logical, real, complex and character types are " + "currently supported for conditional expressions at %L", + &expr->where); return false; } + /* TODO: support arrays in conditional expressions */ if (true_expr->rank > 0) { gfc_error ("Sorry, array is currently unsupported for conditional " diff --git a/gcc/fortran/trans-const.cc b/gcc/fortran/trans-const.cc index ea1501a..f70f362 100644 --- a/gcc/fortran/trans-const.cc +++ b/gcc/fortran/trans-const.cc @@ -438,4 +438,12 @@ gfc_conv_constant (gfc_se * se, gfc_expr * expr) structure, too. */ if (expr->ts.type == BT_CHARACTER) se->string_length = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (se->expr))); + + if (se->want_pointer) + { + if (expr->ts.type == BT_CHARACTER) + gfc_conv_string_parameter (se); + else + se->expr = gfc_build_addr_expr (NULL_TREE, se->expr); + } } diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc index 271d263..21f256b 100644 --- a/gcc/fortran/trans-expr.cc +++ b/gcc/fortran/trans-expr.cc @@ -4418,6 +4418,11 @@ gfc_conv_conditional_expr (gfc_se *se, gfc_expr *expr) se->expr = fold_build3_loc (input_location, COND_EXPR, type, condition, true_val, false_val); + if (expr->ts.type == BT_CHARACTER) + se->string_length + = fold_build3_loc (input_location, COND_EXPR, gfc_charlen_type_node, + condition, true_se.string_length, + false_se.string_length); } /* If a string's length is one, we convert it to a single character. 
*/ @@ -11546,6 +11551,29 @@ gfc_conv_string_parameter (gfc_se * se) return; } + if (TREE_CODE (se->expr) == COND_EXPR) + { + tree cond = TREE_OPERAND (se->expr, 0); + tree lhs = TREE_OPERAND (se->expr, 1); + tree rhs = TREE_OPERAND (se->expr, 2); + + gfc_se lse, rse; + gfc_init_se (&lse, NULL); + gfc_init_se (&rse, NULL); + + lse.expr = lhs; + lse.string_length = se->string_length; + gfc_conv_string_parameter (&lse); + + rse.expr = rhs; + rse.string_length = se->string_length; + gfc_conv_string_parameter (&rse); + + se->expr + = fold_build3_loc (input_location, COND_EXPR, TREE_TYPE (lse.expr), + cond, lse.expr, rse.expr); + } + if ((TREE_CODE (TREE_TYPE (se->expr)) == ARRAY_TYPE || TREE_CODE (TREE_TYPE (se->expr)) == INTEGER_TYPE) && TYPE_STRING_FLAG (TREE_TYPE (se->expr))) diff --git a/gcc/gimple-loop-jam.cc b/gcc/gimple-loop-jam.cc index 5e6c04a..5c74f80 100644 --- a/gcc/gimple-loop-jam.cc +++ b/gcc/gimple-loop-jam.cc @@ -641,6 +641,7 @@ tree_loop_unroll_and_jam (void) { cleanup_tree_cfg (); todo &= ~TODO_cleanup_cfg; + todo |= loop_invariant_motion_in_fun (cfun, false); } rewrite_into_loop_closed_ssa (NULL, 0); scev_reset (); diff --git a/gcc/ipa-cp.cc b/gcc/ipa-cp.cc index f1623c1..2105c9a 100644 --- a/gcc/ipa-cp.cc +++ b/gcc/ipa-cp.cc @@ -131,7 +131,7 @@ along with GCC; see the file COPYING3. If not see #include "dbgcnt.h" #include "symtab-clones.h" #include "gimple-range.h" - +#include "attr-callback.h" /* Allocation pools for values and their sources in ipa-cp. */ @@ -6214,6 +6214,72 @@ identify_dead_nodes (struct cgraph_node *node) } } +/* Remove all useless callback edges from the callgraph. Useless callback + edges can corrupt the callgraph because they may be impossible to + redirect, leading to crashes. Their usefulness is evaluated + through callback_edge_useful_p. */ + +static void +purge_useless_callback_edges () +{ + if (dump_file) + fprintf (dump_file, "\nPurging useless callback edges:\n"); + + cgraph_edge *e; + cgraph_node *node; + FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) + { + for (e = node->callees; e; e = e->next_callee) + { + if (e->has_callback) + { + if (dump_file) + fprintf (dump_file, "\tExamining callbacks of edge %s -> %s:\n", + e->caller->dump_name (), e->callee->dump_name ()); + if (!lookup_attribute (CALLBACK_ATTR_IDENT, + DECL_ATTRIBUTES (e->callee->decl)) + && !callback_is_special_cased (e->callee->decl, e->call_stmt)) + { + if (dump_file) + fprintf ( + dump_file, + "\t\tPurging callbacks, because the callback-dispatching " + "function no longer has any callback attributes.\n"); + e->purge_callback_edges (); + continue; + } + cgraph_edge *cbe, *next; + for (cbe = e->first_callback_edge (); cbe; cbe = next) + { + next = cbe->next_callback_edge (); + if (!callback_edge_useful_p (cbe)) + { + if (dump_file) + fprintf (dump_file, + "\t\tCallback edge %s -> %s not deemed " + "useful, removing.\n", + cbe->caller->dump_name (), + cbe->callee->dump_name ()); + cgraph_edge::remove (cbe); + } + else + { + if (dump_file) + fprintf (dump_file, + "\t\tKept callback edge %s -> %s " + "because it looks useful.\n", + cbe->caller->dump_name (), + cbe->callee->dump_name ()); + } + } + } + } + } + + if (dump_file) + fprintf (dump_file, "\n"); +} + /* The decision stage. Iterate over the topological order of call graph nodes TOPO and make specialized clones if deemed beneficial.
*/ @@ -6244,6 +6310,11 @@ ipcp_decision_stage (class ipa_topo_info *topo) if (change) identify_dead_nodes (node); } + + /* Currently, the primary use of callback edges is constant propagation. + Constant propagation is now over, so we have to remove unused callback + edges. */ + purge_useless_callback_edges (); } /* Look up all VR and bits information that we have discovered and copy it diff --git a/gcc/ipa-fnsummary.cc b/gcc/ipa-fnsummary.cc index 28f79aa..e7b81d8 100644 --- a/gcc/ipa-fnsummary.cc +++ b/gcc/ipa-fnsummary.cc @@ -990,7 +990,10 @@ ipa_call_summary_t::duplicate (struct cgraph_edge *src, info->predicate = NULL; edge_set_predicate (dst, srcinfo->predicate); info->param = srcinfo->param.copy (); - if (!dst->indirect_unknown_callee && src->indirect_unknown_callee) + if (!dst->indirect_unknown_callee && src->indirect_unknown_callee + /* Don't subtract the size when dealing with callback pairs, since the + edge has no real size. */ + && !src->has_callback && !dst->callback) { info->call_stmt_size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost); @@ -3107,6 +3110,25 @@ analyze_function_body (struct cgraph_node *node, bool early) es, es3); } } + + /* If dealing with a carrying edge, copy its summary over to its + attached edges as well. */ + if (edge->has_callback) + { + cgraph_edge *cbe; + for (cbe = edge->first_callback_edge (); cbe; + cbe = cbe->next_callback_edge ()) + { + ipa_call_summary *es2 = ipa_call_summaries->get (cbe); + es2 = ipa_call_summaries->get_create (cbe); + ipa_call_summaries->duplicate (edge, cbe, es, es2); + /* Unlike speculative edges, callback edges have no real + size or time; the call doesn't exist. Reflect that in + their summaries. */ + es2->call_stmt_size = 0; + es2->call_stmt_time = 0; + } + } } /* TODO: When conditional jump or switch is known to be constant, but diff --git a/gcc/ipa-inline-analysis.cc b/gcc/ipa-inline-analysis.cc index c5472cb..c6ab256 100644 --- a/gcc/ipa-inline-analysis.cc +++ b/gcc/ipa-inline-analysis.cc @@ -417,6 +417,11 @@ do_estimate_growth_1 (struct cgraph_node *node, void *data) { gcc_checking_assert (e->inline_failed); + /* Don't count callback edges into growth, since they are never inlined + anyway. */ + if (e->callback) + continue; + if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR || !opt_for_fn (e->caller->decl, optimize)) { diff --git a/gcc/ipa-inline-transform.cc b/gcc/ipa-inline-transform.cc index 99969aa..9dc662138 100644 --- a/gcc/ipa-inline-transform.cc +++ b/gcc/ipa-inline-transform.cc @@ -845,7 +845,17 @@ inline_transform (struct cgraph_node *node) if (!e->inline_failed) has_inline = true; next = e->next_callee; - cgraph_edge::redirect_call_stmt_to_callee (e); + if (e->has_callback) + { + /* Redirect callback edges when redirecting their carrying edge. */ + cgraph_edge *cbe; + cgraph_edge::redirect_call_stmt_to_callee (e); + for (cbe = e->first_callback_edge (); cbe; + cbe = cbe->next_callback_edge ()) + cgraph_edge::redirect_call_stmt_to_callee (cbe); + } + else + cgraph_edge::redirect_call_stmt_to_callee (e); } node->remove_all_references (); diff --git a/gcc/ipa-param-manipulation.cc b/gcc/ipa-param-manipulation.cc index 4878867..11f843c 100644 --- a/gcc/ipa-param-manipulation.cc +++ b/gcc/ipa-param-manipulation.cc @@ -50,6 +50,7 @@ along with GCC; see the file COPYING3. If not see #include "sreal.h" #include "ipa-cp.h" #include "ipa-prop.h" +#include "attr-callback.h" /* Actual prefixes of different newly synthetized parameters. 
Keep in sync with IPA_PARAM_PREFIX_* defines. */ @@ -308,6 +309,16 @@ drop_type_attribute_if_params_changed_p (tree name) return false; } +/* Return TRUE if the attribute NAME should be dropped when the parameters of + the decl it is sitting on change. Primarily affects attributes that refer + to the decl's arguments. */ +static bool +drop_decl_attribute_if_params_changed_p (tree name) +{ + if (is_attribute_p (CALLBACK_ATTR_IDENT, name)) + return true; + return false; +} + /* Build and return a function type just like ORIG_TYPE but with parameter types given in NEW_PARAM_TYPES - which can be NULL if, but only if, ORIG_TYPE itself has NULL TREE_ARG_TYPEs. If METHOD2FUNC is true, also make @@ -488,11 +499,12 @@ ipa_param_adjustments::method2func_p (tree orig_type) performing all stored modifications. TYPE_ORIGINAL_P should be true when OLD_TYPE refers to the type before any IPA transformations, as opposed to a type that can be an intermediate one in between various IPA - transformations. */ + transformations. Set the pointee of ARGS_MODIFIED (if provided) to TRUE if + the type's arguments were changed. */ tree -ipa_param_adjustments::build_new_function_type (tree old_type, - bool type_original_p) +ipa_param_adjustments::build_new_function_type ( + tree old_type, bool type_original_p, bool *args_modified /* = NULL */) { auto_vec<tree,16> new_param_types, *new_param_types_p; if (prototype_p (old_type)) @@ -518,6 +530,8 @@ ipa_param_adjustments::build_new_function_type (tree old_type, || get_original_index (index) != (int)index) modified = true; + if (args_modified) + *args_modified = modified; return build_adjusted_function_type (old_type, new_param_types_p, method2func_p (old_type), m_skip_return, @@ -536,10 +550,11 @@ ipa_param_adjustments::adjust_decl (tree orig_decl) { tree new_decl = copy_node (orig_decl); tree orig_type = TREE_TYPE (orig_decl); + bool args_modified = false; if (prototype_p (orig_type) || (m_skip_return && !VOID_TYPE_P (TREE_TYPE (orig_type)))) { - tree new_type = build_new_function_type (orig_type, false); + tree new_type = build_new_function_type (orig_type, false, &args_modified); TREE_TYPE (new_decl) = new_type; } if (method2func_p (orig_type)) @@ -556,6 +571,20 @@ ipa_param_adjustments::adjust_decl (tree orig_decl) if (m_skip_return) DECL_IS_MALLOC (new_decl) = 0; + /* If the decl's arguments changed, we might need to drop some attributes. */ + if (args_modified && DECL_ATTRIBUTES (new_decl)) + { + tree t = DECL_ATTRIBUTES (new_decl); + tree *last = &DECL_ATTRIBUTES (new_decl); + DECL_ATTRIBUTES (new_decl) = NULL; + for (; t; t = TREE_CHAIN (t)) + if (!drop_decl_attribute_if_params_changed_p (get_attribute_name (t))) + { + *last = copy_node (t); + TREE_CHAIN (*last) = NULL; + last = &TREE_CHAIN (*last); + } + } return new_decl; } diff --git a/gcc/ipa-param-manipulation.h b/gcc/ipa-param-manipulation.h index 7c7661c..8121ad6 100644 --- a/gcc/ipa-param-manipulation.h +++ b/gcc/ipa-param-manipulation.h @@ -229,7 +229,8 @@ public: /* Return if the first parameter is left intact. */ bool first_param_intact_p (); /* Build a function type corresponding to the modified call. */ - tree build_new_function_type (tree old_type, bool type_is_original_p); + tree build_new_function_type (tree old_type, bool type_is_original_p, + bool *args_modified = NULL); /* Build a declaration corresponding to the target of the modified call.
*/ tree adjust_decl (tree orig_decl); /* Fill a vector marking which parameters are intact by the described diff --git a/gcc/ipa-prop.cc b/gcc/ipa-prop.cc index bf52ffe..c8438d6 100644 --- a/gcc/ipa-prop.cc +++ b/gcc/ipa-prop.cc @@ -61,6 +61,8 @@ along with GCC; see the file COPYING3. If not see #include "value-range-storage.h" #include "vr-values.h" #include "lto-streamer.h" +#include "attribs.h" +#include "attr-callback.h" /* Function summary where the parameter infos are actually stored. */ ipa_node_params_t *ipa_node_params_sum = NULL; @@ -324,6 +326,10 @@ ipa_get_param_decl_index (class ipa_node_params *info, tree ptree) return ipa_get_param_decl_index_1 (info->descriptors, ptree); } +static void +ipa_duplicate_jump_function (cgraph_edge *src, cgraph_edge *dst, + ipa_jump_func *src_jf, ipa_jump_func *dst_jf); + /* Populate the param_decl field in parameter DESCRIPTORS that correspond to NODE. */ @@ -2416,6 +2422,18 @@ skip_a_safe_conversion_op (tree t) return t; } +/* Initializes ipa_edge_args summary of CBE given its callback-carrying edge. + This primarily means allocating the correct amount of jump functions. */ + +static inline void +init_callback_edge_summary (struct cgraph_edge *cbe, tree attr) +{ + ipa_edge_args *cb_args = ipa_edge_args_sum->get_create (cbe); + size_t jf_vec_length = callback_num_args(attr); + vec_safe_grow_cleared (cb_args->jump_functions, + jf_vec_length, true); +} + /* Compute jump function for all arguments of callsite CS and insert the information in the jump_functions array in the ipa_edge_args corresponding to this callsite. */ @@ -2441,6 +2459,7 @@ ipa_compute_jump_functions_for_edge (struct ipa_func_body_info *fbi, if (ipa_func_spec_opts_forbid_analysis_p (cs->caller)) return; + auto_vec<cgraph_edge*> callback_edges; for (n = 0; n < arg_num; n++) { struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, n); @@ -2519,10 +2538,57 @@ ipa_compute_jump_functions_for_edge (struct ipa_func_body_info *fbi, arg = skip_a_safe_conversion_op (arg); if (is_gimple_ip_invariant (arg) - || (VAR_P (arg) - && is_global_var (arg) - && TREE_READONLY (arg))) - ipa_set_jf_constant (jfunc, arg, cs); + || (VAR_P (arg) && is_global_var (arg) && TREE_READONLY (arg))) + { + ipa_set_jf_constant (jfunc, arg, cs); + if (TREE_CODE (arg) == ADDR_EXPR) + { + tree pointee = TREE_OPERAND (arg, 0); + if (TREE_CODE (pointee) == FUNCTION_DECL && !cs->callback + && cs->callee) + { + /* Argument is a pointer to a function. Look for a callback + attribute describing this argument. */ + tree callback_attr + = lookup_attribute (CALLBACK_ATTR_IDENT, + DECL_ATTRIBUTES (cs->callee->decl)); + for (; callback_attr; + callback_attr + = lookup_attribute (CALLBACK_ATTR_IDENT, + TREE_CHAIN (callback_attr))) + if (callback_get_fn_index (callback_attr) == n) + break; + + /* If no callback attribute is found, check if the function is + a special case. */ + if (!callback_attr + && callback_is_special_cased (cs->callee->decl, call)) + { + callback_attr + = callback_special_case_attr (cs->callee->decl); + /* Check if the special attribute describes the correct + attribute, as a special cased function might have + multiple callbacks. */ + if (callback_get_fn_index (callback_attr) != n) + callback_attr = NULL; + } + + /* If a callback attribute describing this pointer is found, + create a callback edge to the pointee function to + allow for further optimizations. 
*/ + if (callback_attr) + { + cgraph_node *kernel_node + = cgraph_node::get_create (pointee); + unsigned callback_id = n; + cgraph_edge *cbe + = cs->make_callback (kernel_node, callback_id); + init_callback_edge_summary (cbe, callback_attr); + callback_edges.safe_push (cbe); + } + } + } + } else if (!is_gimple_reg_type (TREE_TYPE (arg)) && TREE_CODE (arg) == PARM_DECL) { @@ -2580,6 +2646,34 @@ ipa_compute_jump_functions_for_edge (struct ipa_func_body_info *fbi, || POINTER_TYPE_P (param_type))) determine_known_aggregate_parts (fbi, call, arg, param_type, jfunc); } + + if (!callback_edges.is_empty ()) + { + /* For every callback edge, fetch jump functions of arguments + passed to them and copy them over to their respective summaries. + This avoids recalculating them for every callback edge, since their + arguments are just passed through. */ + unsigned j; + for (j = 0; j < callback_edges.length (); j++) + { + cgraph_edge *callback_edge = callback_edges[j]; + ipa_edge_args *cb_summary + = ipa_edge_args_sum->get_create (callback_edge); + auto_vec<int> arg_mapping + = callback_get_arg_mapping (callback_edge, cs); + unsigned i; + for (i = 0; i < arg_mapping.length (); i++) + { + if (arg_mapping[i] == -1) + continue; + class ipa_jump_func *src + = ipa_get_ith_jump_func (args, arg_mapping[i]); + class ipa_jump_func *dst = ipa_get_ith_jump_func (cb_summary, i); + ipa_duplicate_jump_function (cs, callback_edge, src, dst); + } + } + } + if (!useful_context) vec_free (args->polymorphic_call_contexts); } diff --git a/gcc/lto-cgraph.cc b/gcc/lto-cgraph.cc index 0af2e88..5708ba0 100644 --- a/gcc/lto-cgraph.cc +++ b/gcc/lto-cgraph.cc @@ -274,6 +274,9 @@ lto_output_edge (struct lto_simple_output_block *ob, struct cgraph_edge *edge, bp_pack_value (&bp, edge->speculative_id, 16); bp_pack_value (&bp, edge->indirect_inlining_edge, 1); bp_pack_value (&bp, edge->speculative, 1); + bp_pack_value (&bp, edge->callback, 1); + bp_pack_value (&bp, edge->has_callback, 1); + bp_pack_value (&bp, edge->callback_id, 16); bp_pack_value (&bp, edge->call_stmt_cannot_inline_p, 1); gcc_assert (!edge->call_stmt_cannot_inline_p || edge->inline_failed != CIF_BODY_NOT_AVAILABLE); @@ -1539,6 +1542,9 @@ input_edge (class lto_input_block *ib, vec<symtab_node *> nodes, edge->indirect_inlining_edge = bp_unpack_value (&bp, 1); edge->speculative = bp_unpack_value (&bp, 1); + edge->callback = bp_unpack_value(&bp, 1); + edge->has_callback = bp_unpack_value(&bp, 1); + edge->callback_id = bp_unpack_value(&bp, 16); edge->lto_stmt_uid = stmt_id; edge->speculative_id = speculative_id; edge->inline_failed = inline_failed; diff --git a/gcc/match.pd b/gcc/match.pd index ab5b51b..bfc51e6 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -6844,11 +6844,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (bit_and:c (ne:c @0 @1) (ne (bit_ior @0 @1) integer_zerop)) (ne @0 @1)) (simplify - (bit_ior:c (ne:c @0 @1) (ne (bit_ior @0 @1) integer_zerop)) - (ne (bit_ior @0 @1) { integer_zero_node; })) + (bit_ior:c (ne:c @0 @1) (ne (bit_ior@2 @0 @1) integer_zerop@3)) + (ne @2 @3)) (simplify - (bit_and:c (eq:c @0 @1) (eq (bit_ior @0 @1) integer_zerop)) - (eq (bit_ior @0 @1) { integer_zero_node; })) + (bit_and:c (eq:c @0 @1) (eq (bit_ior@2 @0 @1) integer_zerop@3)) + (eq @2 @3)) (simplify (bit_ior:c (eq:c @0 @1) (eq (bit_ior @0 @1) integer_zerop)) (eq @0 @1)) diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def index db1ec96..010885f 100644 --- a/gcc/omp-builtins.def +++ b/gcc/omp-builtins.def @@ -358,35 +358,35 @@ DEF_GOMP_BUILTIN 
(BUILT_IN_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT, DEF_GOMP_BUILTIN (BUILT_IN_GOMP_PARALLEL_LOOP_STATIC, "GOMP_parallel_loop_static", BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG_LONG_UINT, - ATTR_NOTHROW_LIST) + ATTR_CALLBACK_GOMP_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_PARALLEL_LOOP_DYNAMIC, "GOMP_parallel_loop_dynamic", BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG_LONG_UINT, - ATTR_NOTHROW_LIST) + ATTR_CALLBACK_GOMP_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_PARALLEL_LOOP_GUIDED, "GOMP_parallel_loop_guided", BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG_LONG_UINT, - ATTR_NOTHROW_LIST) + ATTR_CALLBACK_GOMP_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_PARALLEL_LOOP_RUNTIME, "GOMP_parallel_loop_runtime", BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG_UINT, - ATTR_NOTHROW_LIST) + ATTR_CALLBACK_GOMP_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_PARALLEL_LOOP_NONMONOTONIC_DYNAMIC, "GOMP_parallel_loop_nonmonotonic_dynamic", BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG_LONG_UINT, - ATTR_NOTHROW_LIST) + ATTR_CALLBACK_GOMP_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_PARALLEL_LOOP_NONMONOTONIC_GUIDED, "GOMP_parallel_loop_nonmonotonic_guided", BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG_LONG_UINT, - ATTR_NOTHROW_LIST) + ATTR_CALLBACK_GOMP_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_PARALLEL_LOOP_NONMONOTONIC_RUNTIME, "GOMP_parallel_loop_nonmonotonic_runtime", BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG_UINT, - ATTR_NOTHROW_LIST) + ATTR_CALLBACK_GOMP_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_PARALLEL_LOOP_MAYBE_NONMONOTONIC_RUNTIME, "GOMP_parallel_loop_maybe_nonmonotonic_runtime", BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG_UINT, - ATTR_NOTHROW_LIST) + ATTR_CALLBACK_GOMP_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_END, "GOMP_loop_end", BT_FN_VOID, ATTR_NOTHROW_LEAF_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_END_CANCEL, "GOMP_loop_end_cancel", @@ -409,10 +409,10 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_INTEROP, "GOMP_interop", BT_FN_VOID_INT_INT_PTR_PTR_PTR_INT_PTR_INT_PTR_UINT_PTR, ATTR_NOTHROW_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_PARALLEL, "GOMP_parallel", - BT_FN_VOID_OMPFN_PTR_UINT_UINT, ATTR_NOTHROW_LIST) + BT_FN_VOID_OMPFN_PTR_UINT_UINT, ATTR_CALLBACK_GOMP_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_PARALLEL_REDUCTIONS, "GOMP_parallel_reductions", - BT_FN_UINT_OMPFN_PTR_UINT_UINT, ATTR_NOTHROW_LIST) + BT_FN_UINT_OMPFN_PTR_UINT_UINT, ATTR_CALLBACK_GOMP_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TASK, "GOMP_task", BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT_PTR, ATTR_NOTHROW_LIST) @@ -430,7 +430,7 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_SECTIONS_NEXT, "GOMP_sections_next", BT_FN_UINT, ATTR_NOTHROW_LEAF_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_PARALLEL_SECTIONS, "GOMP_parallel_sections", - BT_FN_VOID_OMPFN_PTR_UINT_UINT_UINT, ATTR_NOTHROW_LIST) + BT_FN_VOID_OMPFN_PTR_UINT_UINT_UINT, ATTR_CALLBACK_GOMP_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_SECTIONS_END, "GOMP_sections_end", BT_FN_VOID, ATTR_NOTHROW_LEAF_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_SECTIONS_END_CANCEL, @@ -471,7 +471,7 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TARGET_MAP_INDIRECT_PTR, DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TEAMS4, "GOMP_teams4", BT_FN_BOOL_UINT_UINT_UINT_BOOL, ATTR_NOTHROW_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TEAMS_REG, "GOMP_teams_reg", - BT_FN_VOID_OMPFN_PTR_UINT_UINT_UINT, ATTR_NOTHROW_LIST) + BT_FN_VOID_OMPFN_PTR_UINT_UINT_UINT, ATTR_CALLBACK_GOMP_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TASKGROUP_REDUCTION_REGISTER, "GOMP_taskgroup_reduction_register", BT_FN_VOID_PTR, ATTR_NOTHROW_LEAF_LIST) diff --git a/gcc/optabs.cc b/gcc/optabs.cc index 5c9450f..0865fc2 100644 --- a/gcc/optabs.cc +++ b/gcc/optabs.cc @@ -322,6 
+322,10 @@ expand_widen_pattern_expr (const_sepops ops, rtx op0, rtx op1, rtx wide_op, icode = find_widening_optab_handler (widen_pattern_optab, TYPE_MODE (TREE_TYPE (ops->op2)), tmode0); + else if (ops->code == WIDEN_SUM_EXPR) + icode = find_widening_optab_handler (widen_pattern_optab, + TYPE_MODE (TREE_TYPE (ops->op1)), + tmode0); else icode = optab_handler (widen_pattern_optab, tmode0); gcc_assert (icode != CODE_FOR_nothing); diff --git a/gcc/optabs.def b/gcc/optabs.def index 790e43f..6cfcef9 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -85,6 +85,8 @@ OPTAB_CD(smsub_widen_optab, "msub$b$a4") OPTAB_CD(umsub_widen_optab, "umsub$b$a4") OPTAB_CD(ssmsub_widen_optab, "ssmsub$b$a4") OPTAB_CD(usmsub_widen_optab, "usmsub$a$b4") +OPTAB_CD(ssum_widen_optab, "widen_ssum$a$b3") +OPTAB_CD(usum_widen_optab, "widen_usum$a$b3") OPTAB_CD(crc_optab, "crc$a$b4") OPTAB_CD(crc_rev_optab, "crc_rev$a$b4") OPTAB_CD(vec_load_lanes_optab, "vec_load_lanes$a$b") @@ -415,8 +417,6 @@ OPTAB_D (savg_floor_optab, "avg$a3_floor") OPTAB_D (uavg_floor_optab, "uavg$a3_floor") OPTAB_D (savg_ceil_optab, "avg$a3_ceil") OPTAB_D (uavg_ceil_optab, "uavg$a3_ceil") -OPTAB_D (ssum_widen_optab, "widen_ssum$I$a3") -OPTAB_D (usum_widen_optab, "widen_usum$I$a3") OPTAB_D (usad_optab, "usad$I$a") OPTAB_D (ssad_optab, "ssad$I$a") OPTAB_D (smulhs_optab, "smulhs$a3") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 6964114..2c250a1 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,81 @@ +2025-10-17 David Faust <david.faust@oracle.com> + + PR target/122139 + * gcc.target/bpf/memset-3.c: New. + * gcc.target/bpf/memset-4.c: New. + +2025-10-17 Tamar Christina <tamar.christina@arm.com> + Jennifer Schmitz <jschmitz@nvidia.com> + + PR target/121604 + * gcc.target/aarch64/sve/pr121604_brk.c: New test. + * gcc.target/aarch64/sve2/pr121604_pmov.c: New test. + +2025-10-17 Richard Biener <rguenther@suse.de> + + PR tree-optimization/122308 + * gcc.dg/vect/vect-pr122308.c: New testcase. + +2025-10-17 Josef Melcr <jmelcr02@gmail.com> + + * gcc.dg/ipa/ipcp-cb-spec1.c: New test. + * gcc.dg/ipa/ipcp-cb-spec2.c: New test. + * gcc.dg/ipa/ipcp-cb1.c: New test. + +2025-10-17 Eric Botcazou <ebotcazou@adacore.com> + + * gnat.dg/specs/style1.ads: New test. + +2025-10-17 Richard Biener <rguenther@suse.de> + + PR tree-optimization/122301 + * gcc.dg/vect/vect-pr122301.c: New testcase. + +2025-10-17 Avinash Jayakar <avinashd@linux.ibm.com> + + PR tree-optimization/104116 + * gcc.dg/vect/pr104116-ceil-div-2.c: New test. + * gcc.dg/vect/pr104116-ceil-div-pow2.c: New test. + * gcc.dg/vect/pr104116-ceil-div.c: New test. + * gcc.dg/vect/pr104116-ceil-mod-2.c: New test. + * gcc.dg/vect/pr104116-ceil-mod-pow2.c: New test. + * gcc.dg/vect/pr104116-ceil-mod.c: New test. + * gcc.dg/vect/pr104116-ceil-udiv-2.c: New test. + * gcc.dg/vect/pr104116-ceil-udiv-pow2.c: New test. + * gcc.dg/vect/pr104116-ceil-udiv.c: New test. + * gcc.dg/vect/pr104116-ceil-umod-2.c: New test. + * gcc.dg/vect/pr104116-ceil-umod-pow2.c: New test. + * gcc.dg/vect/pr104116-ceil-umod.c: New test. + * gcc.dg/vect/pr104116-floor-div-2.c: New test. + * gcc.dg/vect/pr104116-floor-div-pow2.c: New test. + * gcc.dg/vect/pr104116-floor-div.c: New test. + * gcc.dg/vect/pr104116-floor-mod-2.c: New test. + * gcc.dg/vect/pr104116-floor-mod-pow2.c: New test. + * gcc.dg/vect/pr104116-floor-mod.c: New test. + * gcc.dg/vect/pr104116-round-div-2.c: New test. + * gcc.dg/vect/pr104116-round-div-pow2.c: New test. + * gcc.dg/vect/pr104116-round-div.c: New test. 
+ * gcc.dg/vect/pr104116-round-mod-2.c: New test. + * gcc.dg/vect/pr104116-round-mod-pow2.c: New test. + * gcc.dg/vect/pr104116-round-mod.c: New test. + * gcc.dg/vect/pr104116-round-udiv-2.c: New test. + * gcc.dg/vect/pr104116-round-udiv-pow2.c: New test. + * gcc.dg/vect/pr104116-round-udiv.c: New test. + * gcc.dg/vect/pr104116-round-umod-2.c: New test. + * gcc.dg/vect/pr104116-round-umod-pow2.c: New test. + * gcc.dg/vect/pr104116-round-umod.c: New test. + * gcc.dg/vect/pr104116.h: New test. + +2025-10-17 Andrew Pinski <andrew.pinski@oss.qualcomm.com> + + PR tree-optimization/122296 + * gcc.dg/torture/int-bwise-opt-1.c: New test. + +2025-10-17 Andrew Pinski <andrew.pinski@oss.qualcomm.com> + + PR tree-optimization/122296 + * gcc.dg/tree-ssa/int-bwise-opt-vect01.c: New test. + 2025-10-16 David Malcolm <dmalcolm@redhat.com> * gcc.dg/plugin/diagnostic_plugin_test_graphs.cc diff --git a/gcc/testsuite/g++.dg/modules/namespace-13_b.C b/gcc/testsuite/g++.dg/modules/namespace-13_b.C index 1b30961..4c2a7d9 100644 --- a/gcc/testsuite/g++.dg/modules/namespace-13_b.C +++ b/gcc/testsuite/g++.dg/modules/namespace-13_b.C @@ -6,6 +6,7 @@ module; namespace gmf::blah {} namespace gmf::other {} +using namespace gmf::other; // not emitted export module b; export import a; @@ -21,7 +22,7 @@ namespace c { using namespace a; } -// { dg-final { scan-lang-dump {Using-directives 3} module } } +// { dg-final { scan-lang-dump {Using-directives 4} module } } // { dg-final { scan-lang-dump {Writing using-directive in '::b' for '::a'} module } } // { dg-final { scan-lang-dump {Writing using-directive in '::b' for '::gmf::blah'} module } } @@ -30,3 +31,4 @@ namespace c { // { dg-final { scan-lang-dump {Writing namespace:[0-9]* '::gmf::blah', public} module } } // { dg-final { scan-lang-dump-not {Writing namespace:[0-9]* '::gmf::other'} module } } +// { dg-final { scan-lang-dump-not {Writing using-directive in '::' for '::gmf::other'} module } } diff --git a/gcc/testsuite/g++.dg/modules/namespace-13_c.C b/gcc/testsuite/g++.dg/modules/namespace-13_c.C index d04ef37..51f4dfc 100644 --- a/gcc/testsuite/g++.dg/modules/namespace-13_c.C +++ b/gcc/testsuite/g++.dg/modules/namespace-13_c.C @@ -15,3 +15,6 @@ static_assert(b::f() == 42); static_assert(b::g() == 123); static_assert(c::other::h() == 99); static_assert(y::i() == 5); + +// unexported 'using namespace a'; should not be visible in 'c' +int result = c::f(); // { dg-error "'f' is not a member of 'c'" } diff --git a/gcc/testsuite/g++.dg/modules/namespace-14_a.C b/gcc/testsuite/g++.dg/modules/namespace-14_a.C new file mode 100644 index 0000000..c26964e --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/namespace-14_a.C @@ -0,0 +1,11 @@ +// PR c++/122279 +// { dg-additional-options "-fmodules" } +// { dg-module-cmi M:part } + +module M:part; +namespace foo { + void go(); +} +namespace bar { + using namespace foo; +} diff --git a/gcc/testsuite/g++.dg/modules/namespace-14_b.C b/gcc/testsuite/g++.dg/modules/namespace-14_b.C new file mode 100644 index 0000000..987c768 --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/namespace-14_b.C @@ -0,0 +1,12 @@ +// PR c++/122279 +// { dg-additional-options "-fmodules" } +// { dg-module-cmi M } + +export module M; +import :part; +namespace qux { + using namespace bar; +} +void test1() { + bar::go(); +} diff --git a/gcc/testsuite/g++.dg/modules/namespace-14_c.C b/gcc/testsuite/g++.dg/modules/namespace-14_c.C new file mode 100644 index 0000000..0896609 --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/namespace-14_c.C @@ -0,0 +1,7 @@ +// PR 
c++/122279 +// { dg-additional-options "-fmodules" } + +module M; +void test2() { + qux::go(); +} diff --git a/gcc/testsuite/g++.dg/modules/namespace-14_d.C b/gcc/testsuite/g++.dg/modules/namespace-14_d.C new file mode 100644 index 0000000..e5a55ed --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/namespace-14_d.C @@ -0,0 +1,10 @@ +// PR c++/122279 +// { dg-additional-options "-fmodules" } +// { dg-module-cmi M:other_part } + +module M:other_part; +import :part; + +void test3() { + bar::go(); +} diff --git a/gcc/testsuite/gcc.dg/ipa/ipcp-cb-spec1.c b/gcc/testsuite/gcc.dg/ipa/ipcp-cb-spec1.c new file mode 100644 index 0000000..a85e623 --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/ipcp-cb-spec1.c @@ -0,0 +1,19 @@ +/* Test that GOMP_task is special cased when cpyfn is NULL. */ + +/* { dg-do run } */ +/* { dg-options "-O3 -fopenmp -flto -std=gnu99 -fdump-ipa-cp-details" } */ +/* { dg-require-effective-target fopenmp } */ +/* { dg-require-effective-target lto } */ + +void test(int c) { + for (int i = 0; i < c; i++) + if (!__builtin_constant_p(c)) + __builtin_abort(); +} +int main() { +#pragma omp task + test(7); + return 0; +} + +/* { dg-final { scan-wpa-ipa-dump "Creating a specialized node of main._omp_fn" "cp" } } */ diff --git a/gcc/testsuite/gcc.dg/ipa/ipcp-cb-spec2.c b/gcc/testsuite/gcc.dg/ipa/ipcp-cb-spec2.c new file mode 100644 index 0000000..01d7425 --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/ipcp-cb-spec2.c @@ -0,0 +1,21 @@ +/* Check that GOMP_task doesn't produce callback edges when cpyfn is not + NULL. */ + +/* { dg-do run } */ +/* { dg-options "-O3 -fopenmp -flto -std=gnu99 -fdump-ipa-cp-details" } */ +/* { dg-require-effective-target fopenmp } */ +/* { dg-require-effective-target lto } */ + +void test(int *a) { + for (int i = 0; i < 100; i++) { + a[i] = i; + } +} +int main() { + int a[100]; + __builtin_memset (a, 0, sizeof (a)); + #pragma omp task + test (a); +} + +/* { dg-final { scan-ipa-dump-not "Created callback edge" "cp" } } */ diff --git a/gcc/testsuite/gcc.dg/ipa/ipcp-cb1.c b/gcc/testsuite/gcc.dg/ipa/ipcp-cb1.c new file mode 100644 index 0000000..3418b5d --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/ipcp-cb1.c @@ -0,0 +1,25 @@ +/* Test that we can propagate constants into outlined OpenMP kernels. + This tests the underlying callback attribute and its related edges. 
*/ + +/* { dg-do run } */ +/* { dg-options "-O3 -fopenmp -flto -std=gnu99 -fdump-ipa-cp-details" } */ +/* { dg-require-effective-target fopenmp } */ +/* { dg-require-effective-target lto } */ + +int a[100]; +void test(int c) { +#pragma omp parallel for + for (int i = 0; i < c; i++) { + if (!__builtin_constant_p(c)) { + __builtin_abort(); + } + a[i] = i; + } +} +int main() { + test(100); + return a[5] - 5; +} + +/* { dg-final { scan-wpa-ipa-dump "Creating a specialized node of test._omp_fn" "cp" } } */ +/* { dg-final { scan-wpa-ipa-dump "Aggregate replacements: 0\\\[0]=100\\(by_ref\\)" "cp" } } */ diff --git a/gcc/testsuite/gcc.dg/torture/int-bwise-opt-1.c b/gcc/testsuite/gcc.dg/torture/int-bwise-opt-1.c new file mode 100644 index 0000000..ceea95b --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/int-bwise-opt-1.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ + +/* PR tree-optimization/122296 */ + +typedef unsigned type1; +typedef unsigned __attribute__((vector_size(sizeof(unsigned) ))) type2; +type1 g(type1 a, type1 b) +{ + type1 c = a == b; + type1 d = (a|b) == 0; + return c & d; +} + +type1 f(type1 a, type1 b) +{ + type1 c = a != b; + type1 d = (a|b) != 0; + return c | d; +} +type2 g2(type2 a, type2 b) +{ + type2 c = a == b; + type2 d = (a|b) == 0; + return c & d; +} + +type2 f2(type2 a, type2 b) +{ + type2 c = a != b; + type2 d = (a|b) != 0; + return c | d; +} diff --git a/gcc/testsuite/gcc.dg/torture/pr122012.c b/gcc/testsuite/gcc.dg/torture/pr122012.c new file mode 100644 index 0000000..055915a --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr122012.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +#include <stdlib.h> + +void foo(); + +void test(size_t step) { + char *buf = malloc(64); + char *p = buf; + size_t i; + + for(i = 0; i < 64; ++i) { + p += 4; + if (__builtin_object_size (p, 2) != 0) + foo(); + p += step; + } + free(buf); +} diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-div-2.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-div-2.c new file mode 100644 index 0000000..7078776 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-div-2.c @@ -0,0 +1,29 @@ +/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */ +/* { dg-require-effective-target vect_int} */ +/* { dg-require-effective-target vect_condition} */ +/* { dg-require-effective-target vect_shift} */ + + + +#include "pr104116.h" +#include "tree-vect.h" + +TEST_FN(__CEIL_DIV, 2, div) + +int main (void) +{ + check_vect (); + int *a = (int*)&arr; + init_arr(a, N); + div(a); + for (int i=0; i<N; i++) + { + int expected = cl_div (i - N/2, 2); + if (expected != a[i]) + abort(); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-div-pow2.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-div-pow2.c new file mode 100644 index 0000000..7aa9ae8 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-div-pow2.c @@ -0,0 +1,30 @@ +/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */ +/* { dg-require-effective-target vect_int} */ +/* { dg-require-effective-target vect_condition} */ +/* { dg-require-effective-target vect_shift} */ + + + +#include "pr104116.h" +#include "tree-vect.h" + + +TEST_FN(__CEIL_DIV, 8, div) + +int main (void) +{ + check_vect (); + int *a = (int*)&arr; + init_arr(a, N); + div(a); + for (int i=0; i<N; i++) + { + int expected = cl_div (i - N/2, 8); + if (expected != a[i]) + abort(); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-div.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-div.c new file mode 100644 index 0000000..6f903ff --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-div.c @@ -0,0 +1,30 @@ +/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */ +/* { dg-require-effective-target vect_int} */ +/* { dg-require-effective-target vect_condition} */ +/* { dg-require-effective-target vect_shift} */ + + + +#include "pr104116.h" +#include "tree-vect.h" + + +TEST_FN(__CEIL_DIV, 19, div) + +int main (void) +{ + check_vect (); + int *a = (int*)&arr; + init_arr(a, N); + div(a); + for (int i=0; i<N; i++) + { + int expected = cl_div (i - N/2, 19); + if (expected != a[i]) + abort(); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod-2.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod-2.c new file mode 100644 index 0000000..ee6dfb9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod-2.c @@ -0,0 +1,30 @@ +/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */ +/* { dg-require-effective-target vect_int} */ +/* { dg-require-effective-target vect_condition} */ +/* { dg-require-effective-target vect_shift} */ + + + +#include "pr104116.h" +#include "tree-vect.h" + + +TEST_FN(__CEIL_MOD, 2, div) + +int main (void) +{ + check_vect (); + int *a = (int*)&arr; + init_arr(a, N); + div(a); + for (int i=0; i<N; i++) + { + int expected = cl_mod (i - N/2, 2); + if (expected != a[i]) + abort(); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod-pow2.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod-pow2.c new file mode 100644 index 0000000..de409ea --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod-pow2.c @@ -0,0 +1,30 @@ +/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */ +/* { dg-require-effective-target vect_int} */ +/* { dg-require-effective-target vect_condition} */ +/* { dg-require-effective-target vect_shift} */ + + + +#include "pr104116.h" +#include "tree-vect.h" + + +TEST_FN(__CEIL_MOD, 8, div) + +int main (void) +{ + check_vect (); + unsigned int *a = (unsigned int*)&arr; + init_arr(a, N); + div(a); + for (int i=0; i<N; i++) + { + unsigned int expected = cl_mod (i - N/2, 8); + if (expected != a[i]) + abort(); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod.c new file mode 100644 index 0000000..f2ba936 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod.c @@ -0,0 +1,30 @@ +/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */ +/* { dg-require-effective-target vect_int} */ +/* { dg-require-effective-target vect_condition} */ +/* { dg-require-effective-target vect_shift} */ + + + +#include "pr104116.h" +#include "tree-vect.h" + + +TEST_FN(__CEIL_MOD, 19, div) + +int main (void) +{ + check_vect (); + int *a = (int*)&arr; + init_arr(a, N); + div(a); + for (int i=0; i<N; i++) + { + int expected = cl_mod (i - N/2, 19); + if (expected != a[i]) + abort(); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv-2.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv-2.c new file mode 100644 index 0000000..db1f797 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv-2.c @@ -0,0 +1,29 @@ +/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */ +/* { dg-require-effective-target vect_int} */ +/* { dg-require-effective-target vect_condition} */ +/* { dg-require-effective-target vect_shift} */ + + + +#include "pr104116.h" +#include "tree-vect.h" + +TEST_FN_UNSIGNED(__CEIL_DIV, 2u, udiv) + +int main (void) +{ + check_vect (); + unsigned int *ua = (unsigned int*)↑ + init_uarr(ua, N); + udiv(ua); + for (int i=0; i<N; i++) + { + unsigned int expected = cl_udiv (0xf0000000 + i, 2); + if (expected != ua[i]) + abort(); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv-pow2.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv-pow2.c new file mode 100644 index 0000000..06b4257 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv-pow2.c @@ -0,0 +1,29 @@ +/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */ +/* { dg-require-effective-target vect_int} */ +/* { dg-require-effective-target vect_condition} */ +/* { dg-require-effective-target vect_shift} */ + + + +#include "pr104116.h" +#include "tree-vect.h" + +TEST_FN_UNSIGNED(__CEIL_DIV, 8u, udiv) + +int main (void) +{ + check_vect (); + unsigned int *ua = (unsigned int*)↑ + init_uarr(ua, N); + udiv(ua); + for (int i=0; i<N; i++) + { + unsigned int expected = cl_udiv (0xf0000000 + i, 8); + if (expected != ua[i]) + abort(); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv.c new file mode 100644 index 0000000..ef6e856 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv.c @@ -0,0 +1,29 @@ +/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */ +/* { dg-require-effective-target vect_int} */ +/* { dg-require-effective-target vect_condition} */ +/* { dg-require-effective-target vect_shift} */ + + + +#include "pr104116.h" +#include "tree-vect.h" + +TEST_FN_UNSIGNED(__CEIL_DIV, 19u, udiv) + +int main (void) +{ + check_vect (); + unsigned int *ua = (unsigned int*)↑ + init_uarr(ua, N); + udiv(ua); + for (int i=0; i<N; i++) + { + unsigned int expected = cl_udiv (0xf0000000 + i, 19); + if (expected != ua[i]) + abort(); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod-2.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod-2.c new file mode 100644 index 0000000..2d0a5db --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod-2.c @@ -0,0 +1,30 @@ +/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */ +/* { dg-require-effective-target vect_int} */ +/* { dg-require-effective-target vect_condition} */ +/* { dg-require-effective-target vect_shift} */ + + + +#include "pr104116.h" +#include "tree-vect.h" + + +TEST_FN_UNSIGNED (__CEIL_MOD, 2u, mod) + +int main (void) +{ + check_vect (); + unsigned int *a = (unsigned int*)↑ + init_uarr(a, N); + mod(a); + for (int i=0; i<N; i++) + { + unsigned int expected = cl_umod (0xf0000000 + i, 2); + if (expected != a[i]) + abort(); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod-pow2.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod-pow2.c new file mode 100644 index 0000000..2d0a5db --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod-pow2.c @@ -0,0 +1,30 @@ +/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */ +/* { dg-require-effective-target vect_int} */ +/* { dg-require-effective-target vect_condition} */ +/* { dg-require-effective-target vect_shift} */ + + + +#include "pr104116.h" +#include "tree-vect.h" + + +TEST_FN_UNSIGNED (__CEIL_MOD, 2u, mod) + +int main (void) +{ + check_vect (); + unsigned int *a = (unsigned int*)↑ + init_uarr(a, N); + mod(a); + for (int i=0; i<N; i++) + { + unsigned int expected = cl_umod (0xf0000000 + i, 2); + if (expected != a[i]) + abort(); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod.c new file mode 100644 index 0000000..949a509 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod.c @@ -0,0 +1,30 @@ +/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */ +/* { dg-require-effective-target vect_int} */ +/* { dg-require-effective-target vect_condition} */ +/* { dg-require-effective-target vect_shift} */ + + + +#include "pr104116.h" +#include "tree-vect.h" + + +TEST_FN_UNSIGNED (__CEIL_MOD, 19u, mod) + +int main (void) +{ + check_vect (); + unsigned int *a = (unsigned int*)↑ + init_uarr(a, N); + mod(a); + for (int i=0; i<N; i++) + { + unsigned int expected = cl_umod (0xf0000000 + i, 19); + if (expected != a[i]) + abort(); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-floor-div-2.c b/gcc/testsuite/gcc.dg/vect/pr104116-floor-div-2.c new file mode 100644 index 0000000..d93e051 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104116-floor-div-2.c @@ -0,0 +1,30 @@ +/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */ +/* { dg-require-effective-target vect_int} */ +/* { dg-require-effective-target vect_condition} */ +/* { dg-require-effective-target vect_shift} */ + + + +#include "pr104116.h" +#include "tree-vect.h" + +TEST_FN(__FLOOR_DIV, 2, div_2) + +int main (void) +{ + check_vect (); + int * a = (int*)&arr; + init_arr(a, N); + div_2(a); + for (int i=0; i<N; i++) + { + int expected = fl_div (i - N/2, 2); + if (expected != a[i]) + abort (); + } + return 0; +} + +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */ + + diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-floor-div-pow2.c b/gcc/testsuite/gcc.dg/vect/pr104116-floor-div-pow2.c new file mode 100644 index 0000000..9e986a7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104116-floor-div-pow2.c @@ -0,0 +1,30 @@ +/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */ +/* { dg-require-effective-target vect_int} */ +/* { dg-require-effective-target vect_condition} */ +/* { dg-require-effective-target vect_shift} */ + + + +#include "pr104116.h" +#include "tree-vect.h" + +TEST_FN(__FLOOR_DIV, 8, div_2) + +int main (void) +{ + check_vect (); + int * a = (int*)&arr; + init_arr(a, N); + div_2(a); + for (int i=0; i<N; i++) + { + int expected = fl_div (i - N/2, 8); + if (expected != a[i]) + abort (); + } + return 0; +} + +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */ + + diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-floor-div.c b/gcc/testsuite/gcc.dg/vect/pr104116-floor-div.c new file mode 100644 index 0000000..89dd270 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104116-floor-div.c @@ -0,0 +1,30 @@ +/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */ +/* { dg-require-effective-target vect_int} */ +/* { dg-require-effective-target vect_condition} */ +/* { dg-require-effective-target vect_shift} */ + + + +#include "pr104116.h" +#include "tree-vect.h" + +TEST_FN(__FLOOR_DIV, 19, div_2) + +int main (void) +{ + check_vect (); + int * a = (int*)&arr; + init_arr(a, N); + div_2(a); + for (int i=0; i<N; i++) + { + int expected = fl_div (i - N/2, 19); + if (expected != a[i]) + abort (); + } + return 0; +} + +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */ + + diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-floor-mod-2.c b/gcc/testsuite/gcc.dg/vect/pr104116-floor-mod-2.c new file mode 100644 index 0000000..0c5c162 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104116-floor-mod-2.c @@ -0,0 +1,31 @@ +/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */ +/* { dg-require-effective-target vect_int} */ +/* { dg-require-effective-target vect_condition} */ +/* { dg-require-effective-target vect_shift} */ + + + +#include "pr104116.h" +#include "tree-vect.h" + +TEST_FN(__FLOOR_MOD, 2, mod) + +int main (void) +{ + check_vect (); + int * a = (int*)&arr; + init_arr(a, N); + mod(a); + for (int i=0; i<N; i++) + { + int expected = fl_mod (i - N/2, 2); + if (expected != a[i]) + abort (); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */ + + diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-floor-mod-pow2.c b/gcc/testsuite/gcc.dg/vect/pr104116-floor-mod-pow2.c new 
file mode 100644 index 0000000..f3de145 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104116-floor-mod-pow2.c @@ -0,0 +1,31 @@ +/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */ +/* { dg-require-effective-target vect_int} */ +/* { dg-require-effective-target vect_condition} */ +/* { dg-require-effective-target vect_shift} */ + + + +#include "pr104116.h" +#include "tree-vect.h" + +TEST_FN(__FLOOR_MOD, 8, mod) + +int main (void) +{ + check_vect (); + int * a = (int*)&arr; + init_arr(a, N); + mod(a); + for (int i=0; i<N; i++) + { + int expected = fl_mod (i - N/2, 8); + if (expected != a[i]) + abort (); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */ + + diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-floor-mod.c b/gcc/testsuite/gcc.dg/vect/pr104116-floor-mod.c new file mode 100644 index 0000000..3e6bbe9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104116-floor-mod.c @@ -0,0 +1,31 @@ +/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */ +/* { dg-require-effective-target vect_int} */ +/* { dg-require-effective-target vect_condition} */ +/* { dg-require-effective-target vect_shift} */ + + + +#include "pr104116.h" +#include "tree-vect.h" + +TEST_FN(__FLOOR_MOD, 19, mod) + +int main (void) +{ + check_vect (); + int * a = (int*)&arr; + init_arr(a, N); + mod(a); + for (int i=0; i<N; i++) + { + int expected = fl_mod (i - N/2, 19); + if (expected != a[i]) + abort (); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */ + + diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-div-2.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-div-2.c new file mode 100644 index 0000000..c242ccb --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-div-2.c @@ -0,0 +1,31 @@ +/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */ +/* { dg-require-effective-target vect_int} */ +/* { dg-require-effective-target vect_condition} */ +/* { dg-require-effective-target vect_shift} */ + + + +#include "pr104116.h" +#include "tree-vect.h" + +TEST_FN(__ROUND_DIV, 2, div) + +int main (void) +{ + check_vect (); + int * a = (int*)&arr; + init_arr(a, N); + div(a); + for (int i=0; i<N; i++) + { + int expected = rd_div (i - N/2, 2); + if (expected != a[i]) + abort (); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */ + + diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-div-pow2.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-div-pow2.c new file mode 100644 index 0000000..365c2c59 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-div-pow2.c @@ -0,0 +1,31 @@ +/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */ +/* { dg-require-effective-target vect_int} */ +/* { dg-require-effective-target vect_condition} */ +/* { dg-require-effective-target vect_shift} */ + + + +#include "pr104116.h" +#include "tree-vect.h" + +TEST_FN(__ROUND_DIV, 8, div) + +int main (void) +{ + check_vect (); + int * a = (int*)&arr; + init_arr(a, N); + div(a); + for (int i=0; i<N; i++) + { + int expected = rd_div (i - N/2, 8); + if (expected != a[i]) + abort (); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */ + + diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-div.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-div.c new file mode 100644 index 0000000..5c377d1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-div.c @@ -0,0 +1,31 @@ +/* { dg-additional-options 
"-fgimple -fdump-tree-optimized" } */ +/* { dg-require-effective-target vect_int} */ +/* { dg-require-effective-target vect_condition} */ +/* { dg-require-effective-target vect_shift} */ + + + +#include "pr104116.h" +#include "tree-vect.h" + +TEST_FN(__ROUND_DIV, 19, div) + +int main (void) +{ + check_vect (); + int * a = (int*)&arr; + init_arr(a, N); + div(a); + for (int i=0; i<N; i++) + { + int expected = rd_div (i - N/2, 19); + if (expected != a[i]) + abort(); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */ + + diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-mod-2.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-mod-2.c new file mode 100644 index 0000000..6430b3e --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-mod-2.c @@ -0,0 +1,31 @@ +/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */ +/* { dg-require-effective-target vect_int} */ +/* { dg-require-effective-target vect_condition} */ +/* { dg-require-effective-target vect_shift} */ + + + +#include "pr104116.h" +#include "tree-vect.h" + +TEST_FN(__ROUND_MOD, 2, mod) + +int main (void) +{ + check_vect (); + int * a = (int*)&arr; + init_arr(a, N); + mod(a); + for (int i=0; i<N; i++) + { + int expected = rd_mod (i - N/2, 2); + if (expected != a[i]) + abort (); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */ + + diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-mod-pow2.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-mod-pow2.c new file mode 100644 index 0000000..46c1789 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-mod-pow2.c @@ -0,0 +1,31 @@ +/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */ +/* { dg-require-effective-target vect_int} */ +/* { dg-require-effective-target vect_condition} */ +/* { dg-require-effective-target vect_shift} */ + + + +#include "pr104116.h" +#include "tree-vect.h" + +TEST_FN(__ROUND_MOD, 8, mod) + +int main (void) +{ + check_vect (); + int * a = (int*)&arr; + init_arr(a, N); + mod(a); + for (int i=0; i<N; i++) + { + int expected = rd_mod (i - N/2, 8); + if (expected != a[i]) + abort (); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */ + + diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-mod.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-mod.c new file mode 100644 index 0000000..e7ca44e --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-mod.c @@ -0,0 +1,31 @@ +/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */ +/* { dg-require-effective-target vect_int} */ +/* { dg-require-effective-target vect_condition} */ +/* { dg-require-effective-target vect_shift} */ + + + +#include "pr104116.h" +#include "tree-vect.h" + +TEST_FN(__ROUND_MOD, 19, mod) + +int main (void) +{ + check_vect (); + int * a = (int*)&arr; + init_arr(a, N); + mod(a); + for (int i=0; i<N; i++) + { + int expected = rd_mod (i - N/2, 19); + if (expected != a[i]) + abort (); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */ + + diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-udiv-2.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-udiv-2.c new file mode 100644 index 0000000..4d42f4e --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-udiv-2.c @@ -0,0 +1,31 @@ +/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */ +/* { dg-require-effective-target vect_int} */ +/* { dg-require-effective-target vect_condition} */ +/* { 
dg-require-effective-target vect_shift} */ + + + +#include "pr104116.h" +#include "tree-vect.h" + +TEST_FN_UNSIGNED(__ROUND_DIV, 2u, div) + +int main (void) +{ + check_vect (); + unsigned int * a = (unsigned int*)&uarr; + init_uarr(a, N); + div(a); + for (unsigned int i=0; i<N; i++) + { + unsigned int expected = rd_udiv (0xf0000000 + i, 2); + if (expected != a[i]) + abort (); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */ + + diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-udiv-pow2.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-udiv-pow2.c new file mode 100644 index 0000000..137b249 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-udiv-pow2.c @@ -0,0 +1,31 @@ +/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */ +/* { dg-require-effective-target vect_int} */ +/* { dg-require-effective-target vect_condition} */ +/* { dg-require-effective-target vect_shift} */ + + + +#include "pr104116.h" +#include "tree-vect.h" + +TEST_FN_UNSIGNED(__ROUND_DIV, 8u, div) + +int main (void) +{ + check_vect (); + unsigned int * a = (unsigned int*)&uarr; + init_uarr(a, N); + div(a); + for (unsigned int i=0; i<N; i++) + { + unsigned int expected = rd_udiv (0xf0000000 + i, 8); + if (expected != a[i]) + abort (); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */ + + diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-udiv.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-udiv.c new file mode 100644 index 0000000..183a930 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-udiv.c @@ -0,0 +1,32 @@ +/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */ +/* { dg-require-effective-target vect_int} */ +/* { dg-require-effective-target vect_condition} */ +/* { dg-require-effective-target vect_shift} */ + + + +#include "pr104116.h" +#include "tree-vect.h" + +TEST_FN_UNSIGNED(__ROUND_DIV, 19u, div) + +int main (void) +{ + check_vect (); + unsigned int * a = (unsigned int*)&uarr; + init_uarr(a, N); + div(a); + for (unsigned int i=0; i<N; i++) + { + unsigned int expected = rd_udiv (0xf0000000 + i, 19); + if (expected != a[i]) + abort (); + + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */ + + diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-umod-2.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-umod-2.c new file mode 100644 index 0000000..f321e0e --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-umod-2.c @@ -0,0 +1,31 @@ +/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */ +/* { dg-require-effective-target vect_int} */ +/* { dg-require-effective-target vect_condition} */ +/* { dg-require-effective-target vect_shift} */ + + + +#include "pr104116.h" +#include "tree-vect.h" + +TEST_FN_UNSIGNED(__ROUND_MOD, 2u, mod) + +int main (void) +{ + check_vect (); + unsigned int * a = (unsigned int*)&uarr; + init_uarr(a, N); + mod(a); + for (unsigned int i=0; i<N; i++) + { + unsigned int expected = rd_umod (0xf0000000 + i, 2); + if (expected != a[i]) + abort (); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */ + + diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-umod-pow2.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-umod-pow2.c new file mode 100644 index 0000000..041ecd1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-umod-pow2.c @@ -0,0 +1,31 @@ +/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */ +/* { dg-require-effective-target 
vect_int} */ +/* { dg-require-effective-target vect_condition} */ +/* { dg-require-effective-target vect_shift} */ + + + +#include "pr104116.h" +#include "tree-vect.h" + +TEST_FN_UNSIGNED(__ROUND_MOD, 8u, mod) + +int main (void) +{ + check_vect (); + unsigned int * a = (unsigned int*)&uarr; + init_uarr(a, N); + mod(a); + for (unsigned int i=0; i<N; i++) + { + unsigned int expected = rd_umod (0xf0000000 + i, 8); + if (expected != a[i]) + abort (); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */ + + diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-umod.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-umod.c new file mode 100644 index 0000000..b5ddad1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-umod.c @@ -0,0 +1,31 @@ +/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */ +/* { dg-require-effective-target vect_int} */ +/* { dg-require-effective-target vect_condition} */ +/* { dg-require-effective-target vect_shift} */ + + + +#include "pr104116.h" +#include "tree-vect.h" + +TEST_FN_UNSIGNED(__ROUND_MOD, 19u, mod) + +int main (void) +{ + check_vect (); + unsigned int * a = (unsigned int*)&uarr; + init_uarr(a, N); + mod(a); + for (unsigned int i=0; i<N; i++) + { + unsigned int expected = rd_umod (0xf0000000 + i, 19); + if (expected != a[i]) + abort (); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */ + + diff --git a/gcc/testsuite/gcc.dg/vect/pr104116.h b/gcc/testsuite/gcc.dg/vect/pr104116.h new file mode 100644 index 0000000..6f14e4b --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104116.h @@ -0,0 +1,201 @@ +#define TEST_FN(OP, CONST, NAME) \ +__attribute__((noinline)) \ +void __GIMPLE (ssa,guessed_local(10737416)) \ +NAME (int * a) \ +{ \ + int i; \ + long unsigned int _1; \ + long unsigned int _2; \ + int * _3; \ + int _4; \ + int _5; \ + unsigned int _12; \ + unsigned int _13; \ + \ + __BB(2,guessed_local(10737416)): \ + goto __BB3(precise(134217728)); \ + \ + __BB(3,loop_header(1),guessed_local(1063004408)): \ + i_14 = __PHI (__BB5: i_11, __BB2: 0); \ + _13 = __PHI (__BB5: _12, __BB2: 1024u); \ + _1 = (long unsigned int) i_14; \ + _2 = _1 * 4ul; \ + _3 = a_9(D) + _2; \ + _4 = __MEM <int> (_3); \ + _5 = _4 OP CONST; \ + __MEM <int> (_3) = _5; \ + i_11 = i_14 + 1; \ + _12 = _13 - 1u; \ + if (_12 != 0u) \ + goto __BB5(guessed(132861994)); \ + else \ + goto __BB4(guessed(1355734)); \ + \ + __BB(5,guessed_local(1052266995)): \ + goto __BB3(precise(134217728)); \ + \ + __BB(4,guessed_local(10737416)): \ + return; \ + \ +} \ + +#define TEST_FN_UNSIGNED(OP, CONST, NAME) \ +__attribute__((noinline)) \ +void __GIMPLE (ssa,guessed_local(10737416)) \ +NAME (unsigned int * a) \ +{ \ + int i; \ + long unsigned int _1; \ + long unsigned int _2; \ + unsigned int * _3; \ + unsigned int _4; \ + unsigned int _5; \ + unsigned int _12; \ + unsigned int _13; \ + \ + __BB(2,guessed_local(10737416)): \ + goto __BB3(precise(134217728)); \ + \ + __BB(3,loop_header(1),guessed_local(1063004408)): \ + i_14 = __PHI (__BB5: i_11, __BB2: 0); \ + _13 = __PHI (__BB5: _12, __BB2: 1024u); \ + _1 = (long unsigned int) i_14; \ + _2 = _1 * 4ul; \ + _3 = a_9(D) + _2; \ + _4 = __MEM <unsigned int> (_3); \ + _5 = _4 OP CONST; \ + __MEM <unsigned int> (_3) = _5; \ + i_11 = i_14 + 1; \ + _12 = _13 - 1u; \ + if (_12 != 0u) \ + goto __BB5(guessed(132861994)); \ + else \ + goto __BB4(guessed(1355734)); \ + \ + __BB(5,guessed_local(1052266995)): \ + goto __BB3(precise(134217728)); \ + \ + 
__BB(4,guessed_local(10737416)): \ + return; \ +} \ + + +#define N 1024 +int arr[N]; +__attribute__((optimize("O0"))) +void init_arr (int *a, int n) +{ + for (int i=0; i<n; i++) + a[i] = i - n/2; +} + +unsigned int uarr[N]; +__attribute__((optimize("O0"))) +void init_uarr (unsigned int *a, int n) +{ + for (unsigned int i=0; i<n; i++) + a[i] = 0xf0000000 + i; +} + +int cl_div (int x, int y) +{ + int r = x % y; + int q = x / y; + if (r != 0 && (x ^ y) >= 0) + q++; + return q; +} + +unsigned int cl_udiv (unsigned int x, unsigned int y) +{ + unsigned int r = x % y; + unsigned int q = x / y; + if (r > 0) + q++; + return q; +} + +int cl_mod (int x, int y) +{ + int r = x % y; + if (r != 0 && (x ^ y) >= 0) + r -= y; + return r; +} + +unsigned int cl_umod (unsigned int x, unsigned int y) +{ + unsigned int r = x % y; + unsigned int q = x / y; + if (r > 0) + r-=y; + return r; +} + +int fl_div (int x, int y) +{ + int r = x % y; + int q = x / y; + if (r != 0 && (x ^ y) < 0) + q--; + return q; +} + + +int fl_mod (int x, int y) +{ + int r = x % y; + if (r != 0 && (x ^ y) < 0) + r += y; + return r; +} + +int abs(int x) +{ + if (x < 0) return -x; + return x; +} + +int rd_mod (int x, int y) +{ + int r = x % y; + if (abs(r) > abs((y-1) >> 1)) + { + if ((x ^ y) < 0) + r += y; + else + r -= y; + } + return r; +} + +int rd_div (int x, int y) +{ + int r = x % y; + int q = x / y; + if (abs(r) > abs((y-1) >> 1)) + { + if ((x ^ y) < 0) + q--; + else + q++; + } + return q; +} + +unsigned int rd_umod (unsigned int x, unsigned int y) +{ + unsigned int r = x % y; + if (r > ((y-1) >> 1)) + r -= y; + return r; +} + +unsigned int rd_udiv (unsigned int x, unsigned int y) +{ + unsigned int r = x % y; + unsigned int q = x / y; + if (r > ((y-1) >> 1)) + q++; + return q; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-pr122301.c b/gcc/testsuite/gcc.dg/vect/vect-pr122301.c new file mode 100644 index 0000000..acc7aed --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-pr122301.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ + +int get_prev_frame_segid(unsigned char *p, int n) +{ + int tem; + unsigned seg_id = 8; + for (int x = 0; x < n; x++) + { + int a = seg_id; + tem = a < p[x] ? a : p[x]; + seg_id = tem; + } + return tem; +} + +/* { dg-final { scan-tree-dump "optimized: loop vectorized" "vect" { target { vect_int && { ! 
vect_no_int_min_max } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-pr122308.c b/gcc/testsuite/gcc.dg/vect/vect-pr122308.c new file mode 100644 index 0000000..58c73ae --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-pr122308.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O3 -fdump-tree-unrolljam-optimized" } */ + +int a[1024]; +int b[2048]; +int c[2048]; + +void foo(int n) +{ + for (int i = 0; i < n; i++) + { + int index = c[i]; + + for (int j = 0; j < 1024; ++j) + a[j] += b[index + j]; + } +} + +/* { dg-final { scan-tree-dump "optimized: applying unroll and jam" "unrolljam" } } */ +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" { target vect_int } } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/pr122069_1.c b/gcc/testsuite/gcc.target/aarch64/pr122069_1.c new file mode 100644 index 0000000..b2f9732 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr122069_1.c @@ -0,0 +1,46 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a -mautovec-preference=asimd-only --param vect-epilogues-nomask=0 -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 -fdump-tree-vect-details" }*/ +/* { dg-final { check-function-bodies "**" "" } } */ + +inline char char_abs(char i) { + return (i < 0 ? -i : i); +} + +/* +** foo_int: +** ... +** sub v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b +** zip1 v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b +** zip2 v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b +** uaddw v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4h +** uaddw2 v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.8h +** uaddw v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4h +** uaddw2 v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.8h +** ... +*/ +int foo_int(unsigned char *x, unsigned char * restrict y) { + int sum = 0; + for (int i = 0; i < 8000; i++) + sum += char_abs(x[i] - y[i]); + return sum; +} + +/* +** foo2_int: +** ... +** add v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h +** uaddw v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4h +** uaddw2 v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.8h +** ... +*/ +int foo2_int(unsigned short *x, unsigned short * restrict y) { + int sum = 0; + for (int i = 0; i < 8000; i++) + { + x[i] = x[i] + y[i]; + sum += x[i]; + } + return sum; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/pr122069_2.c b/gcc/testsuite/gcc.target/aarch64/pr122069_2.c new file mode 100644 index 0000000..8019bf9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr122069_2.c @@ -0,0 +1,80 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -march=armv8-a -mautovec-preference=asimd-only -fdump-tree-vect-details" }*/ + +inline char char_abs(char i) { + return (i < 0 ? 
-i : i); +} + +__attribute__((noipa)) +int foo_int(unsigned char *x, unsigned char * restrict y) { + int sum = 0; + for (int i = 0; i < 100; i++) + sum += char_abs(x[i] - y[i]); + return sum; +} + +__attribute__((noipa)) +int foo2_int(unsigned short *x, unsigned short * restrict y, + unsigned short * restrict z) { + int sum = 0; + for (int i = 0; i < 100; i++) + { + z[i] = x[i] + y[i]; + sum += z[i]; + } + return sum; +} + +__attribute__((noipa)) +int foo_int2(unsigned char *x, unsigned char * restrict y) { + int sum = 0; +#pragma GCC novector + for (int i = 0; i < 100; i++) + sum += char_abs(x[i] - y[i]); + return sum; +} + +__attribute__((noipa)) +int foo2_int2(unsigned short *x, unsigned short * restrict y, + unsigned short * restrict z) { + int sum = 0; +#pragma GCC novector + for (int i = 0; i < 100; i++) + { + z[i] = x[i] + y[i]; + sum += z[i]; + } + return sum; +} + +int main () +{ + unsigned short a[100]; + unsigned short b[100]; + unsigned short r1[100]; + unsigned short r2[100]; + unsigned char c[100]; + unsigned char d[100]; +#pragma GCC novector + for (int i = 0; i < 100; i++) + { + a[i] = c[i] = i; + b[i] = d[i] = 100 - i; + } + + if (foo_int (c, d) != foo_int2 (c, d)) + __builtin_abort(); + + + if (foo2_int (a, b, r1) != foo2_int2 (a, b, r2)) + __builtin_abort(); + +#pragma GCC novector + for (int i = 0; i < 100; i++) + if (r1[i] != r2[i]) + __builtin_abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
\ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/pr122069_3.c b/gcc/testsuite/gcc.target/aarch64/pr122069_3.c new file mode 100644 index 0000000..0e832c43 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr122069_3.c @@ -0,0 +1,41 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8.2-a+dotprod -mautovec-preference=asimd-only --param vect-epilogues-nomask=0 -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 -fdump-tree-vect-details" }*/ +/* { dg-final { check-function-bodies "**" "" } } */ + +inline char char_abs(char i) { + return (i < 0 ? -i : i); +} + +/* +** foo_int: +** ... +** sub v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b +** udot v[0-9]+.4s, v[0-9]+.16b, v[0-9]+.16b +** ... +*/ +int foo_int(unsigned char *x, unsigned char * restrict y) { + int sum = 0; + for (int i = 0; i < 8000; i++) + sum += char_abs(x[i] - y[i]); + return sum; +} + +/* +** foo2_int: +** ... +** add v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h +** uaddw v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4h +** uaddw2 v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.8h +** ... +*/ +int foo2_int(unsigned short *x, unsigned short * restrict y) { + int sum = 0; + for (int i = 0; i < 8000; i++) + { + x[i] = x[i] + y[i]; + sum += x[i]; + } + return sum; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/pr122069_4.c b/gcc/testsuite/gcc.target/aarch64/pr122069_4.c new file mode 100644 index 0000000..22d5f63 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr122069_4.c @@ -0,0 +1,81 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_2a_dotprod_neon_hw }*/ +/* { dg-options "-O3 -march=armv8.2-a+dotprod -mautovec-preference=asimd-only -fdump-tree-vect-details" }*/ + +inline char char_abs(char i) { + return (i < 0 ? -i : i); +} + +__attribute__((noipa)) +int foo_int(unsigned char *x, unsigned char * restrict y) { + int sum = 0; + for (int i = 0; i < 100; i++) + sum += char_abs(x[i] - y[i]); + return sum; +} + +__attribute__((noipa)) +int foo2_int(unsigned short *x, unsigned short * restrict y, + unsigned short * restrict z) { + int sum = 0; + for (int i = 0; i < 100; i++) + { + z[i] = x[i] + y[i]; + sum += z[i]; + } + return sum; +} + +__attribute__((noipa)) +int foo_int2(unsigned char *x, unsigned char * restrict y) { + int sum = 0; +#pragma GCC novector + for (int i = 0; i < 100; i++) + sum += char_abs(x[i] - y[i]); + return sum; +} + +__attribute__((noipa)) +int foo2_int2(unsigned short *x, unsigned short * restrict y, + unsigned short * restrict z) { + int sum = 0; +#pragma GCC novector + for (int i = 0; i < 100; i++) + { + z[i] = x[i] + y[i]; + sum += z[i]; + } + return sum; +} + +int main () +{ + unsigned short a[100]; + unsigned short b[100]; + unsigned short r1[100]; + unsigned short r2[100]; + unsigned char c[100]; + unsigned char d[100]; +#pragma GCC novector + for (int i = 0; i < 100; i++) + { + a[i] = c[i] = i; + b[i] = d[i] = 100 - i; + } + + if (foo_int (c, d) != foo_int2 (c, d)) + __builtin_abort(); + + + if (foo2_int (a, b, r1) != foo2_int2 (a, b, r2)) + __builtin_abort(); + +#pragma GCC novector + for (int i = 0; i < 100; i++) + if (r1[i] != r2[i]) + __builtin_abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
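With +dotprod available, the byte reduction in foo_int is instead expected to use udot: each 32-bit lane accumulates a dot product of four unsigned bytes from each source, so sixteen byte values feed four lanes per instruction (with an all-ones operand this degenerates to a plain widening byte sum). A scalar model, illustrative only:

/* Scalar model of one udot (.4s, .16b, .16b) step.  */
static void
udot_step_sketch (unsigned int acc[4],
                  const unsigned char a[16], const unsigned char b[16])
{
  for (int lane = 0; lane < 4; lane++)
    for (int k = 0; k < 4; k++)
      acc[lane] += (unsigned int) a[4 * lane + k] * b[4 * lane + k];
}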
\ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr121604_brk.c b/gcc/testsuite/gcc.target/aarch64/sve/pr121604_brk.c new file mode 100644 index 0000000..a474a20 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr121604_brk.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#include <arm_sve.h> + +/* +** foo: +** ptrue p0\.b, all +** brkb p0\.b, p0/z, p0\.b +** ret +*/ +svbool_t foo () { + return svbrkb_b_m (svpfalse (), svptrue_b8 (), svptrue_b8 ()); +} + +/* +** bar: +** ptrue p0\.b, all +** brka p0\.b, p0/z, p0\.b +** ret +*/ +svbool_t bar () { + return svbrka_b_m (svpfalse (), svptrue_b8 (), svptrue_b8 ()); +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122069_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pr122069_1.c new file mode 100644 index 0000000..5d1f61f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122069_1.c @@ -0,0 +1,45 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=sve-only --param vect-epilogues-nomask=0 -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 -fdump-tree-vect-details" }*/ +/* { dg-final { check-function-bodies "**" "" } } */ + +inline char char_abs(char i) { + return (i < 0 ? -i : i); +} + +/* +** foo_int: +** ... +** sub z[0-9]+.b, z[0-9]+.b, z[0-9]+.b +** udot z[0-9]+.s, z[0-9]+.b, z[0-9]+.b +** ... +*/ +int foo_int(unsigned char *x, unsigned char * restrict y) { + int sum = 0; + for (int i = 0; i < 8000; i++) + sum += char_abs(x[i] - y[i]); + return sum; +} + +/* +** foo2_int: +** ... +** add z[0-9]+.h, z[0-9]+.h, z[0-9]+.h +** punpklo p[0-9]+.h, p[0-9]+.b +** uunpklo z[0-9]+.s, z[0-9]+.h +** add z[0-9]+.s, p[0-9]+/m, z[0-9]+.s, z[0-9]+.s +** punpkhi p[0-9]+.h, p[0-9]+.b +** uunpkhi z[0-9]+.s, z[0-9]+.h +** add z[0-9]+.s, p[0-9]+/m, z[0-9]+.s, z[0-9]+.s +** ... +*/ +int foo2_int(unsigned short *x, unsigned short * restrict y) { + int sum = 0; + for (int i = 0; i < 8000; i++) + { + x[i] = x[i] + y[i]; + sum += x[i]; + } + return sum; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122069_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pr122069_2.c new file mode 100644 index 0000000..b9e0010 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122069_2.c @@ -0,0 +1,81 @@ +/* { dg-do run } */ +/* { dg-require-effective-target aarch64_sve_hw } */ +/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=sve-only -fdump-tree-vect-details" }*/ + +inline char char_abs(char i) { + return (i < 0 ? 
-i : i); +} + +__attribute__((noipa)) +int foo_int(unsigned char *x, unsigned char * restrict y) { + int sum = 0; + for (int i = 0; i < 100; i++) + sum += char_abs(x[i] - y[i]); + return sum; +} + +__attribute__((noipa)) +int foo2_int(unsigned short *x, unsigned short * restrict y, + unsigned short * restrict z) { + int sum = 0; + for (int i = 0; i < 100; i++) + { + z[i] = x[i] + y[i]; + sum += z[i]; + } + return sum; +} + +__attribute__((noipa)) +int foo_int2(unsigned char *x, unsigned char * restrict y) { + int sum = 0; +#pragma GCC novector + for (int i = 0; i < 100; i++) + sum += char_abs(x[i] - y[i]); + return sum; +} + +__attribute__((noipa)) +int foo2_int2(unsigned short *x, unsigned short * restrict y, + unsigned short * restrict z) { + int sum = 0; +#pragma GCC novector + for (int i = 0; i < 100; i++) + { + z[i] = x[i] + y[i]; + sum += z[i]; + } + return sum; +} + +int main () +{ + unsigned short a[100]; + unsigned short b[100]; + unsigned short r1[100]; + unsigned short r2[100]; + unsigned char c[100]; + unsigned char d[100]; +#pragma GCC novector + for (int i = 0; i < 100; i++) + { + a[i] = c[i] = i; + b[i] = d[i] = 100 - i; + } + + if (foo_int (c, d) != foo_int2 (c, d)) + __builtin_abort(); + + + if (foo2_int (a, b, r1) != foo2_int2 (a, b, r2)) + __builtin_abort(); + +#pragma GCC novector + for (int i = 0; i < 100; i++) + if (r1[i] != r2[i]) + __builtin_abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
\ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/pr121604_pmov.c b/gcc/testsuite/gcc.target/aarch64/sve2/pr121604_pmov.c new file mode 100644 index 0000000..16844ee --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/pr121604_pmov.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=armv8.2-a+sve2p1" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#include <arm_sve.h> + +/* +** f: +** pfalse p([0-7]+)\.b +** mov z0\.b, #-1 +** pmov z0\[1\], p\1\.d +** ret +*/ +svuint64_t f () { + return svpmov_lane_u64_m (svdup_u64 (~0UL), svpfalse (), 1); +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/pr122069_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/pr122069_1.c new file mode 100644 index 0000000..6a34707 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/pr122069_1.c @@ -0,0 +1,41 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a+sve2 -mautovec-preference=sve-only --param vect-epilogues-nomask=0 -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 -fdump-tree-vect-details" }*/ +/* { dg-final { check-function-bodies "**" "" } } */ + +inline char char_abs(char i) { + return (i < 0 ? -i : i); +} + +/* +** foo_int: +** ... +** sub z[0-9]+.b, z[0-9]+.b, z[0-9]+.b +** udot z[0-9]+.s, z[0-9]+.b, z[0-9]+.b +** ... +*/ +int foo_int(unsigned char *x, unsigned char * restrict y) { + int sum = 0; + for (int i = 0; i < 8000; i++) + sum += char_abs(x[i] - y[i]); + return sum; +} + +/* +** foo2_int: +** ... +** add z[0-9]+.h, z[0-9]+.h, z[0-9]+.h +** uaddwb z[0-9]+.s, z[0-9]+.s, z[0-9]+.h +** uaddwt z[0-9]+.s, z[0-9]+.s, z[0-9]+.h +** ... +*/ +int foo2_int(unsigned short *x, unsigned short * restrict y) { + int sum = 0; + for (int i = 0; i < 8000; i++) + { + x[i] = x[i] + y[i]; + sum += x[i]; + } + return sum; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/pr122069_2.c b/gcc/testsuite/gcc.target/aarch64/sve2/pr122069_2.c new file mode 100644 index 0000000..95f8317 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/pr122069_2.c @@ -0,0 +1,81 @@ +/* { dg-do run } */ +/* { dg-require-effective-target aarch64_sve2_hw } */ +/* { dg-options "-O3 -march=armv8-a+sve2 -mautovec-preference=sve-only -fdump-tree-vect-details" }*/ + +inline char char_abs(char i) { + return (i < 0 ? 
-i : i); +} + +__attribute__((noipa)) +int foo_int(unsigned char *x, unsigned char * restrict y) { + int sum = 0; + for (int i = 0; i < 100; i++) + sum += char_abs(x[i] - y[i]); + return sum; +} + +__attribute__((noipa)) +int foo2_int(unsigned short *x, unsigned short * restrict y, + unsigned short * restrict z) { + int sum = 0; + for (int i = 0; i < 100; i++) + { + z[i] = x[i] + y[i]; + sum += z[i]; + } + return sum; +} + +__attribute__((noipa)) +int foo_int2(unsigned char *x, unsigned char * restrict y) { + int sum = 0; +#pragma GCC novector + for (int i = 0; i < 100; i++) + sum += char_abs(x[i] - y[i]); + return sum; +} + +__attribute__((noipa)) +int foo2_int2(unsigned short *x, unsigned short * restrict y, + unsigned short * restrict z) { + int sum = 0; +#pragma GCC novector + for (int i = 0; i < 100; i++) + { + z[i] = x[i] + y[i]; + sum += z[i]; + } + return sum; +} + +int main () +{ + unsigned short a[100]; + unsigned short b[100]; + unsigned short r1[100]; + unsigned short r2[100]; + unsigned char c[100]; + unsigned char d[100]; +#pragma GCC novector + for (int i = 0; i < 100; i++) + { + a[i] = c[i] = i; + b[i] = d[i] = 100 - i; + } + + if (foo_int (c, d) != foo_int2 (c, d)) + __builtin_abort(); + + + if (foo2_int (a, b, r1) != foo2_int2 (a, b, r2)) + __builtin_abort(); + +#pragma GCC novector + for (int i = 0; i < 100; i++) + if (r1[i] != r2[i]) + __builtin_abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
\ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/pr122069_3.c b/gcc/testsuite/gcc.target/aarch64/sve2/pr122069_3.c new file mode 100644 index 0000000..c50a0cc --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/pr122069_3.c @@ -0,0 +1,39 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a+sve2p1 -mautovec-preference=sve-only --param vect-epilogues-nomask=0 -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 -fdump-tree-vect-details" }*/ +/* { dg-final { check-function-bodies "**" "" } } */ + +inline char char_abs(char i) { + return (i < 0 ? -i : i); +} + +/* +** foo_int: +** ... +** sub z[0-9]+.b, z[0-9]+.b, z[0-9]+.b +** udot z[0-9]+.s, z[0-9]+.b, z[0-9]+.b +** ... +*/ +int foo_int(unsigned char *x, unsigned char * restrict y) { + int sum = 0; + for (int i = 0; i < 8000; i++) + sum += char_abs(x[i] - y[i]); + return sum; +} + +/* +** foo2_int: +** ... +** udot z[0-9]+.s, z[0-9]+.h, z[0-9]+.h +** ... +*/ +int foo2_int(unsigned short *x, unsigned short * restrict y) { + int sum = 0; + for (int i = 0; i < 8000; i++) + { + x[i] = x[i] + y[i]; + sum += x[i]; + } + return sum; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/pr122069_4.c b/gcc/testsuite/gcc.target/aarch64/sve2/pr122069_4.c new file mode 100644 index 0000000..cfa232f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/pr122069_4.c @@ -0,0 +1,81 @@ +/* { dg-do run } */ +/* { dg-require-effective-target aarch64_sve2p1_hw } */ +/* { dg-options "-O3 -march=armv8-a+sve2p1 -mautovec-preference=sve-only -fdump-tree-vect-details" }*/ + +inline char char_abs(char i) { + return (i < 0 ? -i : i); +} + +__attribute__((noipa)) +int foo_int(unsigned char *x, unsigned char * restrict y) { + int sum = 0; + for (int i = 0; i < 100; i++) + sum += char_abs(x[i] - y[i]); + return sum; +} + +__attribute__((noipa)) +int foo2_int(unsigned short *x, unsigned short * restrict y, + unsigned short * restrict z) { + int sum = 0; + for (int i = 0; i < 100; i++) + { + z[i] = x[i] + y[i]; + sum += z[i]; + } + return sum; +} + +__attribute__((noipa)) +int foo_int2(unsigned char *x, unsigned char * restrict y) { + int sum = 0; +#pragma GCC novector + for (int i = 0; i < 100; i++) + sum += char_abs(x[i] - y[i]); + return sum; +} + +__attribute__((noipa)) +int foo2_int2(unsigned short *x, unsigned short * restrict y, + unsigned short * restrict z) { + int sum = 0; +#pragma GCC novector + for (int i = 0; i < 100; i++) + { + z[i] = x[i] + y[i]; + sum += z[i]; + } + return sum; +} + +int main () +{ + unsigned short a[100]; + unsigned short b[100]; + unsigned short r1[100]; + unsigned short r2[100]; + unsigned char c[100]; + unsigned char d[100]; +#pragma GCC novector + for (int i = 0; i < 100; i++) + { + a[i] = c[i] = i; + b[i] = d[i] = 100 - i; + } + + if (foo_int (c, d) != foo_int2 (c, d)) + __builtin_abort(); + + + if (foo2_int (a, b, r1) != foo2_int2 (a, b, r2)) + __builtin_abort(); + +#pragma GCC novector + for (int i = 0; i < 100; i++) + if (r1[i] != r2[i]) + __builtin_abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
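With sve2p1, the foo2_int reduction above is expected to collapse further into the 16-bit udot form, where each 32-bit lane accumulates two u16 products, so the whole u16-to-32-bit widening sum takes a single instruction per vector of inputs (again degenerating to a plain widening sum when one operand is all ones). A scalar model, assuming a 128-bit vector (8 x u16 into 4 x u32) purely for illustration:

/* Scalar model of one udot (.s, .h, .h) step.  */
static void
udot_h_step_sketch (unsigned int acc[4],
                    const unsigned short a[8], const unsigned short b[8])
{
  for (int lane = 0; lane < 4; lane++)
    for (int k = 0; k < 2; k++)
      acc[lane] += (unsigned int) a[2 * lane + k] * b[2 * lane + k];
}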
\ No newline at end of file diff --git a/gcc/testsuite/gcc.target/bpf/memset-3.c b/gcc/testsuite/gcc.target/bpf/memset-3.c new file mode 100644 index 0000000..0b044a0 --- /dev/null +++ b/gcc/testsuite/gcc.target/bpf/memset-3.c @@ -0,0 +1,56 @@ +/* Test that inline memset expansion properly duplicates the byte value + across the bytes to fill. PR target/122139. */ +/* { dg-do compile } */ +/* { dg-options "-O1 -masm=normal" } */ + +#define SIZE 63 + +unsigned char cdata[SIZE]; +unsigned short sdata[SIZE / 2 + 1]; +unsigned int idata[SIZE / 4 + 1]; +unsigned long ldata[SIZE / 8 + 1]; + +void +a (void) +{ + __builtin_memset (cdata, 0x54, SIZE); +} +/* 0x54=84 */ +/* { dg-final { scan-assembler-times "\[\t \]stb\[^\r\n\]+,84" 63 } } */ + +void +b (void) +{ + __builtin_memset (sdata, 0x7a, SIZE); +} + +/* 0x7a=122, 0x7a7a=31354 */ +/* { dg-final { scan-assembler-times "\[\t \]sth\[^\r\n\]+,31354" 31 } } */ +/* { dg-final { scan-assembler-times "\[\t \]stb\[^\r\n\]+,122" 1 } } */ + +void +c (void) +{ + __builtin_memset (idata, 0x23, SIZE); +} + +/* 0x23=35, 0x2323=8995, 0x23232323=589505315 */ +/* { dg-final { scan-assembler-times "\[\t \]stw\[^\r\n\]+,589505315" 15 } } */ +/* { dg-final { scan-assembler-times "\[\t \]sth\[^\r\n\]+,8995" 1 } } */ +/* { dg-final { scan-assembler-times "\[\t \]stb\[^\r\n\]+,35" 1 } } */ + +void +d (void) +{ + __builtin_memset (ldata, 0xcb, SIZE); +} + +/* 0xcbcbcbcb_cbcbcbcb = -3761688987579986997, + 0xcbcbcbcb = -875836469 + 0xcbcb = -13365 + 0xcb = -53 */ +/* { dg-final { scan-assembler-times "lddw\t%r.,-3761688987579986997"}} */ +/* { dg-final { scan-assembler-times "stxdw" 7 } } */ +/* { dg-final { scan-assembler-times "\[\t \]stw\[^\r\n\]+,-875836469" 1 } } */ +/* { dg-final { scan-assembler-times "\[\t \]sth\[^\r\n\]+,-13365" 1 } } */ +/* { dg-final { scan-assembler-times "\[\t \]stb\[^\r\n\]+,-53" 1 } } */ diff --git a/gcc/testsuite/gcc.target/bpf/memset-4.c b/gcc/testsuite/gcc.target/bpf/memset-4.c new file mode 100644 index 0000000..0c835c5 --- /dev/null +++ b/gcc/testsuite/gcc.target/bpf/memset-4.c @@ -0,0 +1,24 @@ +/* Test that inline memset expansion properly duplicates the byte value + across the bytes to fill for non-const value. PR target/122139. */ +/* { dg-do compile } */ +/* { dg-options "-O1 -masm=normal" } */ + +#define SIZE 63 + +unsigned char cdata[SIZE]; +unsigned short sdata[SIZE / 2 + 1]; +unsigned int idata[SIZE / 4 + 1]; +unsigned long ldata[SIZE / 8 + 1]; + +void +c (unsigned char byte) +{ + __builtin_memset (idata, byte, SIZE); +} + +/* Hard to verify for non-const value. Look for the mul by 0x01010101 + and the proper number of stores... */ +/* { dg-final { scan-assembler "mul32\[\t \]%r.,16843009" } } */ +/* { dg-final { scan-assembler-times "stxw" 15 } } */ +/* { dg-final { scan-assembler-times "stxh" 1 } } */ +/* { dg-final { scan-assembler-times "stxb" 1 } } */ diff --git a/gcc/testsuite/gfortran.dg/conditional_1.f90 b/gcc/testsuite/gfortran.dg/conditional_1.f90 index ca7d21d..9fd442a 100644 --- a/gcc/testsuite/gfortran.dg/conditional_1.f90 +++ b/gcc/testsuite/gfortran.dg/conditional_1.f90 @@ -6,6 +6,8 @@ program conditional_simple logical :: l = .true. real(4) :: r1 = 1.e-4, r2 = 1.e-5 complex :: z = (3.0, 4.0) + character(kind=1, len=5) :: c1 = "hello", c2 = "world" + character(len=:), allocatable :: c3 i = (i > 0 ? 1 : -1) if (i /= 1) stop 1 @@ -29,4 +31,16 @@ program conditional_simple i = 0 z = (i /= 0 ? z : (-3.0, -4.0)) if (z /= (-3.0, -4.0)) stop 6 + + i = 0 + c1 = (i /= 0 ? 
c1 : c2) + if (c1 /= "world") stop 7 + + i = 0 + c1 = (i /= 0 ? "abcde" : "bcdef") + if (c1 /= "bcdef") stop 8 + + i = 0 + c3 = (i /= 0 ? "abcde" : c2(1:3)) + if (c3 /= "wor") stop 9 end program conditional_simple diff --git a/gcc/testsuite/gfortran.dg/conditional_2.f90 b/gcc/testsuite/gfortran.dg/conditional_2.f90 index e78cd08..c45b065 100644 --- a/gcc/testsuite/gfortran.dg/conditional_2.f90 +++ b/gcc/testsuite/gfortran.dg/conditional_2.f90 @@ -4,6 +4,8 @@ program conditional_constant implicit none integer :: i = 42 + print *, (.true. ? 1 : -1) + print *, (.false. ? "hello" : "world") i = (.true. ? 1 : -1) if (i /= 1) stop 1 diff --git a/gcc/testsuite/gfortran.dg/conditional_4.f90 b/gcc/testsuite/gfortran.dg/conditional_4.f90 index 38033b9..5ecf9e0 100644 --- a/gcc/testsuite/gfortran.dg/conditional_4.f90 +++ b/gcc/testsuite/gfortran.dg/conditional_4.f90 @@ -10,12 +10,16 @@ program conditional_resolve integer, dimension(1, 1) :: a_2d logical :: l1(2) integer :: i1(2) + type :: Point + real :: x = 0.0 + end type Point + type(Point) :: p1, p2 i = (l1 ? 1 : -1) ! { dg-error "Condition in conditional expression must be a scalar logical" } i = (i ? 1 : -1) ! { dg-error "Condition in conditional expression must be a scalar logical" } i = (i /= 0 ? 1 : "oh no") ! { dg-error "must have the same declared type" } i = (i /= 0 ? k1 : k4) ! { dg-error "must have the same kind parameter" } i = (i /= 0 ? a_1d : a_2d) ! { dg-error "must have the same rank" } - k1 = (i /= 0 ? k1 : k1) ! { dg-error "Sorry, only integer, logical, real and complex types are currently supported for conditional expressions" } + p1 = (i /= 0 ? p1 : p2) ! { dg-error "Sorry, only integer, logical, real, complex and character types are currently supported for conditional expressions" } i1 = (i /= 0 ? i1 : i1 + 1) ! { dg-error "Sorry, array is currently unsupported for conditional expressions" } end program conditional_resolve diff --git a/gcc/testsuite/gfortran.dg/conditional_6.f90 b/gcc/testsuite/gfortran.dg/conditional_6.f90 index c9ac713..931f11c 100644 --- a/gcc/testsuite/gfortran.dg/conditional_6.f90 +++ b/gcc/testsuite/gfortran.dg/conditional_6.f90 @@ -4,8 +4,19 @@ program conditional_arg implicit none integer :: a = 4 integer :: b = 5 + character(kind=1, len=4) :: c4 = "abcd" + character(kind=1, len=5) :: c5 = "bcdef" + call five((a < 5 ? a : b)) if (a /= 5) stop 1 + + if (my_trim_len((b == 5 ? c4 : c5)) /= 4) stop 2 + if (my_trim_len((b == 5 ? "abcd" : "abcde")) /= 4) stop 3 + if (my_trim_len((b /= 5 ? c4 : c5)) /= 5) stop 4 + if (my_trim_len((b /= 5 ? "abcd" : "abcde")) /= 5) stop 5 + + call five_c((b == 5 ? 
c4 : c5)) + if (c4 /= "bcde") stop 6 contains subroutine five(x) integer, optional :: x @@ -13,4 +24,16 @@ contains x = 5 end if end subroutine five + + integer function my_trim_len(s) + character(len=*), intent(in) :: s + my_trim_len = len_trim(s) + end function my_trim_len + + subroutine five_c(x) + character(len=*), optional :: x + if (present(x)) then + x = c5 + end if + end subroutine five_c end program conditional_arg diff --git a/gcc/testsuite/gnat.dg/specs/style1.ads b/gcc/testsuite/gnat.dg/specs/style1.ads new file mode 100644 index 0000000..e7fd923 --- /dev/null +++ b/gcc/testsuite/gnat.dg/specs/style1.ads @@ -0,0 +1,19 @@ +-- { dg-do compile } +-- { dg-options "-gnatyr" } + +with Ada.Containers.Vectors; +with Ada.Unchecked_Conversion; + +package Style1 is + + package My_Vector is new ada.containers.vectors -- { dg-warning " bad casing" } + (Index_Type => Positive, + Element_Type => Integer); + + type Word is mod 2**32; + + function My_Conv is new ada.unchecked_conversion -- { dg-warning " bad casing" } + (Source => Integer, + Target => Word); + +end Style1; diff --git a/gcc/tree-core.h b/gcc/tree-core.h index 028b6af..145e758 100644 --- a/gcc/tree-core.h +++ b/gcc/tree-core.h @@ -98,6 +98,13 @@ struct die_struct; /* Nonzero if this is a function expected to end with an exception. */ #define ECF_XTHROW (1 << 16) +/* Flags for various callback attribute combinations. These constants are only + meant to be used for the construction of builtin functions. They were only + added because Fortran uses them for attributes of builtins. */ + +/* callback(1, 2) */ +#define ECF_CB_1_2 (1 << 17) + /* Call argument flags. */ /* Nonzero if the argument is not used by the function. */ diff --git a/gcc/tree-inline.cc b/gcc/tree-inline.cc index 428cf55..7fecf48 100644 --- a/gcc/tree-inline.cc +++ b/gcc/tree-inline.cc @@ -2359,6 +2359,19 @@ copy_bb (copy_body_data *id, basic_block bb, indirect->count = copy_basic_block->count.apply_probability (prob); } + /* If edge is a callback-carrying edge, copy all its + attached edges as well. */ + else if (edge->has_callback) + { + edge + = edge->clone (id->dst_node, call_stmt, + gimple_uid (stmt), num, den, true); + cgraph_edge *e; + for (e = old_edge->first_callback_edge (); e; + e = e->next_callback_edge ()) + edge = e->clone (id->dst_node, call_stmt, + gimple_uid (stmt), num, den, true); + } else { edge = edge->clone (id->dst_node, call_stmt, @@ -3051,8 +3064,18 @@ redirect_all_calls (copy_body_data * id, basic_block bb) { if (!id->killed_new_ssa_names) id->killed_new_ssa_names = new hash_set<tree> (16); - cgraph_edge::redirect_call_stmt_to_callee (edge, - id->killed_new_ssa_names); + cgraph_edge::redirect_call_stmt_to_callee ( + edge, id->killed_new_ssa_names); + if (edge->has_callback) + { + /* When redirecting a carrying edge, we need to redirect its + attached edges as well. 
*/ + cgraph_edge *cbe; + for (cbe = edge->first_callback_edge (); cbe; + cbe = cbe->next_callback_edge ()) + cgraph_edge::redirect_call_stmt_to_callee ( + cbe, id->killed_new_ssa_names); + } if (stmt == last && id->call_stmt && maybe_clean_eh_stmt (stmt)) gimple_purge_dead_eh_edges (bb); diff --git a/gcc/tree-object-size.cc b/gcc/tree-object-size.cc index 8545eff..65bcdd5 100644 --- a/gcc/tree-object-size.cc +++ b/gcc/tree-object-size.cc @@ -2145,12 +2145,11 @@ check_for_plus_in_loops (struct object_size_info *osi, tree var) && gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) { tree basevar = gimple_assign_rhs1 (stmt); - tree cst = gimple_assign_rhs2 (stmt); - - gcc_assert (TREE_CODE (cst) == INTEGER_CST); + tree offset = gimple_assign_rhs2 (stmt); /* Skip non-positive offsets. */ - if (integer_zerop (cst) || compare_tree_int (cst, offset_limit) > 0) + if (TREE_CODE (offset) != INTEGER_CST + || integer_zerop (offset) || compare_tree_int (offset, offset_limit) > 0) return; osi->depths[SSA_NAME_VERSION (basevar)] = 1; diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index becee62..c92fbcd 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -2542,8 +2542,8 @@ vect_recog_widen_sum_pattern (vec_info *vinfo, vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt); - if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR, - unprom0.type, type_out)) + if (!vect_supportable_conv_optab_p (vinfo, type, WIDEN_SUM_EXPR, + unprom0.type, type_out)) return NULL; var = vect_recog_temp_ssa_var (type, NULL); @@ -3001,7 +3001,7 @@ vect_recog_over_widening_pattern (vec_info *vinfo, tree_code code = gimple_assign_rhs_code (last_stmt); /* Punt for reductions where we don't handle the type conversions. */ - if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def) + if (vect_is_reduction (last_stmt_info)) return NULL; /* Keep the first operand of a COND_EXPR as-is: only the other two @@ -4838,6 +4838,281 @@ vect_recog_sat_trunc_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo, return NULL; } + +/* Function add_code_for_floorceilround_divmod + A helper function to add compensation code for implementing FLOOR_MOD_EXPR, + FLOOR_DIV_EXPR, CEIL_MOD_EXPR, CEIL_DIV_EXPR, ROUND_MOD_EXPR and + ROUND_DIV_EXPR. + The quotient and remainder are needed for implementing these operators. + FLOOR cases + r = x %[fl] y; d = x/[fl] y; + is + r = x % y; if (r && (x ^ y) < 0) r += y; + r = x % y; d = x/y; if (r && (x ^ y) < 0) d--; respectively. + Produce the following sequence + v0 = x^y + v1 = -r + v2 = r | -r + v3 = v0 & v2 + v4 = v3 < 0 + if (floor_mod) + v5 = v4 ? y : 0 + v6 = r + v5 + if (floor_div) + v5 = v4 ? 
1 : 0 + v6 = d - v5 + Similar sequences of vector instructions are produced for the following cases + CEIL cases + r = x %[cl] y; d = x/[cl] y; + is + r = x % y; if (r && (x ^ y) >= 0) r -= y; + r = x % y; if (r) r -= y; (unsigned) + r = x % y; d = x/y; if (r && (x ^ y) >= 0) d++; + r = x % y; d = x/y; if (r) d++; (unsigned) + ROUND cases + r = x %[rd] y; d = x/[rd] y; + is + r = x % y; if (r > ((y-1)/2)) if ((x ^ y) >= 0) r -= y; else r += y; + r = x % y; if (r > ((y-1)/2)) r -= y; (unsigned) + r = x % y; d = x/y; if (r > ((y-1)/2)) if ((x ^ y) >= 0) d++; else d--; + r = x % y; d = x/y; if (r > ((y-1)/2)) d++; (unsigned) + Inputs: + VECTYPE: Vector type of the operands + STMT_VINFO: Statement where pattern begins + RHS_CODE: One of the FLOOR, CEIL or ROUND division/modulo codes above + Q: The quotient of division + R: Remainder of division + OPRND0/OPRND1: Actual operands involved + ITYPE: tree type of oprnd0 + Output: + NULL if vectorization not possible + Gimple statement based on rhs_code +*/ +static gimple * +add_code_for_floorceilround_divmod (tree vectype, vec_info *vinfo, + stmt_vec_info stmt_vinfo, + enum tree_code rhs_code, tree q, tree r, + tree oprnd0, tree oprnd1, tree itype) +{ + gimple *def_stmt; + tree mask_vectype = truth_type_for (vectype); + if (!mask_vectype) + return NULL; + tree bool_cond; + bool unsigned_p = TYPE_UNSIGNED (itype); + + switch (rhs_code) + { + case FLOOR_MOD_EXPR: + case FLOOR_DIV_EXPR: + case CEIL_MOD_EXPR: + case CEIL_DIV_EXPR: + { + if (!target_has_vecop_for_code (NEGATE_EXPR, vectype) + || !target_has_vecop_for_code (BIT_XOR_EXPR, vectype) + || !target_has_vecop_for_code (BIT_IOR_EXPR, vectype) + || !target_has_vecop_for_code (PLUS_EXPR, vectype) + || !target_has_vecop_for_code (MINUS_EXPR, vectype) + || !expand_vec_cmp_expr_p (vectype, mask_vectype, LT_EXPR) + || !expand_vec_cond_expr_p (vectype, mask_vectype)) + return NULL; + if (unsigned_p) + { + gcc_assert (rhs_code == CEIL_MOD_EXPR || rhs_code == CEIL_DIV_EXPR); + + if (!expand_vec_cmp_expr_p (vectype, mask_vectype, GT_EXPR)) + return NULL; + bool is_mod = rhs_code == CEIL_MOD_EXPR; + // r > 0 + bool_cond = vect_recog_temp_ssa_var (boolean_type_node, NULL); + def_stmt = gimple_build_assign (bool_cond, GT_EXPR, r, + build_int_cst (itype, 0)); + append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype, + itype); + + // (r > 0) ? y : 0 (mod) + // (r > 0) ? 1 : 0 (div) + tree extr_cond = vect_recog_temp_ssa_var (itype, NULL); + def_stmt + = gimple_build_assign (extr_cond, COND_EXPR, bool_cond, + is_mod ? oprnd1 : build_int_cst (itype, 1), + build_int_cst (itype, 0)); + append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt); + + // r -= (r > 0) ? y : 0 (mod) + // d += (r > 0) ? 1 : 0 (div) + tree result = vect_recog_temp_ssa_var (itype, NULL); + return gimple_build_assign (result, is_mod ? MINUS_EXPR : PLUS_EXPR, + is_mod ? 
r : q, extr_cond); + } + else + { + bool ceil_p + = (rhs_code == CEIL_MOD_EXPR || rhs_code == CEIL_DIV_EXPR); + if (ceil_p && !target_has_vecop_for_code (BIT_NOT_EXPR, vectype)) + return NULL; + // x ^ y + tree xort = vect_recog_temp_ssa_var (itype, NULL); + def_stmt = gimple_build_assign (xort, BIT_XOR_EXPR, oprnd0, oprnd1); + append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt); + + tree cond_reg = xort; + // ~(x ^ y) (ceil) + if (ceil_p) + { + cond_reg = vect_recog_temp_ssa_var (itype, NULL); + def_stmt = gimple_build_assign (cond_reg, BIT_NOT_EXPR, xort); + append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt); + } + + // -r + tree negate_r = vect_recog_temp_ssa_var (itype, NULL); + def_stmt = gimple_build_assign (negate_r, NEGATE_EXPR, r); + append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt); + + // r | -r , sign bit is set if r!=0 + tree r_or_negr = vect_recog_temp_ssa_var (itype, NULL); + def_stmt + = gimple_build_assign (r_or_negr, BIT_IOR_EXPR, r, negate_r); + append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt); + + // (x ^ y) & (r | -r) + // ~(x ^ y) & (r | -r) (ceil) + tree r_or_negr_and_xor = vect_recog_temp_ssa_var (itype, NULL); + def_stmt = gimple_build_assign (r_or_negr_and_xor, BIT_AND_EXPR, + r_or_negr, cond_reg); + append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt); + + // (x ^ y) & (r | -r) < 0 which is equivalent to (x^y < 0 && r!=0) + bool_cond = vect_recog_temp_ssa_var (boolean_type_node, NULL); + def_stmt + = gimple_build_assign (bool_cond, LT_EXPR, r_or_negr_and_xor, + build_int_cst (itype, 0)); + append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype, + itype); + + // (x^y < 0 && r) ? y : 0 (mod) + // (x^y < 0 && r) ? -1 : 0 (div) + bool is_mod + = (rhs_code == FLOOR_MOD_EXPR || rhs_code == CEIL_MOD_EXPR); + tree extr_cond = vect_recog_temp_ssa_var (itype, NULL); + def_stmt = gimple_build_assign (extr_cond, COND_EXPR, bool_cond, + is_mod ? oprnd1 + : build_int_cst (itype, -1), + build_int_cst (itype, 0)); + append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt); + + // r += (x ^ y < 0 && r) ? y : 0 (floor mod) + // d += (x^y < 0 && r) ? -1 : 0 (floor div) + // r -= (x ^ y < 0 && r) ? y : 0 (ceil mod) + // d -= (x^y < 0 && r) ? -1 : 0 (ceil div) + tree result = vect_recog_temp_ssa_var (itype, NULL); + return gimple_build_assign (result, + (rhs_code == FLOOR_MOD_EXPR + || rhs_code == FLOOR_DIV_EXPR) + ? PLUS_EXPR + : MINUS_EXPR, + is_mod ? r : q, extr_cond); + } + } + case ROUND_MOD_EXPR: + case ROUND_DIV_EXPR: + { + if (!target_has_vecop_for_code (BIT_AND_EXPR, vectype) + || !target_has_vecop_for_code (PLUS_EXPR, vectype) + || !expand_vec_cmp_expr_p (vectype, mask_vectype, LT_EXPR) + || !expand_vec_cmp_expr_p (vectype, mask_vectype, GT_EXPR) + || !expand_vec_cond_expr_p (vectype, mask_vectype)) + return NULL; + + bool is_mod = rhs_code == ROUND_MOD_EXPR; + HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1); + unsigned HOST_WIDE_INT abs_d + = (d >= 0 ? 
(unsigned HOST_WIDE_INT) d : -(unsigned HOST_WIDE_INT) d); + unsigned HOST_WIDE_INT mid_d = (abs_d - 1) >> 1; + if (!unsigned_p) + { + // check availibility of abs expression for vector + if (!target_has_vecop_for_code (ABS_EXPR, vectype)) + return NULL; + // abs (r) + tree abs_r = vect_recog_temp_ssa_var (itype, NULL); + def_stmt = gimple_build_assign (abs_r, ABS_EXPR, r); + append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt); + + // abs (r) > (abs (y-1) >> 1) + tree round_p = vect_recog_temp_ssa_var (boolean_type_node, NULL); + def_stmt = gimple_build_assign (round_p, GT_EXPR, abs_r, + build_int_cst (itype, mid_d)); + append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype, + itype); + + // x ^ y + tree cond_reg = vect_recog_temp_ssa_var (itype, NULL); + def_stmt + = gimple_build_assign (cond_reg, BIT_XOR_EXPR, oprnd0, oprnd1); + append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt); + + // x ^ y < 0 + bool_cond = vect_recog_temp_ssa_var (boolean_type_node, NULL); + def_stmt = gimple_build_assign (bool_cond, LT_EXPR, cond_reg, + build_int_cst (itype, 0)); + append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype, + itype); + + // x ^ y < 0 ? y : -y (mod) + // x ^ y < 0 ? -1 : 1 (div) + tree val1 = vect_recog_temp_ssa_var (itype, NULL); + def_stmt + = gimple_build_assign (val1, COND_EXPR, bool_cond, + build_int_cst (itype, is_mod ? d : -1), + build_int_cst (itype, is_mod ? -d : 1)); + append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt); + int precision = TYPE_PRECISION (itype); + wide_int wmask = wi::mask (precision, false, precision); + + // abs (r) > (abs (y-1) >> 1) ? 0xffffffff : 0 + tree val2 = vect_recog_temp_ssa_var (itype, NULL); + def_stmt = gimple_build_assign (val2, COND_EXPR, round_p, + wide_int_to_tree (itype, wmask), + build_int_cst (itype, 0)); + append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt); + + tree fval = vect_recog_temp_ssa_var (itype, NULL); + def_stmt = gimple_build_assign (fval, BIT_AND_EXPR, val1, val2); + append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt); + + tree result = vect_recog_temp_ssa_var (itype, NULL); + return gimple_build_assign (result, PLUS_EXPR, is_mod ? r : q, + fval); + } + else + { + // r > (y-1 >> 1) + tree round_p = vect_recog_temp_ssa_var (boolean_type_node, NULL); + def_stmt = gimple_build_assign (round_p, GT_EXPR, r, + build_int_cst (itype, mid_d)); + append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype, + itype); + + // (r > (y-1)>>1) ? -d : 1 + tree val2 = vect_recog_temp_ssa_var (itype, NULL); + def_stmt + = gimple_build_assign (val2, COND_EXPR, round_p, + build_int_cst (itype, is_mod ? -d : 1), + build_int_cst (itype, 0)); + append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt); + + tree result = vect_recog_temp_ssa_var (itype, NULL); + return gimple_build_assign (result, PLUS_EXPR, is_mod ? 
r : q, + val2); + } + } + default: + return NULL; + } +} + /* Detect a signed division by a constant that wouldn't be otherwise vectorized: @@ -4882,7 +5157,8 @@ vect_recog_divmod_pattern (vec_info *vinfo, { gimple *last_stmt = stmt_vinfo->stmt; tree oprnd0, oprnd1, vectype, itype, cond; - gimple *pattern_stmt, *def_stmt; + gimple *pattern_stmt = NULL; + gimple *def_stmt = NULL; enum tree_code rhs_code; optab optab; tree q, cst; @@ -4899,6 +5175,12 @@ vect_recog_divmod_pattern (vec_info *vinfo, case TRUNC_DIV_EXPR: case EXACT_DIV_EXPR: case TRUNC_MOD_EXPR: + case FLOOR_MOD_EXPR: + case FLOOR_DIV_EXPR: + case CEIL_MOD_EXPR: + case CEIL_DIV_EXPR: + case ROUND_MOD_EXPR: + case ROUND_DIV_EXPR: break; default: return NULL; @@ -4930,9 +5212,16 @@ vect_recog_divmod_pattern (vec_info *vinfo, } prec = TYPE_PRECISION (itype); + + bool is_flclrd_moddiv_p + = rhs_code == FLOOR_MOD_EXPR || rhs_code == FLOOR_DIV_EXPR + || rhs_code == CEIL_MOD_EXPR || rhs_code == CEIL_DIV_EXPR + || rhs_code == ROUND_MOD_EXPR || rhs_code == ROUND_DIV_EXPR; if (integer_pow2p (oprnd1)) { - if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1) + if ((TYPE_UNSIGNED (itype) + && (rhs_code == FLOOR_MOD_EXPR || rhs_code == FLOOR_DIV_EXPR)) + || tree_int_cst_sgn (oprnd1) != 1) return NULL; /* Pattern detected. */ @@ -4949,18 +5238,27 @@ vect_recog_divmod_pattern (vec_info *vinfo, tree var_div = vect_recog_temp_ssa_var (itype, NULL); gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift); gimple_call_set_lhs (div_stmt, var_div); - - if (rhs_code == TRUNC_MOD_EXPR) + if (rhs_code == TRUNC_MOD_EXPR || is_flclrd_moddiv_p) { append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt); + tree t1 = vect_recog_temp_ssa_var (itype, NULL); def_stmt - = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), - LSHIFT_EXPR, var_div, shift); + = gimple_build_assign (t1, LSHIFT_EXPR, var_div, shift); append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt); pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), - MINUS_EXPR, oprnd0, - gimple_assign_lhs (def_stmt)); + MINUS_EXPR, oprnd0, t1); + if (is_flclrd_moddiv_p) + { + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt); + pattern_stmt + = add_code_for_floorceilround_divmod (vectype, vinfo, + stmt_vinfo, rhs_code, + var_div, t1, oprnd0, + oprnd1, itype); + if (pattern_stmt == NULL) + return NULL; + } } else pattern_stmt = div_stmt; @@ -4974,8 +5272,12 @@ vect_recog_divmod_pattern (vec_info *vinfo, build_int_cst (itype, 0)); append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, truth_type_for (vectype), itype); + tree div_result = NULL_TREE; if (rhs_code == TRUNC_DIV_EXPR - || rhs_code == EXACT_DIV_EXPR) + || rhs_code == EXACT_DIV_EXPR + || rhs_code == FLOOR_DIV_EXPR + || rhs_code == CEIL_DIV_EXPR + || rhs_code == ROUND_DIV_EXPR) { tree var = vect_recog_temp_ssa_var (itype, NULL); tree shift; @@ -4992,12 +5294,17 @@ vect_recog_divmod_pattern (vec_info *vinfo, append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt); shift = build_int_cst (itype, tree_log2 (oprnd1)); + div_result = vect_recog_temp_ssa_var (itype, NULL); pattern_stmt - = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), - RSHIFT_EXPR, var, shift); + = gimple_build_assign (div_result, RSHIFT_EXPR, var, shift); } - else + if (rhs_code == TRUNC_MOD_EXPR || is_flclrd_moddiv_p) { + if (rhs_code == FLOOR_DIV_EXPR + || rhs_code == CEIL_DIV_EXPR + || rhs_code == ROUND_DIV_EXPR) + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt); + tree signmask; if (compare_tree_int (oprnd1, 2) == 
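The power-of-two branch above first forms the truncating quotient before handing q and r to the shared floor/ceil/round fixup. The classic bias-then-shift sequence it relies on (the bias computation itself sits in context lines not shown in this hunk) can be checked with a small scalar sketch; this is editorial only and assumes arithmetic right shift of negative values, which GCC provides and C++20 requires.

#include <cassert>
#include <cstdint>

int main ()
{
  const int k = 4;                          // divisor d = 1 << k = 16
  const int32_t d = 1 << k;
  for (int32_t x = -100; x <= 100; ++x)
    {
      int32_t sign = x >> 31;               // 0 for x >= 0, -1 for x < 0
      int32_t bias = sign & (d - 1);        // add d - 1 only for negative x
      int32_t q = (x + bias) >> k;          // truncating quotient
      int32_t r = x - (q << k);             // matching truncating remainder
      assert (q == x / d && r == x % d);
    }
  return 0;
}

Note that a plain arithmetic shift of x by k would directly give the floor quotient; the truncating form above is the one the existing pattern emits and the new fixup helper builds on.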
0) { @@ -5042,10 +5349,21 @@ vect_recog_divmod_pattern (vec_info *vinfo, build_int_cst (itype, 1))); append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt); + tree r = vect_recog_temp_ssa_var (itype, NULL); pattern_stmt - = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), - MINUS_EXPR, gimple_assign_lhs (def_stmt), + = gimple_build_assign (r, MINUS_EXPR, gimple_assign_lhs (def_stmt), signmask); + if (is_flclrd_moddiv_p) + { + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt); + pattern_stmt + = add_code_for_floorceilround_divmod (vectype, vinfo, + stmt_vinfo, rhs_code, + div_result, r, oprnd0, + oprnd1, itype); + if (pattern_stmt == NULL) + return NULL; + } } return pattern_stmt; @@ -5352,7 +5670,7 @@ vect_recog_divmod_pattern (vec_info *vinfo, } } - if (rhs_code == TRUNC_MOD_EXPR) + if (rhs_code == TRUNC_MOD_EXPR || is_flclrd_moddiv_p) { tree r, t1; @@ -5367,6 +5685,17 @@ vect_recog_divmod_pattern (vec_info *vinfo, r = vect_recog_temp_ssa_var (itype, NULL); pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1); + + if (is_flclrd_moddiv_p) + { + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt); + pattern_stmt + = add_code_for_floorceilround_divmod (vectype, vinfo, stmt_vinfo, + rhs_code, q, r, oprnd0, oprnd1, + itype); + if (pattern_stmt == NULL) + return NULL; + } } /* Pattern detected. */ @@ -7188,14 +7517,17 @@ vect_mark_pattern_stmts (vec_info *vinfo, break; } /* Try harder to find a mid-entry into an earlier pattern - sequence. This means that the initial 'lookfor' was + sequence. Likewise an entry to a stmt skipping a conversion + on an input. This means that the initial 'lookfor' was bogus. */ if (!found) { for (unsigned i = 0; i < op.num_ops; ++i) if (TREE_CODE (op.ops[i]) == SSA_NAME) if (auto def = vinfo->lookup_def (op.ops[i])) - if (vect_is_reduction (def)) + if (vect_is_reduction (def) + || (is_a <gphi *> (def->stmt) + && STMT_VINFO_REDUC_DEF (def) != NULL)) { STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i; lookfor = gimple_get_lhs (s); diff --git a/gcc/tree.cc b/gcc/tree.cc index 966da80..18abd1f 100644 --- a/gcc/tree.cc +++ b/gcc/tree.cc @@ -75,6 +75,7 @@ along with GCC; see the file COPYING3. If not see #include "dfp.h" #include "asan.h" #include "ubsan.h" +#include "attr-callback.h" /* Names of tree components. Used for printing out the tree and error messages. */ @@ -10000,7 +10001,15 @@ set_call_expr_flags (tree decl, int flags) DECL_ATTRIBUTES (decl) = tree_cons (get_identifier ("expected_throw"), NULL, DECL_ATTRIBUTES (decl)); - /* Looping const or pure is implied by noreturn. + + if (flags & ECF_CB_1_2) + { + tree attr = callback_build_attr (1, 1, 2); + TREE_CHAIN (attr) = DECL_ATTRIBUTES (decl); + DECL_ATTRIBUTES (decl) = attr; + } + + /* Looping const or pure is implied by noreturn. There is currently no way to declare looping const or looping pure alone. */ gcc_assert (!(flags & ECF_LOOPING_CONST_OR_PURE) || ((flags & ECF_NORETURN) && (flags & (ECF_CONST | ECF_PURE)))); diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index 5c43bc0..c292b3f 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,9 @@ +2025-10-17 Tomasz Kamiński <tkaminsk@redhat.com> + + * include/bits/atomic_base.h + (__atomic_ref_base::_S_required_alignment): Renamed from... + (__atomic_ref_base::_S_required_aligment): Renamed.
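Stepping back from the vectorizer, the gcc/tree.cc hunk above tags built-ins carrying ECF_CB_1_2 with the new callback attribute. The following self-contained sketch shows the call shape such an attribute describes; the names parallel_like_entry and worker are illustrative only, and reading callback_build_attr (1, 1, 2) as "argument 1 is a callback that receives argument 2" is an assumption on my part, not something the hunk states.

#include <cstdio>

// A libgomp-style entry point: the first argument is an outlined function and
// the second is the payload it will be invoked with.  This is the kind of
// relationship the new callback machinery records so IPA can see through the
// indirect call.
static void parallel_like_entry (void (*fn) (void *), void *data)
{
  fn (data);
}

static void worker (void *data)
{
  std::printf ("payload: %d\n", *static_cast<int *> (data));
}

int main ()
{
  int value = 42;
  parallel_like_entry (worker, &value);
  return 0;
}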
+ 2025-10-16 Jonathan Wakely <jwakely@redhat.com> * include/std/stacktrace diff --git a/libstdc++-v3/include/bits/atomic_base.h b/libstdc++-v3/include/bits/atomic_base.h index 7e4ad2b..ccea132 100644 --- a/libstdc++-v3/include/bits/atomic_base.h +++ b/libstdc++-v3/include/bits/atomic_base.h @@ -1535,7 +1535,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } static consteval int - _S_required_aligment() + _S_required_alignment() { if constexpr (is_floating_point_v<_Vt> || is_pointer_v<_Vt>) return __alignof__(_Vt); @@ -1555,7 +1555,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION static_assert(is_always_lock_free || !is_volatile_v<_Tp>, "atomic operations on volatile T must be lock-free"); - static constexpr size_t required_alignment = _S_required_aligment(); + static constexpr size_t required_alignment = _S_required_alignment(); __atomic_ref_base() = delete; __atomic_ref_base& operator=(const __atomic_ref_base&) = delete; |
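Finally, the libstdc++ hunk above only renames the misspelled helper; the public constant it feeds is std::atomic_ref<T>::required_alignment. A small usage sketch, editorial only and requiring C++20:

#include <atomic>
#include <cassert>

int main ()
{
  // The referenced object must be at least this aligned for atomic_ref to be
  // valid; required_alignment is the value the renamed _S_required_alignment()
  // helper computes.
  alignas (std::atomic_ref<long>::required_alignment) long counter = 0;

  std::atomic_ref<long> ref (counter);
  ref.fetch_add (1, std::memory_order_relaxed);
  assert (ref.load () == 1);
  return 0;
}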