From 03ca5c55f9d12eba4f4adb1dfaddfb079cbe6b42 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Wed, 25 Oct 2017 00:16:17 +0000 Subject: Daily bump. From-SVN: r254066 --- gcc/DATESTAMP | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 39319a6..7e02f03 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20171024 +20171025 -- cgit v1.1 From 57904e87b934e9dd8f94e9a0c4b3e10f3a4863ec Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 25 Oct 2017 07:08:12 +0000 Subject: tree-ssa-pre.c (need_eh_cleanup, [...]): Move inside... 2017-10-25 Richard Biener * tree-ssa-pre.c (need_eh_cleanup, need_ab_cleanup, el_to_remove, el_to_fixup, el_todo, el_avail, el_avail_stack, eliminate_avail, eliminate_push_avail, eliminate_insert): Move inside... (class eliminate_dom_walker): ... this class in preparation of move. (fini_eliminate): Remove by merging with ... (eliminate): ... this function. Adjust for class changes. (pass_pre::execute): Remove fini_eliminate call. (pass_fre::execute): Likewise. From-SVN: r254068 --- gcc/ChangeLog | 12 ++++ gcc/tree-ssa-pre.c | 178 +++++++++++++++++++++++++---------------------------- 2 files changed, 95 insertions(+), 95 deletions(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 2f750ac..9b749af 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2017-10-25 Richard Biener + + * tree-ssa-pre.c (need_eh_cleanup, need_ab_cleanup, el_to_remove, + el_to_fixup, el_todo, el_avail, el_avail_stack, eliminate_avail, + eliminate_push_avail, eliminate_insert): Move inside... + (class eliminate_dom_walker): ... this class in preparation + of move. + (fini_eliminate): Remove by merging with ... + (eliminate): ... this function. Adjust for class changes. + (pass_pre::execute): Remove fini_eliminate call. + (pass_fre::execute): Likewise. 
+ 2017-10-24 Jakub Jelinek PR target/82460 diff --git a/gcc/tree-ssa-pre.c b/gcc/tree-ssa-pre.c index 7bf8701..ad7a0f1 100644 --- a/gcc/tree-ssa-pre.c +++ b/gcc/tree-ssa-pre.c @@ -551,12 +551,6 @@ static unsigned int get_expr_value_id (pre_expr); static object_allocator bitmap_set_pool ("Bitmap sets"); static bitmap_obstack grand_bitmap_obstack; -/* Set of blocks with statements that have had their EH properties changed. */ -static bitmap need_eh_cleanup; - -/* Set of blocks with statements that have had their AB properties changed. */ -static bitmap need_ab_cleanup; - /* A three tuple {e, pred, v} used to cache phi translations in the phi_translate_table. */ @@ -4042,27 +4036,61 @@ compute_avail (void) free (worklist); } +class eliminate_dom_walker : public dom_walker +{ +public: + eliminate_dom_walker (cdi_direction direction, bool do_pre_); + ~eliminate_dom_walker (); + + virtual edge before_dom_children (basic_block); + virtual void after_dom_children (basic_block); + + tree eliminate_avail (tree op); + void eliminate_push_avail (tree op); + tree eliminate_insert (gimple_stmt_iterator *gsi, tree val); + + bool do_pre; + unsigned int el_todo; + + /* Blocks with statements that have had their EH properties changed. */ + bitmap need_eh_cleanup; + + /* Blocks with statements that have had their AB properties changed. */ + bitmap need_ab_cleanup; -/* Local state for the eliminate domwalk. 
*/ -static vec el_to_remove; -static vec el_to_fixup; -static unsigned int el_todo; -static vec el_avail; -static vec el_avail_stack; + auto_vec to_remove; + auto_vec to_fixup; + auto_vec avail; + auto_vec avail_stack; +}; + +eliminate_dom_walker::eliminate_dom_walker (cdi_direction direction, + bool do_pre_) + : dom_walker (direction), do_pre (do_pre_), el_todo (0) +{ + need_eh_cleanup = BITMAP_ALLOC (NULL); + need_ab_cleanup = BITMAP_ALLOC (NULL); +} + +eliminate_dom_walker::~eliminate_dom_walker () +{ + BITMAP_FREE (need_eh_cleanup); + BITMAP_FREE (need_ab_cleanup); +} /* Return a leader for OP that is available at the current point of the eliminate domwalk. */ -static tree -eliminate_avail (tree op) +tree +eliminate_dom_walker::eliminate_avail (tree op) { tree valnum = VN_INFO (op)->valnum; if (TREE_CODE (valnum) == SSA_NAME) { if (SSA_NAME_IS_DEFAULT_DEF (valnum)) return valnum; - if (el_avail.length () > SSA_NAME_VERSION (valnum)) - return el_avail[SSA_NAME_VERSION (valnum)]; + if (avail.length () > SSA_NAME_VERSION (valnum)) + return avail[SSA_NAME_VERSION (valnum)]; } else if (is_gimple_min_invariant (valnum)) return valnum; @@ -4071,27 +4099,27 @@ eliminate_avail (tree op) /* At the current point of the eliminate domwalk make OP available. 
*/ -static void -eliminate_push_avail (tree op) +void +eliminate_dom_walker::eliminate_push_avail (tree op) { tree valnum = VN_INFO (op)->valnum; if (TREE_CODE (valnum) == SSA_NAME) { - if (el_avail.length () <= SSA_NAME_VERSION (valnum)) - el_avail.safe_grow_cleared (SSA_NAME_VERSION (valnum) + 1); + if (avail.length () <= SSA_NAME_VERSION (valnum)) + avail.safe_grow_cleared (SSA_NAME_VERSION (valnum) + 1); tree pushop = op; - if (el_avail[SSA_NAME_VERSION (valnum)]) - pushop = el_avail[SSA_NAME_VERSION (valnum)]; - el_avail_stack.safe_push (pushop); - el_avail[SSA_NAME_VERSION (valnum)] = op; + if (avail[SSA_NAME_VERSION (valnum)]) + pushop = avail[SSA_NAME_VERSION (valnum)]; + avail_stack.safe_push (pushop); + avail[SSA_NAME_VERSION (valnum)] = op; } } /* Insert the expression recorded by SCCVN for VAL at *GSI. Returns the leader for the expression if insertion was successful. */ -static tree -eliminate_insert (gimple_stmt_iterator *gsi, tree val) +tree +eliminate_dom_walker::eliminate_insert (gimple_stmt_iterator *gsi, tree val) { /* We can insert a sequence with a single assignment only. */ gimple_seq stmts = VN_INFO (val)->expr; @@ -4171,17 +4199,7 @@ eliminate_insert (gimple_stmt_iterator *gsi, tree val) return res; } -class eliminate_dom_walker : public dom_walker -{ -public: - eliminate_dom_walker (cdi_direction direction, bool do_pre_) - : dom_walker (direction), do_pre (do_pre_) {} - virtual edge before_dom_children (basic_block); - virtual void after_dom_children (basic_block); - - bool do_pre; -}; /* Perform elimination for the basic-block B during the domwalk. */ @@ -4189,7 +4207,7 @@ edge eliminate_dom_walker::before_dom_children (basic_block b) { /* Mark new bb. */ - el_avail_stack.safe_push (NULL_TREE); + avail_stack.safe_push (NULL_TREE); /* Skip unreachable blocks marked unreachable during the SCCVN domwalk. 
*/ edge_iterator ei; @@ -4236,7 +4254,7 @@ eliminate_dom_walker::before_dom_children (basic_block b) if (may_propagate_copy (res, sprime)) { /* Mark the PHI for removal. */ - el_to_remove.safe_push (phi); + to_remove.safe_push (phi); gsi_next (&gsi); continue; } @@ -4378,7 +4396,7 @@ eliminate_dom_walker::before_dom_children (basic_block b) if (may_propagate_copy (lhs, sprime)) { /* Mark it for removal. */ - el_to_remove.safe_push (stmt); + to_remove.safe_push (stmt); /* ??? Don't count copy/constant propagations. */ if (gimple_assign_single_p (stmt) @@ -4493,7 +4511,7 @@ eliminate_dom_walker::before_dom_children (basic_block b) } /* Queue stmt for removal. */ - el_to_remove.safe_push (stmt); + to_remove.safe_push (stmt); continue; } } @@ -4601,7 +4619,7 @@ eliminate_dom_walker::before_dom_children (basic_block b) stmt = gsi_stmt (gsi); /* In case we folded the stmt away schedule the NOP for removal. */ if (gimple_nop_p (stmt)) - el_to_remove.safe_push (stmt); + to_remove.safe_push (stmt); } /* Visit indirect calls and turn them into direct calls if @@ -4668,7 +4686,7 @@ eliminate_dom_walker::before_dom_children (basic_block b) is needed to fix up the noreturn call. */ if (!was_noreturn && is_gimple_call (stmt) && gimple_call_noreturn_p (stmt)) - el_to_fixup.safe_push (stmt); + to_fixup.safe_push (stmt); /* When changing a condition or switch into one we know what edge will be executed, schedule a cfg cleanup. 
*/ if ((gimple_code (stmt) == GIMPLE_COND @@ -4734,14 +4752,14 @@ void eliminate_dom_walker::after_dom_children (basic_block) { tree entry; - while ((entry = el_avail_stack.pop ()) != NULL_TREE) + while ((entry = avail_stack.pop ()) != NULL_TREE) { tree valnum = VN_INFO (entry)->valnum; - tree old = el_avail[SSA_NAME_VERSION (valnum)]; + tree old = avail[SSA_NAME_VERSION (valnum)]; if (old == entry) - el_avail[SSA_NAME_VERSION (valnum)] = NULL_TREE; + avail[SSA_NAME_VERSION (valnum)] = NULL_TREE; else - el_avail[SSA_NAME_VERSION (valnum)] = entry; + avail[SSA_NAME_VERSION (valnum)] = entry; } } @@ -4750,40 +4768,18 @@ eliminate_dom_walker::after_dom_children (basic_block) static unsigned int eliminate (bool do_pre) { - need_eh_cleanup = BITMAP_ALLOC (NULL); - need_ab_cleanup = BITMAP_ALLOC (NULL); - - el_to_remove.create (0); - el_to_fixup.create (0); - el_todo = 0; - el_avail.create (num_ssa_names); - el_avail_stack.create (0); + eliminate_dom_walker el (CDI_DOMINATORS, do_pre); + el.avail.reserve (num_ssa_names); - eliminate_dom_walker (CDI_DOMINATORS, - do_pre).walk (cfun->cfg->x_entry_block_ptr); - - el_avail.release (); - el_avail_stack.release (); - - return el_todo; -} - -/* Perform CFG cleanups made necessary by elimination. */ - -static unsigned -fini_eliminate (void) -{ - gimple_stmt_iterator gsi; - gimple *stmt; - unsigned todo = 0; + el.walk (cfun->cfg->x_entry_block_ptr); /* We cannot remove stmts during BB walk, especially not release SSA names there as this confuses the VN machinery. The stmts ending - up in el_to_remove are either stores or simple copies. + up in to_remove are either stores or simple copies. Remove stmts in reverse order to make debug stmt creation possible. 
*/ - while (!el_to_remove.is_empty ()) + while (!el.to_remove.is_empty ()) { - stmt = el_to_remove.pop (); + gimple *stmt = el.to_remove.pop (); if (dump_file && (dump_flags & TDF_DETAILS)) { @@ -4791,7 +4787,7 @@ fini_eliminate (void) print_gimple_stmt (dump_file, stmt, 0, 0); } - gsi = gsi_for_stmt (stmt); + gimple_stmt_iterator gsi = gsi_for_stmt (stmt); if (gimple_code (stmt) == GIMPLE_PHI) remove_phi_node (&gsi, true); else @@ -4799,24 +4795,23 @@ fini_eliminate (void) basic_block bb = gimple_bb (stmt); unlink_stmt_vdef (stmt); if (gsi_remove (&gsi, true)) - bitmap_set_bit (need_eh_cleanup, bb->index); + bitmap_set_bit (el.need_eh_cleanup, bb->index); if (is_gimple_call (stmt) && stmt_can_make_abnormal_goto (stmt)) - bitmap_set_bit (need_ab_cleanup, bb->index); + bitmap_set_bit (el.need_ab_cleanup, bb->index); release_defs (stmt); } /* Removing a stmt may expose a forwarder block. */ - todo |= TODO_cleanup_cfg; + el.el_todo |= TODO_cleanup_cfg; } - el_to_remove.release (); /* Fixup stmts that became noreturn calls. This may require splitting blocks and thus isn't possible during the dominator walk. Do this in reverse order so we don't inadvertedly remove a stmt we want to fixup by visiting a dominating now noreturn call first. 
*/ - while (!el_to_fixup.is_empty ()) + while (!el.to_fixup.is_empty ()) { - stmt = el_to_fixup.pop (); + gimple *stmt = el.to_fixup.pop (); if (dump_file && (dump_flags & TDF_DETAILS)) { @@ -4825,25 +4820,21 @@ fini_eliminate (void) } if (fixup_noreturn_call (stmt)) - todo |= TODO_cleanup_cfg; + el.el_todo |= TODO_cleanup_cfg; } - el_to_fixup.release (); - bool do_eh_cleanup = !bitmap_empty_p (need_eh_cleanup); - bool do_ab_cleanup = !bitmap_empty_p (need_ab_cleanup); + bool do_eh_cleanup = !bitmap_empty_p (el.need_eh_cleanup); + bool do_ab_cleanup = !bitmap_empty_p (el.need_ab_cleanup); if (do_eh_cleanup) - gimple_purge_all_dead_eh_edges (need_eh_cleanup); + gimple_purge_all_dead_eh_edges (el.need_eh_cleanup); if (do_ab_cleanup) - gimple_purge_all_dead_abnormal_call_edges (need_ab_cleanup); - - BITMAP_FREE (need_eh_cleanup); - BITMAP_FREE (need_ab_cleanup); + gimple_purge_all_dead_abnormal_call_edges (el.need_ab_cleanup); if (do_eh_cleanup || do_ab_cleanup) - todo |= TODO_cleanup_cfg; - return todo; + el.el_todo |= TODO_cleanup_cfg; + return el.el_todo; } /* Cheap DCE of a known set of possibly dead stmts. @@ -5043,7 +5034,6 @@ pass_pre::execute (function *fun) clear_expression_ids (); scev_finalize (); - todo |= fini_eliminate (); remove_dead_inserted_code (); fini_pre (); loop_optimizer_finalize (); @@ -5121,8 +5111,6 @@ pass_fre::execute (function *fun) /* Remove all the redundant expressions. */ todo |= eliminate (false); - todo |= fini_eliminate (); - scc_vn_restore_ssa_info (); free_scc_vn (); -- cgit v1.1 From bc1a75dda26988781847f00cfc5283eb13418106 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Wed, 25 Oct 2017 10:05:58 +0200 Subject: re PR libstdc++/81706 (std::sin vectorization bug) PR libstdc++/81706 * attribs.c (attribute_value_equal): Use omp_declare_simd_clauses_equal for comparison of OMP_CLAUSEs regardless of flag_openmp{,_simd}. (duplicate_one_attribute, copy_attributes_to_builtin): New functions. 
* attribs.h (duplicate_one_attribute, copy_attributes_to_builtin): New declarations. * c-decl.c (merge_decls): Copy "omp declare simd" attributes from newdecl to corresponding __builtin_ if any. * decl.c (duplicate_decls): Copy "omp declare simd" attributes from newdecl to corresponding __builtin_ if any. * gcc.target/i386/pr81706.c: New test. * g++.dg/ext/pr81706.C: New test. From-SVN: r254069 --- gcc/ChangeLog | 9 +++++++ gcc/attribs.c | 42 +++++++++++++++++++++++++++++++-- gcc/attribs.h | 10 ++++++++ gcc/c/ChangeLog | 6 +++++ gcc/c/c-decl.c | 2 ++ gcc/cp/ChangeLog | 6 +++++ gcc/cp/decl.c | 2 ++ gcc/testsuite/ChangeLog | 6 +++++ gcc/testsuite/g++.dg/ext/pr81706.C | 32 +++++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr81706.c | 32 +++++++++++++++++++++++++ 10 files changed, 145 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/g++.dg/ext/pr81706.C create mode 100644 gcc/testsuite/gcc.target/i386/pr81706.c (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9b749af..7835159 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2017-10-25 Jakub Jelinek + + PR libstdc++/81706 + * attribs.c (attribute_value_equal): Use omp_declare_simd_clauses_equal + for comparison of OMP_CLAUSEs regardless of flag_openmp{,_simd}. + (duplicate_one_attribute, copy_attributes_to_builtin): New functions. + * attribs.h (duplicate_one_attribute, copy_attributes_to_builtin): New + declarations. 
+ 2017-10-25 Richard Biener * tree-ssa-pre.c (need_eh_cleanup, need_ab_cleanup, el_to_remove, diff --git a/gcc/attribs.c b/gcc/attribs.c index ed76a8d..809f4c3 100644 --- a/gcc/attribs.c +++ b/gcc/attribs.c @@ -1125,9 +1125,9 @@ attribute_value_equal (const_tree attr1, const_tree attr2) TREE_VALUE (attr2)) == 1); } - if ((flag_openmp || flag_openmp_simd) - && TREE_VALUE (attr1) && TREE_VALUE (attr2) + if (TREE_VALUE (attr1) && TREE_CODE (TREE_VALUE (attr1)) == OMP_CLAUSE + && TREE_VALUE (attr2) && TREE_CODE (TREE_VALUE (attr2)) == OMP_CLAUSE) return omp_declare_simd_clauses_equal (TREE_VALUE (attr1), TREE_VALUE (attr2)); @@ -1322,6 +1322,44 @@ merge_decl_attributes (tree olddecl, tree newdecl) DECL_ATTRIBUTES (newdecl)); } +/* Duplicate all attributes with name NAME in ATTR list to *ATTRS if + they are missing there. */ + +void +duplicate_one_attribute (tree *attrs, tree attr, const char *name) +{ + attr = lookup_attribute (name, attr); + if (!attr) + return; + tree a = lookup_attribute (name, *attrs); + while (attr) + { + tree a2; + for (a2 = a; a2; a2 = lookup_attribute (name, TREE_CHAIN (a2))) + if (attribute_value_equal (attr, a2)) + break; + if (!a2) + { + a2 = copy_node (attr); + TREE_CHAIN (a2) = *attrs; + *attrs = a2; + } + attr = lookup_attribute (name, TREE_CHAIN (attr)); + } +} + +/* Duplicate all attributes from user DECL to the corresponding + builtin that should be propagated. */ + +void +copy_attributes_to_builtin (tree decl) +{ + tree b = builtin_decl_explicit (DECL_FUNCTION_CODE (decl)); + if (b) + duplicate_one_attribute (&DECL_ATTRIBUTES (b), + DECL_ATTRIBUTES (decl), "omp declare simd"); +} + #if TARGET_DLLIMPORT_DECL_ATTRIBUTES /* Specialization of merge_decl_attributes for various Windows targets. 
diff --git a/gcc/attribs.h b/gcc/attribs.h index 65e002c..f4bfe03 100644 --- a/gcc/attribs.h +++ b/gcc/attribs.h @@ -77,6 +77,16 @@ extern tree remove_attribute (const char *, tree); extern tree merge_attributes (tree, tree); +/* Duplicate all attributes with name NAME in ATTR list to *ATTRS if + they are missing there. */ + +extern void duplicate_one_attribute (tree *, tree, const char *); + +/* Duplicate all attributes from user DECL to the corresponding + builtin that should be propagated. */ + +extern void copy_attributes_to_builtin (tree); + /* Given two Windows decl attributes lists, possibly including dllimport, return a list of their union . */ extern tree merge_dllimport_decl_attributes (tree, tree); diff --git a/gcc/c/ChangeLog b/gcc/c/ChangeLog index c260f62..b4fde0d 100644 --- a/gcc/c/ChangeLog +++ b/gcc/c/ChangeLog @@ -1,3 +1,9 @@ +2017-10-25 Jakub Jelinek + + PR libstdc++/81706 + * c-decl.c (merge_decls): Copy "omp declare simd" attributes from + newdecl to corresponding __builtin_ if any. + 2017-10-24 Paolo Carlini PR c++/82466 diff --git a/gcc/c/c-decl.c b/gcc/c/c-decl.c index 5c472e6..90f0729 100644 --- a/gcc/c/c-decl.c +++ b/gcc/c/c-decl.c @@ -2570,6 +2570,8 @@ merge_decls (tree newdecl, tree olddecl, tree newtype, tree oldtype) set_builtin_decl_declared_p (fncode, true); break; } + + copy_attributes_to_builtin (newdecl); } } else diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 8c587a3..fdb2c9b 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,9 @@ +2017-10-25 Jakub Jelinek + + PR libstdc++/81706 + * decl.c (duplicate_decls): Copy "omp declare simd" attributes from + newdecl to corresponding __builtin_ if any. 
+ 2017-10-24 Paolo Carlini PR c++/82466 diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index bb48099..c9c3d0a 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -2478,6 +2478,8 @@ next_arg:; break; } } + + copy_attributes_to_builtin (newdecl); } if (new_defines_function) /* If defining a function declared with other language diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 8dbf3b5..00dde9f 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2017-10-25 Jakub Jelinek + + PR libstdc++/81706 + * gcc.target/i386/pr81706.c: New test. + * g++.dg/ext/pr81706.C: New test. + 2017-10-24 Jakub Jelinek PR target/82460 diff --git a/gcc/testsuite/g++.dg/ext/pr81706.C b/gcc/testsuite/g++.dg/ext/pr81706.C new file mode 100644 index 0000000..f0ed8ab --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/pr81706.C @@ -0,0 +1,32 @@ +// PR libstdc++/81706 +// { dg-do compile { target i?86-*-* x86_64-*-* } } +// { dg-options "-O3 -mavx2 -mno-avx512f" } +// { dg-final { scan-assembler "call\[^\n\r]_ZGVdN4v_cos" } } +// { dg-final { scan-assembler "call\[^\n\r]_ZGVdN4v_sin" } } + +#ifdef __cplusplus +extern "C" { +#endif +extern double cos (double) __attribute__ ((nothrow, leaf, simd ("notinbranch"))); +extern double sin (double) __attribute__ ((nothrow, leaf, simd ("notinbranch"))); +#ifdef __cplusplus +} +#endif +double p[1024] = { 1.0 }; +double q[1024] = { 1.0 }; + +void +foo (void) +{ + int i; + for (i = 0; i < 1024; i++) + p[i] = cos (q[i]); +} + +void +bar (void) +{ + int i; + for (i = 0; i < 1024; i++) + p[i] = __builtin_sin (q[i]); +} diff --git a/gcc/testsuite/gcc.target/i386/pr81706.c b/gcc/testsuite/gcc.target/i386/pr81706.c new file mode 100644 index 0000000..333fd15 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr81706.c @@ -0,0 +1,32 @@ +/* PR libstdc++/81706 */ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx2 -mno-avx512f" } */ +/* { dg-final { scan-assembler "call\[^\n\r]_ZGVdN4v_cos" } } */ +/* { dg-final { scan-assembler 
"call\[^\n\r]_ZGVdN4v_sin" } } */ + +#ifdef __cplusplus +extern "C" { +#endif +extern double cos (double) __attribute__ ((nothrow, leaf, simd ("notinbranch"))); +extern double sin (double) __attribute__ ((nothrow, leaf, simd ("notinbranch"))); +#ifdef __cplusplus +} +#endif +double p[1024] = { 1.0 }; +double q[1024] = { 1.0 }; + +void +foo (void) +{ + int i; + for (i = 0; i < 1024; i++) + p[i] = cos (q[i]); +} + +void +bar (void) +{ + int i; + for (i = 0; i < 1024; i++) + p[i] = __builtin_sin (q[i]); +} -- cgit v1.1 From 5dd9af4977aeff30b38c3b4cb68241e823013b9d Mon Sep 17 00:00:00 2001 From: Rainer Orth Date: Wed, 25 Oct 2017 09:15:30 +0000 Subject: Fix gcc.target/i386/cet-sjlj-5.c on Solaris * gcc.target/i386/cet-sjlj-5.c: Allow for emtpy user label prefix in setjmp call. From-SVN: r254070 --- gcc/testsuite/ChangeLog | 5 +++++ gcc/testsuite/gcc.target/i386/cet-sjlj-5.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 00dde9f..27aad40 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2017-10-25 Rainer Orth + + * gcc.target/i386/cet-sjlj-5.c: Allow for emtpy user label prefix + in setjmp call. + 2017-10-25 Jakub Jelinek PR libstdc++/81706 diff --git a/gcc/testsuite/gcc.target/i386/cet-sjlj-5.c b/gcc/testsuite/gcc.target/i386/cet-sjlj-5.c index 12ea9f4..8e54b4b 100644 --- a/gcc/testsuite/gcc.target/i386/cet-sjlj-5.c +++ b/gcc/testsuite/gcc.target/i386/cet-sjlj-5.c @@ -2,7 +2,7 @@ /* { dg-options "-O -fcf-protection -mcet" } */ /* { dg-final { scan-assembler-times "endbr32" 2 { target ia32 } } } */ /* { dg-final { scan-assembler-times "endbr64" 2 { target { ! 
ia32 } } } } */ -/* { dg-final { scan-assembler-times "call _setjmp" 1 } } */ +/* { dg-final { scan-assembler-times "call _?setjmp" 1 } } */ /* { dg-final { scan-assembler-times "call longjmp" 1 } } */ #include -- cgit v1.1 From 7e65575a0efeaefe2b639d713306a9702d20f410 Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Wed, 25 Oct 2017 09:43:54 +0000 Subject: Fix scan-assembler in tree-ssa/loop-1.c for nvptx 2017-10-25 Tom de Vries * gcc.dg/tree-ssa/loop-1.c: Add xfail for nvptx in scan-assembler-times line, and add nvptx-specific version. From-SVN: r254071 --- gcc/testsuite/ChangeLog | 5 +++++ gcc/testsuite/gcc.dg/tree-ssa/loop-1.c | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 27aad40..cc45364 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2017-10-25 Tom de Vries + + * gcc.dg/tree-ssa/loop-1.c: Add xfail for nvptx in scan-assembler-times + line, and add nvptx-specific version. + 2017-10-25 Rainer Orth * gcc.target/i386/cet-sjlj-5.c: Allow for emtpy user label prefix diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-1.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-1.c index 0193c6e..01c37a5 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/loop-1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-1.c @@ -46,7 +46,7 @@ int xxx(void) /* CRIS keeps the address in a register. */ /* m68k sometimes puts the address in a register, depending on CPU and PIC. 
*/ -/* { dg-final { scan-assembler-times "foo" 5 { xfail hppa*-*-* ia64*-*-* sh*-*-* cris-*-* crisv32-*-* fido-*-* m68k-*-* i?86-*-mingw* i?86-*-cygwin* x86_64-*-mingw* visium-*-* } } } */ +/* { dg-final { scan-assembler-times "foo" 5 { xfail hppa*-*-* ia64*-*-* sh*-*-* cris-*-* crisv32-*-* fido-*-* m68k-*-* i?86-*-mingw* i?86-*-cygwin* x86_64-*-mingw* visium-*-* nvptx*-*-* } } } */ /* { dg-final { scan-assembler-times "foo,%r" 5 { target hppa*-*-* } } } */ /* { dg-final { scan-assembler-times "= foo" 5 { target ia64*-*-* } } } */ /* { dg-final { scan-assembler-times "call\[ \t\]*_foo" 5 { target i?86-*-mingw* i?86-*-cygwin* } } } */ @@ -55,3 +55,4 @@ int xxx(void) /* { dg-final { scan-assembler-times "Jsr \\\$r" 5 { target cris-*-* } } } */ /* { dg-final { scan-assembler-times "\[jb\]sr" 5 { target fido-*-* m68k-*-* } } } */ /* { dg-final { scan-assembler-times "bra *tr,r\[1-9\]*,r21" 5 { target visium-*-* } } } */ +/* { dg-final { scan-assembler-times "(?n)\[ \t\]call\[ \t\].*\[ \t\]foo," 5 { target nvptx*-*-* } } } */ -- cgit v1.1 From 7e252d9b9b273e95b77ffdf6650a2fc65df51b17 Mon Sep 17 00:00:00 2001 From: Paolo Carlini Date: Wed, 25 Oct 2017 09:55:21 +0000 Subject: re PR c++/71820 (ICE on valid C++ code: in arg_assoc_type, at cp/name-lookup.c:5583) 2017-10-25 Paolo Carlini PR c++/71820 * g++.dg/ext/typeof12.C: New. From-SVN: r254072 --- gcc/testsuite/ChangeLog | 5 +++++ gcc/testsuite/g++.dg/ext/typeof12.C | 11 +++++++++++ 2 files changed, 16 insertions(+) create mode 100644 gcc/testsuite/g++.dg/ext/typeof12.C (limited to 'gcc') diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index cc45364..63f9de2 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2017-10-25 Paolo Carlini + + PR c++/71820 + * g++.dg/ext/typeof12.C: New. 
+ 2017-10-25 Tom de Vries * gcc.dg/tree-ssa/loop-1.c: Add xfail for nvptx in scan-assembler-times diff --git a/gcc/testsuite/g++.dg/ext/typeof12.C b/gcc/testsuite/g++.dg/ext/typeof12.C new file mode 100644 index 0000000..4ba7573 --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/typeof12.C @@ -0,0 +1,11 @@ +// PR c++/71820 + +void f (void (*) (int, int)) {} + +template < typename T > void g (T x, __typeof__ x) {} // { dg-message "sorry, unimplemented: mangling" } + +int main () +{ + f (g < int >); + return 0; +} -- cgit v1.1 From a596f4970ededd424328f7789b4a304e5b3a7338 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 25 Oct 2017 09:59:39 +0000 Subject: re PR tree-optimization/82436 (465.tonto ICE in vect_get_slp_vect_defs, at tree-vect-slp.c:3410) 2017-10-25 Richard Biener PR tree-optimization/82436 * gcc.dg/torture/pr82436-2.c: New testcase. From-SVN: r254073 --- gcc/testsuite/ChangeLog | 5 ++++ gcc/testsuite/gcc.dg/torture/pr82436-2.c | 45 ++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/torture/pr82436-2.c (limited to 'gcc') diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 63f9de2..0e38439 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2017-10-25 Richard Biener + + PR tree-optimization/82436 + * gcc.dg/torture/pr82436-2.c: New testcase. 
+ 2017-10-25 Paolo Carlini PR c++/71820 diff --git a/gcc/testsuite/gcc.dg/torture/pr82436-2.c b/gcc/testsuite/gcc.dg/torture/pr82436-2.c new file mode 100644 index 0000000..32eda18 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr82436-2.c @@ -0,0 +1,45 @@ +/* { dg-do compile } */ + +enum +{ + a, b, c, d, e, f, g, h, j, k +}; + +int l; +void m (short *s) +{ + short n, o, p; + float(*q)[k]; + int r, i; + while (l > 0) + r = l; + for (;;) + { + i = 0; + for (; i < r; i++) + { + { + float ab = q[i][a]; + int i = ab; + p = i; + } + ((short *) s)[0] = p; + { + float ab = q[i][b]; + int i = ab; + o = i; + } + ((short *) s)[1] = o; + { + float ab = q[i][f]; + int i = ab; + n = i; + } + ((short *) s)[2] = n; + float ab = q[i][g]; + int i = ab; + ((short *) s)[3] = i; + s = (short *) s + 4; + } + } +} -- cgit v1.1 From 5de583cc1809a49a4b38950d2fc4633e31085a33 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 25 Oct 2017 10:20:37 +0000 Subject: tree-ssa-sccvn.h (vn_eliminate): Declare. 2017-10-25 Richard Biener * tree-ssa-sccvn.h (vn_eliminate): Declare. * tree-ssa-pre.c (class eliminate_dom_walker, eliminate, class pass_fre): Move to ... * tree-ssa-sccvn.c (class eliminate_dom_walker, vn_eliminate, class pass_fre): ... here and adjust for statistics. From-SVN: r254074 --- gcc/ChangeLog | 8 + gcc/tree-ssa-pre.c | 868 +------------------------------------------------- gcc/tree-ssa-sccvn.c | 875 ++++++++++++++++++++++++++++++++++++++++++++++++++- gcc/tree-ssa-sccvn.h | 1 + 4 files changed, 884 insertions(+), 868 deletions(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 7835159..c5dfcb7 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2017-10-25 Richard Biener + + * tree-ssa-sccvn.h (vn_eliminate): Declare. + * tree-ssa-pre.c (class eliminate_dom_walker, eliminate, + class pass_fre): Move to ... + * tree-ssa-sccvn.c (class eliminate_dom_walker, vn_eliminate, + class pass_fre): ... here and adjust for statistics. 
+ 2017-10-25 Jakub Jelinek PR libstdc++/81706 diff --git a/gcc/tree-ssa-pre.c b/gcc/tree-ssa-pre.c index ad7a0f1..e4189d1 100644 --- a/gcc/tree-ssa-pre.c +++ b/gcc/tree-ssa-pre.c @@ -39,7 +39,6 @@ along with GCC; see the file COPYING3. If not see #include "gimplify.h" #include "gimple-iterator.h" #include "tree-cfg.h" -#include "tree-ssa-loop.h" #include "tree-into-ssa.h" #include "tree-dfa.h" #include "tree-ssa.h" @@ -50,9 +49,7 @@ along with GCC; see the file COPYING3. If not see #include "dbgcnt.h" #include "domwalk.h" #include "tree-ssa-propagate.h" -#include "ipa-utils.h" #include "tree-cfgcleanup.h" -#include "langhooks.h" #include "alias.h" /* Even though this file is called tree-ssa-pre.c, we actually @@ -516,9 +513,6 @@ typedef struct bb_bitmap_sets optimization PRE was able to perform. */ static struct { - /* The number of RHS computations eliminated by PRE. */ - int eliminations; - /* The number of new expressions/temporaries generated by PRE. */ int insertions; @@ -4036,807 +4030,6 @@ compute_avail (void) free (worklist); } -class eliminate_dom_walker : public dom_walker -{ -public: - eliminate_dom_walker (cdi_direction direction, bool do_pre_); - ~eliminate_dom_walker (); - - virtual edge before_dom_children (basic_block); - virtual void after_dom_children (basic_block); - - tree eliminate_avail (tree op); - void eliminate_push_avail (tree op); - tree eliminate_insert (gimple_stmt_iterator *gsi, tree val); - - bool do_pre; - unsigned int el_todo; - - /* Blocks with statements that have had their EH properties changed. */ - bitmap need_eh_cleanup; - - /* Blocks with statements that have had their AB properties changed. 
*/ - bitmap need_ab_cleanup; - - auto_vec to_remove; - auto_vec to_fixup; - auto_vec avail; - auto_vec avail_stack; -}; - -eliminate_dom_walker::eliminate_dom_walker (cdi_direction direction, - bool do_pre_) - : dom_walker (direction), do_pre (do_pre_), el_todo (0) -{ - need_eh_cleanup = BITMAP_ALLOC (NULL); - need_ab_cleanup = BITMAP_ALLOC (NULL); -} - -eliminate_dom_walker::~eliminate_dom_walker () -{ - BITMAP_FREE (need_eh_cleanup); - BITMAP_FREE (need_ab_cleanup); -} - -/* Return a leader for OP that is available at the current point of the - eliminate domwalk. */ - -tree -eliminate_dom_walker::eliminate_avail (tree op) -{ - tree valnum = VN_INFO (op)->valnum; - if (TREE_CODE (valnum) == SSA_NAME) - { - if (SSA_NAME_IS_DEFAULT_DEF (valnum)) - return valnum; - if (avail.length () > SSA_NAME_VERSION (valnum)) - return avail[SSA_NAME_VERSION (valnum)]; - } - else if (is_gimple_min_invariant (valnum)) - return valnum; - return NULL_TREE; -} - -/* At the current point of the eliminate domwalk make OP available. */ - -void -eliminate_dom_walker::eliminate_push_avail (tree op) -{ - tree valnum = VN_INFO (op)->valnum; - if (TREE_CODE (valnum) == SSA_NAME) - { - if (avail.length () <= SSA_NAME_VERSION (valnum)) - avail.safe_grow_cleared (SSA_NAME_VERSION (valnum) + 1); - tree pushop = op; - if (avail[SSA_NAME_VERSION (valnum)]) - pushop = avail[SSA_NAME_VERSION (valnum)]; - avail_stack.safe_push (pushop); - avail[SSA_NAME_VERSION (valnum)] = op; - } -} - -/* Insert the expression recorded by SCCVN for VAL at *GSI. Returns - the leader for the expression if insertion was successful. */ - -tree -eliminate_dom_walker::eliminate_insert (gimple_stmt_iterator *gsi, tree val) -{ - /* We can insert a sequence with a single assignment only. 
*/ - gimple_seq stmts = VN_INFO (val)->expr; - if (!gimple_seq_singleton_p (stmts)) - return NULL_TREE; - gassign *stmt = dyn_cast (gimple_seq_first_stmt (stmts)); - if (!stmt - || (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)) - && gimple_assign_rhs_code (stmt) != VIEW_CONVERT_EXPR - && gimple_assign_rhs_code (stmt) != BIT_FIELD_REF - && (gimple_assign_rhs_code (stmt) != BIT_AND_EXPR - || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST))) - return NULL_TREE; - - tree op = gimple_assign_rhs1 (stmt); - if (gimple_assign_rhs_code (stmt) == VIEW_CONVERT_EXPR - || gimple_assign_rhs_code (stmt) == BIT_FIELD_REF) - op = TREE_OPERAND (op, 0); - tree leader = TREE_CODE (op) == SSA_NAME ? eliminate_avail (op) : op; - if (!leader) - return NULL_TREE; - - tree res; - stmts = NULL; - if (gimple_assign_rhs_code (stmt) == BIT_FIELD_REF) - res = gimple_build (&stmts, BIT_FIELD_REF, - TREE_TYPE (val), leader, - TREE_OPERAND (gimple_assign_rhs1 (stmt), 1), - TREE_OPERAND (gimple_assign_rhs1 (stmt), 2)); - else if (gimple_assign_rhs_code (stmt) == BIT_AND_EXPR) - res = gimple_build (&stmts, BIT_AND_EXPR, - TREE_TYPE (val), leader, gimple_assign_rhs2 (stmt)); - else - res = gimple_build (&stmts, gimple_assign_rhs_code (stmt), - TREE_TYPE (val), leader); - if (TREE_CODE (res) != SSA_NAME - || SSA_NAME_IS_DEFAULT_DEF (res) - || gimple_bb (SSA_NAME_DEF_STMT (res))) - { - gimple_seq_discard (stmts); - - /* During propagation we have to treat SSA info conservatively - and thus we can end up simplifying the inserted expression - at elimination time to sth not defined in stmts. */ - /* But then this is a redundancy we failed to detect. Which means - res now has two values. That doesn't play well with how - we track availability here, so give up. 
*/ - if (dump_file && (dump_flags & TDF_DETAILS)) - { - if (TREE_CODE (res) == SSA_NAME) - res = eliminate_avail (res); - if (res) - { - fprintf (dump_file, "Failed to insert expression for value "); - print_generic_expr (dump_file, val); - fprintf (dump_file, " which is really fully redundant to "); - print_generic_expr (dump_file, res); - fprintf (dump_file, "\n"); - } - } - - return NULL_TREE; - } - else - { - gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); - VN_INFO_GET (res)->valnum = val; - } - - pre_stats.insertions++; - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Inserted "); - print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (res), 0); - } - - return res; -} - - - -/* Perform elimination for the basic-block B during the domwalk. */ - -edge -eliminate_dom_walker::before_dom_children (basic_block b) -{ - /* Mark new bb. */ - avail_stack.safe_push (NULL_TREE); - - /* Skip unreachable blocks marked unreachable during the SCCVN domwalk. */ - edge_iterator ei; - edge e; - FOR_EACH_EDGE (e, ei, b->preds) - if (e->flags & EDGE_EXECUTABLE) - break; - if (! e) - return NULL; - - for (gphi_iterator gsi = gsi_start_phis (b); !gsi_end_p (gsi);) - { - gphi *phi = gsi.phi (); - tree res = PHI_RESULT (phi); - - if (virtual_operand_p (res)) - { - gsi_next (&gsi); - continue; - } - - tree sprime = eliminate_avail (res); - if (sprime - && sprime != res) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Replaced redundant PHI node defining "); - print_generic_expr (dump_file, res); - fprintf (dump_file, " with "); - print_generic_expr (dump_file, sprime); - fprintf (dump_file, "\n"); - } - - /* If we inserted this PHI node ourself, it's not an elimination. */ - if (inserted_exprs - && bitmap_bit_p (inserted_exprs, SSA_NAME_VERSION (res))) - pre_stats.phis--; - else - pre_stats.eliminations++; - - /* If we will propagate into all uses don't bother to do - anything. 
*/ - if (may_propagate_copy (res, sprime)) - { - /* Mark the PHI for removal. */ - to_remove.safe_push (phi); - gsi_next (&gsi); - continue; - } - - remove_phi_node (&gsi, false); - - if (!useless_type_conversion_p (TREE_TYPE (res), TREE_TYPE (sprime))) - sprime = fold_convert (TREE_TYPE (res), sprime); - gimple *stmt = gimple_build_assign (res, sprime); - gimple_stmt_iterator gsi2 = gsi_after_labels (b); - gsi_insert_before (&gsi2, stmt, GSI_NEW_STMT); - continue; - } - - eliminate_push_avail (res); - gsi_next (&gsi); - } - - for (gimple_stmt_iterator gsi = gsi_start_bb (b); - !gsi_end_p (gsi); - gsi_next (&gsi)) - { - tree sprime = NULL_TREE; - gimple *stmt = gsi_stmt (gsi); - tree lhs = gimple_get_lhs (stmt); - if (lhs && TREE_CODE (lhs) == SSA_NAME - && !gimple_has_volatile_ops (stmt) - /* See PR43491. Do not replace a global register variable when - it is a the RHS of an assignment. Do replace local register - variables since gcc does not guarantee a local variable will - be allocated in register. - ??? The fix isn't effective here. This should instead - be ensured by not value-numbering them the same but treating - them like volatiles? */ - && !(gimple_assign_single_p (stmt) - && (TREE_CODE (gimple_assign_rhs1 (stmt)) == VAR_DECL - && DECL_HARD_REGISTER (gimple_assign_rhs1 (stmt)) - && is_global_var (gimple_assign_rhs1 (stmt))))) - { - sprime = eliminate_avail (lhs); - if (!sprime) - { - /* If there is no existing usable leader but SCCVN thinks - it has an expression it wants to use as replacement, - insert that. */ - tree val = VN_INFO (lhs)->valnum; - if (val != VN_TOP - && TREE_CODE (val) == SSA_NAME - && VN_INFO (val)->needs_insertion - && VN_INFO (val)->expr != NULL - && (sprime = eliminate_insert (&gsi, val)) != NULL_TREE) - eliminate_push_avail (sprime); - } - - /* If this now constitutes a copy duplicate points-to - and range info appropriately. This is especially - important for inserted code. See tree-ssa-copy.c - for similar code. 
*/ - if (sprime - && TREE_CODE (sprime) == SSA_NAME) - { - basic_block sprime_b = gimple_bb (SSA_NAME_DEF_STMT (sprime)); - if (POINTER_TYPE_P (TREE_TYPE (lhs)) - && VN_INFO_PTR_INFO (lhs) - && ! VN_INFO_PTR_INFO (sprime)) - { - duplicate_ssa_name_ptr_info (sprime, - VN_INFO_PTR_INFO (lhs)); - if (b != sprime_b) - mark_ptr_info_alignment_unknown - (SSA_NAME_PTR_INFO (sprime)); - } - else if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)) - && VN_INFO_RANGE_INFO (lhs) - && ! VN_INFO_RANGE_INFO (sprime) - && b == sprime_b) - duplicate_ssa_name_range_info (sprime, - VN_INFO_RANGE_TYPE (lhs), - VN_INFO_RANGE_INFO (lhs)); - } - - /* Inhibit the use of an inserted PHI on a loop header when - the address of the memory reference is a simple induction - variable. In other cases the vectorizer won't do anything - anyway (either it's loop invariant or a complicated - expression). */ - if (sprime - && TREE_CODE (sprime) == SSA_NAME - && do_pre - && (flag_tree_loop_vectorize || flag_tree_parallelize_loops > 1) - && loop_outer (b->loop_father) - && has_zero_uses (sprime) - && bitmap_bit_p (inserted_exprs, SSA_NAME_VERSION (sprime)) - && gimple_assign_load_p (stmt)) - { - gimple *def_stmt = SSA_NAME_DEF_STMT (sprime); - basic_block def_bb = gimple_bb (def_stmt); - if (gimple_code (def_stmt) == GIMPLE_PHI - && def_bb->loop_father->header == def_bb) - { - loop_p loop = def_bb->loop_father; - ssa_op_iter iter; - tree op; - bool found = false; - FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE) - { - affine_iv iv; - def_bb = gimple_bb (SSA_NAME_DEF_STMT (op)); - if (def_bb - && flow_bb_inside_loop_p (loop, def_bb) - && simple_iv (loop, loop, op, &iv, true)) - { - found = true; - break; - } - } - if (found) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Not replacing "); - print_gimple_expr (dump_file, stmt, 0); - fprintf (dump_file, " with "); - print_generic_expr (dump_file, sprime); - fprintf (dump_file, " which would add a loop" - " carried dependence to 
loop %d\n", - loop->num); - } - /* Don't keep sprime available. */ - sprime = NULL_TREE; - } - } - } - - if (sprime) - { - /* If we can propagate the value computed for LHS into - all uses don't bother doing anything with this stmt. */ - if (may_propagate_copy (lhs, sprime)) - { - /* Mark it for removal. */ - to_remove.safe_push (stmt); - - /* ??? Don't count copy/constant propagations. */ - if (gimple_assign_single_p (stmt) - && (TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME - || gimple_assign_rhs1 (stmt) == sprime)) - continue; - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Replaced "); - print_gimple_expr (dump_file, stmt, 0); - fprintf (dump_file, " with "); - print_generic_expr (dump_file, sprime); - fprintf (dump_file, " in all uses of "); - print_gimple_stmt (dump_file, stmt, 0); - } - - pre_stats.eliminations++; - continue; - } - - /* If this is an assignment from our leader (which - happens in the case the value-number is a constant) - then there is nothing to do. */ - if (gimple_assign_single_p (stmt) - && sprime == gimple_assign_rhs1 (stmt)) - continue; - - /* Else replace its RHS. 
*/ - bool can_make_abnormal_goto - = is_gimple_call (stmt) - && stmt_can_make_abnormal_goto (stmt); - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Replaced "); - print_gimple_expr (dump_file, stmt, 0); - fprintf (dump_file, " with "); - print_generic_expr (dump_file, sprime); - fprintf (dump_file, " in "); - print_gimple_stmt (dump_file, stmt, 0); - } - - pre_stats.eliminations++; - gimple *orig_stmt = stmt; - if (!useless_type_conversion_p (TREE_TYPE (lhs), - TREE_TYPE (sprime))) - sprime = fold_convert (TREE_TYPE (lhs), sprime); - tree vdef = gimple_vdef (stmt); - tree vuse = gimple_vuse (stmt); - propagate_tree_value_into_stmt (&gsi, sprime); - stmt = gsi_stmt (gsi); - update_stmt (stmt); - if (vdef != gimple_vdef (stmt)) - VN_INFO (vdef)->valnum = vuse; - - /* If we removed EH side-effects from the statement, clean - its EH information. */ - if (maybe_clean_or_replace_eh_stmt (orig_stmt, stmt)) - { - bitmap_set_bit (need_eh_cleanup, - gimple_bb (stmt)->index); - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, " Removed EH side-effects.\n"); - } - - /* Likewise for AB side-effects. */ - if (can_make_abnormal_goto - && !stmt_can_make_abnormal_goto (stmt)) - { - bitmap_set_bit (need_ab_cleanup, - gimple_bb (stmt)->index); - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, " Removed AB side-effects.\n"); - } - - continue; - } - } - - /* If the statement is a scalar store, see if the expression - has the same value number as its rhs. If so, the store is - dead. 
*/ - if (gimple_assign_single_p (stmt) - && !gimple_has_volatile_ops (stmt) - && !is_gimple_reg (gimple_assign_lhs (stmt)) - && (TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME - || is_gimple_min_invariant (gimple_assign_rhs1 (stmt)))) - { - tree val; - tree rhs = gimple_assign_rhs1 (stmt); - vn_reference_t vnresult; - val = vn_reference_lookup (lhs, gimple_vuse (stmt), VN_WALKREWRITE, - &vnresult, false); - if (TREE_CODE (rhs) == SSA_NAME) - rhs = VN_INFO (rhs)->valnum; - if (val - && operand_equal_p (val, rhs, 0)) - { - /* We can only remove the later store if the former aliases - at least all accesses the later one does or if the store - was to readonly memory storing the same value. */ - alias_set_type set = get_alias_set (lhs); - if (! vnresult - || vnresult->set == set - || alias_set_subset_of (set, vnresult->set)) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Deleted redundant store "); - print_gimple_stmt (dump_file, stmt, 0); - } - - /* Queue stmt for removal. */ - to_remove.safe_push (stmt); - continue; - } - } - } - - /* If this is a control statement value numbering left edges - unexecuted on force the condition in a way consistent with - that. 
*/ - if (gcond *cond = dyn_cast (stmt)) - { - if ((EDGE_SUCC (b, 0)->flags & EDGE_EXECUTABLE) - ^ (EDGE_SUCC (b, 1)->flags & EDGE_EXECUTABLE)) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Removing unexecutable edge from "); - print_gimple_stmt (dump_file, stmt, 0); - } - if (((EDGE_SUCC (b, 0)->flags & EDGE_TRUE_VALUE) != 0) - == ((EDGE_SUCC (b, 0)->flags & EDGE_EXECUTABLE) != 0)) - gimple_cond_make_true (cond); - else - gimple_cond_make_false (cond); - update_stmt (cond); - el_todo |= TODO_cleanup_cfg; - continue; - } - } - - bool can_make_abnormal_goto = stmt_can_make_abnormal_goto (stmt); - bool was_noreturn = (is_gimple_call (stmt) - && gimple_call_noreturn_p (stmt)); - tree vdef = gimple_vdef (stmt); - tree vuse = gimple_vuse (stmt); - - /* If we didn't replace the whole stmt (or propagate the result - into all uses), replace all uses on this stmt with their - leaders. */ - bool modified = false; - use_operand_p use_p; - ssa_op_iter iter; - FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE) - { - tree use = USE_FROM_PTR (use_p); - /* ??? The call code above leaves stmt operands un-updated. */ - if (TREE_CODE (use) != SSA_NAME) - continue; - tree sprime = eliminate_avail (use); - if (sprime && sprime != use - && may_propagate_copy (use, sprime) - /* We substitute into debug stmts to avoid excessive - debug temporaries created by removed stmts, but we need - to avoid doing so for inserted sprimes as we never want - to create debug temporaries for them. */ - && (!inserted_exprs - || TREE_CODE (sprime) != SSA_NAME - || !is_gimple_debug (stmt) - || !bitmap_bit_p (inserted_exprs, SSA_NAME_VERSION (sprime)))) - { - propagate_value (use_p, sprime); - modified = true; - } - } - - /* Fold the stmt if modified, this canonicalizes MEM_REFs we propagated - into which is a requirement for the IPA devirt machinery. 
*/ - gimple *old_stmt = stmt; - if (modified) - { - /* If a formerly non-invariant ADDR_EXPR is turned into an - invariant one it was on a separate stmt. */ - if (gimple_assign_single_p (stmt) - && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR) - recompute_tree_invariant_for_addr_expr (gimple_assign_rhs1 (stmt)); - gimple_stmt_iterator prev = gsi; - gsi_prev (&prev); - if (fold_stmt (&gsi)) - { - /* fold_stmt may have created new stmts inbetween - the previous stmt and the folded stmt. Mark - all defs created there as varying to not confuse - the SCCVN machinery as we're using that even during - elimination. */ - if (gsi_end_p (prev)) - prev = gsi_start_bb (b); - else - gsi_next (&prev); - if (gsi_stmt (prev) != gsi_stmt (gsi)) - do - { - tree def; - ssa_op_iter dit; - FOR_EACH_SSA_TREE_OPERAND (def, gsi_stmt (prev), - dit, SSA_OP_ALL_DEFS) - /* As existing DEFs may move between stmts - we have to guard VN_INFO_GET. */ - if (! has_VN_INFO (def)) - VN_INFO_GET (def)->valnum = def; - if (gsi_stmt (prev) == gsi_stmt (gsi)) - break; - gsi_next (&prev); - } - while (1); - } - stmt = gsi_stmt (gsi); - /* In case we folded the stmt away schedule the NOP for removal. */ - if (gimple_nop_p (stmt)) - to_remove.safe_push (stmt); - } - - /* Visit indirect calls and turn them into direct calls if - possible using the devirtualization machinery. Do this before - checking for required EH/abnormal/noreturn cleanup as devird - may expose more of those. 
*/ - if (gcall *call_stmt = dyn_cast (stmt)) - { - tree fn = gimple_call_fn (call_stmt); - if (fn - && flag_devirtualize - && virtual_method_call_p (fn)) - { - tree otr_type = obj_type_ref_class (fn); - unsigned HOST_WIDE_INT otr_tok - = tree_to_uhwi (OBJ_TYPE_REF_TOKEN (fn)); - tree instance; - ipa_polymorphic_call_context context (current_function_decl, - fn, stmt, &instance); - context.get_dynamic_type (instance, OBJ_TYPE_REF_OBJECT (fn), - otr_type, stmt); - bool final; - vec targets - = possible_polymorphic_call_targets (obj_type_ref_class (fn), - otr_tok, context, &final); - if (dump_file) - dump_possible_polymorphic_call_targets (dump_file, - obj_type_ref_class (fn), - otr_tok, context); - if (final && targets.length () <= 1 && dbg_cnt (devirt)) - { - tree fn; - if (targets.length () == 1) - fn = targets[0]->decl; - else - fn = builtin_decl_implicit (BUILT_IN_UNREACHABLE); - if (dump_enabled_p ()) - { - location_t loc = gimple_location (stmt); - dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, - "converting indirect call to " - "function %s\n", - lang_hooks.decl_printable_name (fn, 2)); - } - gimple_call_set_fndecl (call_stmt, fn); - /* If changing the call to __builtin_unreachable - or similar noreturn function, adjust gimple_call_fntype - too. */ - if (gimple_call_noreturn_p (call_stmt) - && VOID_TYPE_P (TREE_TYPE (TREE_TYPE (fn))) - && TYPE_ARG_TYPES (TREE_TYPE (fn)) - && (TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fn))) - == void_type_node)) - gimple_call_set_fntype (call_stmt, TREE_TYPE (fn)); - maybe_remove_unused_call_args (cfun, call_stmt); - modified = true; - } - } - } - - if (modified) - { - /* When changing a call into a noreturn call, cfg cleanup - is needed to fix up the noreturn call. */ - if (!was_noreturn - && is_gimple_call (stmt) && gimple_call_noreturn_p (stmt)) - to_fixup.safe_push (stmt); - /* When changing a condition or switch into one we know what - edge will be executed, schedule a cfg cleanup. 
*/ - if ((gimple_code (stmt) == GIMPLE_COND - && (gimple_cond_true_p (as_a (stmt)) - || gimple_cond_false_p (as_a (stmt)))) - || (gimple_code (stmt) == GIMPLE_SWITCH - && TREE_CODE (gimple_switch_index - (as_a (stmt))) == INTEGER_CST)) - el_todo |= TODO_cleanup_cfg; - /* If we removed EH side-effects from the statement, clean - its EH information. */ - if (maybe_clean_or_replace_eh_stmt (old_stmt, stmt)) - { - bitmap_set_bit (need_eh_cleanup, - gimple_bb (stmt)->index); - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, " Removed EH side-effects.\n"); - } - /* Likewise for AB side-effects. */ - if (can_make_abnormal_goto - && !stmt_can_make_abnormal_goto (stmt)) - { - bitmap_set_bit (need_ab_cleanup, - gimple_bb (stmt)->index); - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, " Removed AB side-effects.\n"); - } - update_stmt (stmt); - if (vdef != gimple_vdef (stmt)) - VN_INFO (vdef)->valnum = vuse; - } - - /* Make new values available - for fully redundant LHS we - continue with the next stmt above and skip this. */ - def_operand_p defp; - FOR_EACH_SSA_DEF_OPERAND (defp, stmt, iter, SSA_OP_DEF) - eliminate_push_avail (DEF_FROM_PTR (defp)); - } - - /* Replace destination PHI arguments. */ - FOR_EACH_EDGE (e, ei, b->succs) - if (e->flags & EDGE_EXECUTABLE) - for (gphi_iterator gsi = gsi_start_phis (e->dest); - !gsi_end_p (gsi); - gsi_next (&gsi)) - { - gphi *phi = gsi.phi (); - use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, e); - tree arg = USE_FROM_PTR (use_p); - if (TREE_CODE (arg) != SSA_NAME - || virtual_operand_p (arg)) - continue; - tree sprime = eliminate_avail (arg); - if (sprime && may_propagate_copy (arg, sprime)) - propagate_value (use_p, sprime); - } - return NULL; -} - -/* Make no longer available leaders no longer available. 
*/ - -void -eliminate_dom_walker::after_dom_children (basic_block) -{ - tree entry; - while ((entry = avail_stack.pop ()) != NULL_TREE) - { - tree valnum = VN_INFO (entry)->valnum; - tree old = avail[SSA_NAME_VERSION (valnum)]; - if (old == entry) - avail[SSA_NAME_VERSION (valnum)] = NULL_TREE; - else - avail[SSA_NAME_VERSION (valnum)] = entry; - } -} - -/* Eliminate fully redundant computations. */ - -static unsigned int -eliminate (bool do_pre) -{ - eliminate_dom_walker el (CDI_DOMINATORS, do_pre); - el.avail.reserve (num_ssa_names); - - el.walk (cfun->cfg->x_entry_block_ptr); - - /* We cannot remove stmts during BB walk, especially not release SSA - names there as this confuses the VN machinery. The stmts ending - up in to_remove are either stores or simple copies. - Remove stmts in reverse order to make debug stmt creation possible. */ - while (!el.to_remove.is_empty ()) - { - gimple *stmt = el.to_remove.pop (); - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Removing dead stmt "); - print_gimple_stmt (dump_file, stmt, 0, 0); - } - - gimple_stmt_iterator gsi = gsi_for_stmt (stmt); - if (gimple_code (stmt) == GIMPLE_PHI) - remove_phi_node (&gsi, true); - else - { - basic_block bb = gimple_bb (stmt); - unlink_stmt_vdef (stmt); - if (gsi_remove (&gsi, true)) - bitmap_set_bit (el.need_eh_cleanup, bb->index); - if (is_gimple_call (stmt) && stmt_can_make_abnormal_goto (stmt)) - bitmap_set_bit (el.need_ab_cleanup, bb->index); - release_defs (stmt); - } - - /* Removing a stmt may expose a forwarder block. */ - el.el_todo |= TODO_cleanup_cfg; - } - - /* Fixup stmts that became noreturn calls. This may require splitting - blocks and thus isn't possible during the dominator walk. Do this - in reverse order so we don't inadvertedly remove a stmt we want to - fixup by visiting a dominating now noreturn call first. 
*/ - while (!el.to_fixup.is_empty ()) - { - gimple *stmt = el.to_fixup.pop (); - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Fixing up noreturn call "); - print_gimple_stmt (dump_file, stmt, 0); - } - - if (fixup_noreturn_call (stmt)) - el.el_todo |= TODO_cleanup_cfg; - } - - bool do_eh_cleanup = !bitmap_empty_p (el.need_eh_cleanup); - bool do_ab_cleanup = !bitmap_empty_p (el.need_ab_cleanup); - - if (do_eh_cleanup) - gimple_purge_all_dead_eh_edges (el.need_eh_cleanup); - - if (do_ab_cleanup) - gimple_purge_all_dead_abnormal_call_edges (el.need_ab_cleanup); - - if (do_eh_cleanup || do_ab_cleanup) - el.el_todo |= TODO_cleanup_cfg; - return el.el_todo; -} - /* Cheap DCE of a known set of possibly dead stmts. Because we don't follow exactly the standard PRE algorithm, and decide not @@ -5023,13 +4216,12 @@ pass_pre::execute (function *fun) gcc_assert (!need_ssa_update_p (fun)); /* Remove all the redundant expressions. */ - todo |= eliminate (true); + todo |= vn_eliminate (inserted_exprs); statistics_counter_event (fun, "Insertions", pre_stats.insertions); statistics_counter_event (fun, "PA inserted", pre_stats.pa_insert); statistics_counter_event (fun, "HOIST inserted", pre_stats.hoist_insert); statistics_counter_event (fun, "New PHIs", pre_stats.phis); - statistics_counter_event (fun, "Eliminated", pre_stats.eliminations); clear_expression_ids (); @@ -5069,61 +4261,3 @@ make_pass_pre (gcc::context *ctxt) { return new pass_pre (ctxt); } - -namespace { - -const pass_data pass_data_fre = -{ - GIMPLE_PASS, /* type */ - "fre", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - TV_TREE_FRE, /* tv_id */ - ( PROP_cfg | PROP_ssa ), /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - 0, /* todo_flags_finish */ -}; - -class pass_fre : public gimple_opt_pass -{ -public: - pass_fre (gcc::context *ctxt) - : gimple_opt_pass (pass_data_fre, ctxt) - {} - - /* opt_pass methods: */ - 
opt_pass * clone () { return new pass_fre (m_ctxt); } - virtual bool gate (function *) { return flag_tree_fre != 0; } - virtual unsigned int execute (function *); - -}; // class pass_fre - -unsigned int -pass_fre::execute (function *fun) -{ - unsigned int todo = 0; - - run_scc_vn (VN_WALKREWRITE); - - memset (&pre_stats, 0, sizeof (pre_stats)); - - /* Remove all the redundant expressions. */ - todo |= eliminate (false); - - scc_vn_restore_ssa_info (); - free_scc_vn (); - - statistics_counter_event (fun, "Insertions", pre_stats.insertions); - statistics_counter_event (fun, "Eliminated", pre_stats.eliminations); - - return todo; -} - -} // anon namespace - -gimple_opt_pass * -make_pass_fre (gcc::context *ctxt) -{ - return new pass_fre (ctxt); -} diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c index d27bcee..306080b 100644 --- a/gcc/tree-ssa-sccvn.c +++ b/gcc/tree-ssa-sccvn.c @@ -55,13 +55,21 @@ along with GCC; see the file COPYING3. If not see #include "cfgloop.h" #include "params.h" #include "tree-ssa-propagate.h" -#include "tree-ssa-sccvn.h" #include "tree-cfg.h" #include "domwalk.h" #include "gimple-iterator.h" #include "gimple-match.h" #include "stringpool.h" #include "attribs.h" +#include "tree-pass.h" +#include "statistics.h" +#include "langhooks.h" +#include "ipa-utils.h" +#include "dbgcnt.h" +#include "tree-cfgcleanup.h" +#include "tree-ssa-loop.h" +#include "tree-scalar-evolution.h" +#include "tree-ssa-sccvn.h" /* This algorithm is based on the SCC algorithm presented by Keith Cooper and L. 
Taylor Simpson in "SCC-Based Value numbering" @@ -5149,3 +5157,868 @@ vn_nary_may_trap (vn_nary_op_t nary) return false; } + + +class eliminate_dom_walker : public dom_walker +{ +public: + eliminate_dom_walker (cdi_direction, bitmap); + ~eliminate_dom_walker (); + + virtual edge before_dom_children (basic_block); + virtual void after_dom_children (basic_block); + + tree eliminate_avail (tree op); + void eliminate_push_avail (tree op); + tree eliminate_insert (gimple_stmt_iterator *gsi, tree val); + + bool do_pre; + unsigned int el_todo; + unsigned int eliminations; + unsigned int insertions; + + /* SSA names that had their defs inserted by PRE if do_pre. */ + bitmap inserted_exprs; + + /* Blocks with statements that have had their EH properties changed. */ + bitmap need_eh_cleanup; + + /* Blocks with statements that have had their AB properties changed. */ + bitmap need_ab_cleanup; + + auto_vec to_remove; + auto_vec to_fixup; + auto_vec avail; + auto_vec avail_stack; +}; + +eliminate_dom_walker::eliminate_dom_walker (cdi_direction direction, + bitmap inserted_exprs_) + : dom_walker (direction), do_pre (inserted_exprs_ != NULL), + el_todo (0), eliminations (0), insertions (0), + inserted_exprs (inserted_exprs_) +{ + need_eh_cleanup = BITMAP_ALLOC (NULL); + need_ab_cleanup = BITMAP_ALLOC (NULL); +} + +eliminate_dom_walker::~eliminate_dom_walker () +{ + BITMAP_FREE (need_eh_cleanup); + BITMAP_FREE (need_ab_cleanup); +} + +/* Return a leader for OP that is available at the current point of the + eliminate domwalk. */ + +tree +eliminate_dom_walker::eliminate_avail (tree op) +{ + tree valnum = VN_INFO (op)->valnum; + if (TREE_CODE (valnum) == SSA_NAME) + { + if (SSA_NAME_IS_DEFAULT_DEF (valnum)) + return valnum; + if (avail.length () > SSA_NAME_VERSION (valnum)) + return avail[SSA_NAME_VERSION (valnum)]; + } + else if (is_gimple_min_invariant (valnum)) + return valnum; + return NULL_TREE; +} + +/* At the current point of the eliminate domwalk make OP available. 
*/ + +void +eliminate_dom_walker::eliminate_push_avail (tree op) +{ + tree valnum = VN_INFO (op)->valnum; + if (TREE_CODE (valnum) == SSA_NAME) + { + if (avail.length () <= SSA_NAME_VERSION (valnum)) + avail.safe_grow_cleared (SSA_NAME_VERSION (valnum) + 1); + tree pushop = op; + if (avail[SSA_NAME_VERSION (valnum)]) + pushop = avail[SSA_NAME_VERSION (valnum)]; + avail_stack.safe_push (pushop); + avail[SSA_NAME_VERSION (valnum)] = op; + } +} + +/* Insert the expression recorded by SCCVN for VAL at *GSI. Returns + the leader for the expression if insertion was successful. */ + +tree +eliminate_dom_walker::eliminate_insert (gimple_stmt_iterator *gsi, tree val) +{ + /* We can insert a sequence with a single assignment only. */ + gimple_seq stmts = VN_INFO (val)->expr; + if (!gimple_seq_singleton_p (stmts)) + return NULL_TREE; + gassign *stmt = dyn_cast (gimple_seq_first_stmt (stmts)); + if (!stmt + || (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)) + && gimple_assign_rhs_code (stmt) != VIEW_CONVERT_EXPR + && gimple_assign_rhs_code (stmt) != BIT_FIELD_REF + && (gimple_assign_rhs_code (stmt) != BIT_AND_EXPR + || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST))) + return NULL_TREE; + + tree op = gimple_assign_rhs1 (stmt); + if (gimple_assign_rhs_code (stmt) == VIEW_CONVERT_EXPR + || gimple_assign_rhs_code (stmt) == BIT_FIELD_REF) + op = TREE_OPERAND (op, 0); + tree leader = TREE_CODE (op) == SSA_NAME ? 
eliminate_avail (op) : op; + if (!leader) + return NULL_TREE; + + tree res; + stmts = NULL; + if (gimple_assign_rhs_code (stmt) == BIT_FIELD_REF) + res = gimple_build (&stmts, BIT_FIELD_REF, + TREE_TYPE (val), leader, + TREE_OPERAND (gimple_assign_rhs1 (stmt), 1), + TREE_OPERAND (gimple_assign_rhs1 (stmt), 2)); + else if (gimple_assign_rhs_code (stmt) == BIT_AND_EXPR) + res = gimple_build (&stmts, BIT_AND_EXPR, + TREE_TYPE (val), leader, gimple_assign_rhs2 (stmt)); + else + res = gimple_build (&stmts, gimple_assign_rhs_code (stmt), + TREE_TYPE (val), leader); + if (TREE_CODE (res) != SSA_NAME + || SSA_NAME_IS_DEFAULT_DEF (res) + || gimple_bb (SSA_NAME_DEF_STMT (res))) + { + gimple_seq_discard (stmts); + + /* During propagation we have to treat SSA info conservatively + and thus we can end up simplifying the inserted expression + at elimination time to sth not defined in stmts. */ + /* But then this is a redundancy we failed to detect. Which means + res now has two values. That doesn't play well with how + we track availability here, so give up. */ + if (dump_file && (dump_flags & TDF_DETAILS)) + { + if (TREE_CODE (res) == SSA_NAME) + res = eliminate_avail (res); + if (res) + { + fprintf (dump_file, "Failed to insert expression for value "); + print_generic_expr (dump_file, val); + fprintf (dump_file, " which is really fully redundant to "); + print_generic_expr (dump_file, res); + fprintf (dump_file, "\n"); + } + } + + return NULL_TREE; + } + else + { + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); + VN_INFO_GET (res)->valnum = val; + } + + insertions++; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Inserted "); + print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (res), 0); + } + + return res; +} + + + +/* Perform elimination for the basic-block B during the domwalk. */ + +edge +eliminate_dom_walker::before_dom_children (basic_block b) +{ + /* Mark new bb. 
*/ + avail_stack.safe_push (NULL_TREE); + + /* Skip unreachable blocks marked unreachable during the SCCVN domwalk. */ + edge_iterator ei; + edge e; + FOR_EACH_EDGE (e, ei, b->preds) + if (e->flags & EDGE_EXECUTABLE) + break; + if (! e) + return NULL; + + for (gphi_iterator gsi = gsi_start_phis (b); !gsi_end_p (gsi);) + { + gphi *phi = gsi.phi (); + tree res = PHI_RESULT (phi); + + if (virtual_operand_p (res)) + { + gsi_next (&gsi); + continue; + } + + tree sprime = eliminate_avail (res); + if (sprime + && sprime != res) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Replaced redundant PHI node defining "); + print_generic_expr (dump_file, res); + fprintf (dump_file, " with "); + print_generic_expr (dump_file, sprime); + fprintf (dump_file, "\n"); + } + + /* If we inserted this PHI node ourself, it's not an elimination. */ + if (! inserted_exprs + || ! bitmap_bit_p (inserted_exprs, SSA_NAME_VERSION (res))) + eliminations++; + + /* If we will propagate into all uses don't bother to do + anything. */ + if (may_propagate_copy (res, sprime)) + { + /* Mark the PHI for removal. */ + to_remove.safe_push (phi); + gsi_next (&gsi); + continue; + } + + remove_phi_node (&gsi, false); + + if (!useless_type_conversion_p (TREE_TYPE (res), TREE_TYPE (sprime))) + sprime = fold_convert (TREE_TYPE (res), sprime); + gimple *stmt = gimple_build_assign (res, sprime); + gimple_stmt_iterator gsi2 = gsi_after_labels (b); + gsi_insert_before (&gsi2, stmt, GSI_NEW_STMT); + continue; + } + + eliminate_push_avail (res); + gsi_next (&gsi); + } + + for (gimple_stmt_iterator gsi = gsi_start_bb (b); + !gsi_end_p (gsi); + gsi_next (&gsi)) + { + tree sprime = NULL_TREE; + gimple *stmt = gsi_stmt (gsi); + tree lhs = gimple_get_lhs (stmt); + if (lhs && TREE_CODE (lhs) == SSA_NAME + && !gimple_has_volatile_ops (stmt) + /* See PR43491. Do not replace a global register variable when + it is a the RHS of an assignment. 
Do replace local register + variables since gcc does not guarantee a local variable will + be allocated in register. + ??? The fix isn't effective here. This should instead + be ensured by not value-numbering them the same but treating + them like volatiles? */ + && !(gimple_assign_single_p (stmt) + && (TREE_CODE (gimple_assign_rhs1 (stmt)) == VAR_DECL + && DECL_HARD_REGISTER (gimple_assign_rhs1 (stmt)) + && is_global_var (gimple_assign_rhs1 (stmt))))) + { + sprime = eliminate_avail (lhs); + if (!sprime) + { + /* If there is no existing usable leader but SCCVN thinks + it has an expression it wants to use as replacement, + insert that. */ + tree val = VN_INFO (lhs)->valnum; + if (val != VN_TOP + && TREE_CODE (val) == SSA_NAME + && VN_INFO (val)->needs_insertion + && VN_INFO (val)->expr != NULL + && (sprime = eliminate_insert (&gsi, val)) != NULL_TREE) + eliminate_push_avail (sprime); + } + + /* If this now constitutes a copy duplicate points-to + and range info appropriately. This is especially + important for inserted code. See tree-ssa-copy.c + for similar code. */ + if (sprime + && TREE_CODE (sprime) == SSA_NAME) + { + basic_block sprime_b = gimple_bb (SSA_NAME_DEF_STMT (sprime)); + if (POINTER_TYPE_P (TREE_TYPE (lhs)) + && VN_INFO_PTR_INFO (lhs) + && ! VN_INFO_PTR_INFO (sprime)) + { + duplicate_ssa_name_ptr_info (sprime, + VN_INFO_PTR_INFO (lhs)); + if (b != sprime_b) + mark_ptr_info_alignment_unknown + (SSA_NAME_PTR_INFO (sprime)); + } + else if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)) + && VN_INFO_RANGE_INFO (lhs) + && ! VN_INFO_RANGE_INFO (sprime) + && b == sprime_b) + duplicate_ssa_name_range_info (sprime, + VN_INFO_RANGE_TYPE (lhs), + VN_INFO_RANGE_INFO (lhs)); + } + + /* Inhibit the use of an inserted PHI on a loop header when + the address of the memory reference is a simple induction + variable. In other cases the vectorizer won't do anything + anyway (either it's loop invariant or a complicated + expression). 
*/ + if (sprime + && TREE_CODE (sprime) == SSA_NAME + && do_pre + && (flag_tree_loop_vectorize || flag_tree_parallelize_loops > 1) + && loop_outer (b->loop_father) + && has_zero_uses (sprime) + && bitmap_bit_p (inserted_exprs, SSA_NAME_VERSION (sprime)) + && gimple_assign_load_p (stmt)) + { + gimple *def_stmt = SSA_NAME_DEF_STMT (sprime); + basic_block def_bb = gimple_bb (def_stmt); + if (gimple_code (def_stmt) == GIMPLE_PHI + && def_bb->loop_father->header == def_bb) + { + loop_p loop = def_bb->loop_father; + ssa_op_iter iter; + tree op; + bool found = false; + FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE) + { + affine_iv iv; + def_bb = gimple_bb (SSA_NAME_DEF_STMT (op)); + if (def_bb + && flow_bb_inside_loop_p (loop, def_bb) + && simple_iv (loop, loop, op, &iv, true)) + { + found = true; + break; + } + } + if (found) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Not replacing "); + print_gimple_expr (dump_file, stmt, 0); + fprintf (dump_file, " with "); + print_generic_expr (dump_file, sprime); + fprintf (dump_file, " which would add a loop" + " carried dependence to loop %d\n", + loop->num); + } + /* Don't keep sprime available. */ + sprime = NULL_TREE; + } + } + } + + if (sprime) + { + /* If we can propagate the value computed for LHS into + all uses don't bother doing anything with this stmt. */ + if (may_propagate_copy (lhs, sprime)) + { + /* Mark it for removal. */ + to_remove.safe_push (stmt); + + /* ??? Don't count copy/constant propagations. 
*/ + if (gimple_assign_single_p (stmt) + && (TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME + || gimple_assign_rhs1 (stmt) == sprime)) + continue; + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Replaced "); + print_gimple_expr (dump_file, stmt, 0); + fprintf (dump_file, " with "); + print_generic_expr (dump_file, sprime); + fprintf (dump_file, " in all uses of "); + print_gimple_stmt (dump_file, stmt, 0); + } + + eliminations++; + continue; + } + + /* If this is an assignment from our leader (which + happens in the case the value-number is a constant) + then there is nothing to do. */ + if (gimple_assign_single_p (stmt) + && sprime == gimple_assign_rhs1 (stmt)) + continue; + + /* Else replace its RHS. */ + bool can_make_abnormal_goto + = is_gimple_call (stmt) + && stmt_can_make_abnormal_goto (stmt); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Replaced "); + print_gimple_expr (dump_file, stmt, 0); + fprintf (dump_file, " with "); + print_generic_expr (dump_file, sprime); + fprintf (dump_file, " in "); + print_gimple_stmt (dump_file, stmt, 0); + } + + eliminations++; + gimple *orig_stmt = stmt; + if (!useless_type_conversion_p (TREE_TYPE (lhs), + TREE_TYPE (sprime))) + sprime = fold_convert (TREE_TYPE (lhs), sprime); + tree vdef = gimple_vdef (stmt); + tree vuse = gimple_vuse (stmt); + propagate_tree_value_into_stmt (&gsi, sprime); + stmt = gsi_stmt (gsi); + update_stmt (stmt); + if (vdef != gimple_vdef (stmt)) + VN_INFO (vdef)->valnum = vuse; + + /* If we removed EH side-effects from the statement, clean + its EH information. */ + if (maybe_clean_or_replace_eh_stmt (orig_stmt, stmt)) + { + bitmap_set_bit (need_eh_cleanup, + gimple_bb (stmt)->index); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " Removed EH side-effects.\n"); + } + + /* Likewise for AB side-effects. 
*/ + if (can_make_abnormal_goto + && !stmt_can_make_abnormal_goto (stmt)) + { + bitmap_set_bit (need_ab_cleanup, + gimple_bb (stmt)->index); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " Removed AB side-effects.\n"); + } + + continue; + } + } + + /* If the statement is a scalar store, see if the expression + has the same value number as its rhs. If so, the store is + dead. */ + if (gimple_assign_single_p (stmt) + && !gimple_has_volatile_ops (stmt) + && !is_gimple_reg (gimple_assign_lhs (stmt)) + && (TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME + || is_gimple_min_invariant (gimple_assign_rhs1 (stmt)))) + { + tree val; + tree rhs = gimple_assign_rhs1 (stmt); + vn_reference_t vnresult; + val = vn_reference_lookup (lhs, gimple_vuse (stmt), VN_WALKREWRITE, + &vnresult, false); + if (TREE_CODE (rhs) == SSA_NAME) + rhs = VN_INFO (rhs)->valnum; + if (val + && operand_equal_p (val, rhs, 0)) + { + /* We can only remove the later store if the former aliases + at least all accesses the later one does or if the store + was to readonly memory storing the same value. */ + alias_set_type set = get_alias_set (lhs); + if (! vnresult + || vnresult->set == set + || alias_set_subset_of (set, vnresult->set)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Deleted redundant store "); + print_gimple_stmt (dump_file, stmt, 0); + } + + /* Queue stmt for removal. */ + to_remove.safe_push (stmt); + continue; + } + } + } + + /* If this is a control statement value numbering left edges + unexecuted on force the condition in a way consistent with + that. 
*/ + if (gcond *cond = dyn_cast <gcond *> (stmt)) + { + if ((EDGE_SUCC (b, 0)->flags & EDGE_EXECUTABLE) + ^ (EDGE_SUCC (b, 1)->flags & EDGE_EXECUTABLE)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Removing unexecutable edge from "); + print_gimple_stmt (dump_file, stmt, 0); + } + if (((EDGE_SUCC (b, 0)->flags & EDGE_TRUE_VALUE) != 0) + == ((EDGE_SUCC (b, 0)->flags & EDGE_EXECUTABLE) != 0)) + gimple_cond_make_true (cond); + else + gimple_cond_make_false (cond); + update_stmt (cond); + el_todo |= TODO_cleanup_cfg; + continue; + } + } + + bool can_make_abnormal_goto = stmt_can_make_abnormal_goto (stmt); + bool was_noreturn = (is_gimple_call (stmt) + && gimple_call_noreturn_p (stmt)); + tree vdef = gimple_vdef (stmt); + tree vuse = gimple_vuse (stmt); + + /* If we didn't replace the whole stmt (or propagate the result + into all uses), replace all uses on this stmt with their + leaders. */ + bool modified = false; + use_operand_p use_p; + ssa_op_iter iter; + FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE) + { + tree use = USE_FROM_PTR (use_p); + /* ??? The call code above leaves stmt operands un-updated. */ + if (TREE_CODE (use) != SSA_NAME) + continue; + tree sprime = eliminate_avail (use); + if (sprime && sprime != use + && may_propagate_copy (use, sprime) + /* We substitute into debug stmts to avoid excessive + debug temporaries created by removed stmts, but we need + to avoid doing so for inserted sprimes as we never want + to create debug temporaries for them. */ + && (!inserted_exprs + || TREE_CODE (sprime) != SSA_NAME + || !is_gimple_debug (stmt) + || !bitmap_bit_p (inserted_exprs, SSA_NAME_VERSION (sprime)))) + { + propagate_value (use_p, sprime); + modified = true; + } + } + + /* Fold the stmt if modified, this canonicalizes MEM_REFs we propagated + into which is a requirement for the IPA devirt machinery.
*/ + gimple *old_stmt = stmt; + if (modified) + { + /* If a formerly non-invariant ADDR_EXPR is turned into an + invariant one it was on a separate stmt. */ + if (gimple_assign_single_p (stmt) + && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR) + recompute_tree_invariant_for_addr_expr (gimple_assign_rhs1 (stmt)); + gimple_stmt_iterator prev = gsi; + gsi_prev (&prev); + if (fold_stmt (&gsi)) + { + /* fold_stmt may have created new stmts inbetween + the previous stmt and the folded stmt. Mark + all defs created there as varying to not confuse + the SCCVN machinery as we're using that even during + elimination. */ + if (gsi_end_p (prev)) + prev = gsi_start_bb (b); + else + gsi_next (&prev); + if (gsi_stmt (prev) != gsi_stmt (gsi)) + do + { + tree def; + ssa_op_iter dit; + FOR_EACH_SSA_TREE_OPERAND (def, gsi_stmt (prev), + dit, SSA_OP_ALL_DEFS) + /* As existing DEFs may move between stmts + we have to guard VN_INFO_GET. */ + if (! has_VN_INFO (def)) + VN_INFO_GET (def)->valnum = def; + if (gsi_stmt (prev) == gsi_stmt (gsi)) + break; + gsi_next (&prev); + } + while (1); + } + stmt = gsi_stmt (gsi); + /* In case we folded the stmt away schedule the NOP for removal. */ + if (gimple_nop_p (stmt)) + to_remove.safe_push (stmt); + } + + /* Visit indirect calls and turn them into direct calls if + possible using the devirtualization machinery. Do this before + checking for required EH/abnormal/noreturn cleanup as devird + may expose more of those. 
*/ + if (gcall *call_stmt = dyn_cast <gcall *> (stmt)) + { + tree fn = gimple_call_fn (call_stmt); + if (fn + && flag_devirtualize + && virtual_method_call_p (fn)) + { + tree otr_type = obj_type_ref_class (fn); + unsigned HOST_WIDE_INT otr_tok + = tree_to_uhwi (OBJ_TYPE_REF_TOKEN (fn)); + tree instance; + ipa_polymorphic_call_context context (current_function_decl, + fn, stmt, &instance); + context.get_dynamic_type (instance, OBJ_TYPE_REF_OBJECT (fn), + otr_type, stmt); + bool final; + vec <cgraph_node *>targets + = possible_polymorphic_call_targets (obj_type_ref_class (fn), + otr_tok, context, &final); + if (dump_file) + dump_possible_polymorphic_call_targets (dump_file, + obj_type_ref_class (fn), + otr_tok, context); + if (final && targets.length () <= 1 && dbg_cnt (devirt)) + { + tree fn; + if (targets.length () == 1) + fn = targets[0]->decl; + else + fn = builtin_decl_implicit (BUILT_IN_UNREACHABLE); + if (dump_enabled_p ()) + { + location_t loc = gimple_location (stmt); + dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, + "converting indirect call to " + "function %s\n", + lang_hooks.decl_printable_name (fn, 2)); + } + gimple_call_set_fndecl (call_stmt, fn); + /* If changing the call to __builtin_unreachable + or similar noreturn function, adjust gimple_call_fntype + too. */ + if (gimple_call_noreturn_p (call_stmt) + && VOID_TYPE_P (TREE_TYPE (TREE_TYPE (fn))) + && TYPE_ARG_TYPES (TREE_TYPE (fn)) + && (TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fn))) + == void_type_node)) + gimple_call_set_fntype (call_stmt, TREE_TYPE (fn)); + maybe_remove_unused_call_args (cfun, call_stmt); + modified = true; + } + } + } + + if (modified) + { + /* When changing a call into a noreturn call, cfg cleanup + is needed to fix up the noreturn call. */ + if (!was_noreturn + && is_gimple_call (stmt) && gimple_call_noreturn_p (stmt)) + to_fixup.safe_push (stmt); + /* When changing a condition or switch into one we know what + edge will be executed, schedule a cfg cleanup.
*/ + if ((gimple_code (stmt) == GIMPLE_COND + && (gimple_cond_true_p (as_a <gcond *> (stmt)) + || gimple_cond_false_p (as_a <gcond *> (stmt)))) + || (gimple_code (stmt) == GIMPLE_SWITCH + && TREE_CODE (gimple_switch_index + (as_a <gswitch *> (stmt))) == INTEGER_CST)) + el_todo |= TODO_cleanup_cfg; + /* If we removed EH side-effects from the statement, clean + its EH information. */ + if (maybe_clean_or_replace_eh_stmt (old_stmt, stmt)) + { + bitmap_set_bit (need_eh_cleanup, + gimple_bb (stmt)->index); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " Removed EH side-effects.\n"); + } + /* Likewise for AB side-effects. */ + if (can_make_abnormal_goto + && !stmt_can_make_abnormal_goto (stmt)) + { + bitmap_set_bit (need_ab_cleanup, + gimple_bb (stmt)->index); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " Removed AB side-effects.\n"); + } + update_stmt (stmt); + if (vdef != gimple_vdef (stmt)) + VN_INFO (vdef)->valnum = vuse; + } + + /* Make new values available - for fully redundant LHS we + continue with the next stmt above and skip this. */ + def_operand_p defp; + FOR_EACH_SSA_DEF_OPERAND (defp, stmt, iter, SSA_OP_DEF) + eliminate_push_avail (DEF_FROM_PTR (defp)); + } + + /* Replace destination PHI arguments. */ + FOR_EACH_EDGE (e, ei, b->succs) + if (e->flags & EDGE_EXECUTABLE) + for (gphi_iterator gsi = gsi_start_phis (e->dest); + !gsi_end_p (gsi); + gsi_next (&gsi)) + { + gphi *phi = gsi.phi (); + use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, e); + tree arg = USE_FROM_PTR (use_p); + if (TREE_CODE (arg) != SSA_NAME + || virtual_operand_p (arg)) + continue; + tree sprime = eliminate_avail (arg); + if (sprime && may_propagate_copy (arg, sprime)) + propagate_value (use_p, sprime); + } + return NULL; +} + +/* Make no longer available leaders no longer available.
*/ + +void +eliminate_dom_walker::after_dom_children (basic_block) +{ + tree entry; + while ((entry = avail_stack.pop ()) != NULL_TREE) + { + tree valnum = VN_INFO (entry)->valnum; + tree old = avail[SSA_NAME_VERSION (valnum)]; + if (old == entry) + avail[SSA_NAME_VERSION (valnum)] = NULL_TREE; + else + avail[SSA_NAME_VERSION (valnum)] = entry; + } +} + +/* Eliminate fully redundant computations. */ + +unsigned int +vn_eliminate (bitmap inserted_exprs) +{ + eliminate_dom_walker el (CDI_DOMINATORS, inserted_exprs); + el.avail.reserve (num_ssa_names); + + el.walk (cfun->cfg->x_entry_block_ptr); + + /* We cannot remove stmts during BB walk, especially not release SSA + names there as this confuses the VN machinery. The stmts ending + up in to_remove are either stores or simple copies. + Remove stmts in reverse order to make debug stmt creation possible. */ + while (!el.to_remove.is_empty ()) + { + gimple *stmt = el.to_remove.pop (); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Removing dead stmt "); + print_gimple_stmt (dump_file, stmt, 0, 0); + } + + gimple_stmt_iterator gsi = gsi_for_stmt (stmt); + if (gimple_code (stmt) == GIMPLE_PHI) + remove_phi_node (&gsi, true); + else + { + basic_block bb = gimple_bb (stmt); + unlink_stmt_vdef (stmt); + if (gsi_remove (&gsi, true)) + bitmap_set_bit (el.need_eh_cleanup, bb->index); + if (is_gimple_call (stmt) && stmt_can_make_abnormal_goto (stmt)) + bitmap_set_bit (el.need_ab_cleanup, bb->index); + release_defs (stmt); + } + + /* Removing a stmt may expose a forwarder block. */ + el.el_todo |= TODO_cleanup_cfg; + } + + /* Fixup stmts that became noreturn calls. This may require splitting + blocks and thus isn't possible during the dominator walk. Do this + in reverse order so we don't inadvertently remove a stmt we want to + fixup by visiting a dominating now noreturn call first.
*/ + while (!el.to_fixup.is_empty ()) + { + gimple *stmt = el.to_fixup.pop (); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Fixing up noreturn call "); + print_gimple_stmt (dump_file, stmt, 0); + } + + if (fixup_noreturn_call (stmt)) + el.el_todo |= TODO_cleanup_cfg; + } + + bool do_eh_cleanup = !bitmap_empty_p (el.need_eh_cleanup); + bool do_ab_cleanup = !bitmap_empty_p (el.need_ab_cleanup); + + if (do_eh_cleanup) + gimple_purge_all_dead_eh_edges (el.need_eh_cleanup); + + if (do_ab_cleanup) + gimple_purge_all_dead_abnormal_call_edges (el.need_ab_cleanup); + + if (do_eh_cleanup || do_ab_cleanup) + el.el_todo |= TODO_cleanup_cfg; + + statistics_counter_event (cfun, "Eliminated", el.eliminations); + statistics_counter_event (cfun, "Insertions", el.insertions); + + return el.el_todo; +} + + +namespace { + +const pass_data pass_data_fre = +{ + GIMPLE_PASS, /* type */ + "fre", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_TREE_FRE, /* tv_id */ + ( PROP_cfg | PROP_ssa ), /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_fre : public gimple_opt_pass +{ +public: + pass_fre (gcc::context *ctxt) + : gimple_opt_pass (pass_data_fre, ctxt) + {} + + /* opt_pass methods: */ + opt_pass * clone () { return new pass_fre (m_ctxt); } + virtual bool gate (function *) { return flag_tree_fre != 0; } + virtual unsigned int execute (function *); + +}; // class pass_fre + +unsigned int +pass_fre::execute (function *) +{ + unsigned int todo = 0; + + run_scc_vn (VN_WALKREWRITE); + + /* Remove all the redundant expressions. 
*/ + todo |= vn_eliminate (NULL); + + scc_vn_restore_ssa_info (); + free_scc_vn (); + + return todo; +} + +} // anon namespace + +gimple_opt_pass * +make_pass_fre (gcc::context *ctxt) +{ + return new pass_fre (ctxt); +} diff --git a/gcc/tree-ssa-sccvn.h b/gcc/tree-ssa-sccvn.h index 77d0183..38877bc 100644 --- a/gcc/tree-ssa-sccvn.h +++ b/gcc/tree-ssa-sccvn.h @@ -214,6 +214,7 @@ extern vn_ssa_aux_t VN_INFO (tree); extern vn_ssa_aux_t VN_INFO_GET (tree); tree vn_get_expr_for (tree); void run_scc_vn (vn_lookup_kind); +unsigned int vn_eliminate (bitmap); void free_scc_vn (void); void scc_vn_restore_ssa_info (void); tree vn_nary_op_lookup (tree, vn_nary_op_t *); -- cgit v1.1 From d185db140ad68f80d134f2f7f254e4ace7f89f24 Mon Sep 17 00:00:00 2001 From: Bernhard Reutner-Fischer Date: Wed, 25 Oct 2017 21:10:15 +0200 Subject: match.c (gfc_match_type_is): Fix typo in error message 2017-10-25 Bernhard Reutner-Fischer * match.c (gfc_match_type_is): Fix typo in error message. From-SVN: r254082 --- gcc/fortran/ChangeLog | 4 ++++ gcc/fortran/match.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 6bf4f1d..9e34152 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,7 @@ +2017-10-25 Bernhard Reutner-Fischer + + * match.c (gfc_match_type_is): Fix typo in error message. 
+ 2017-10-21 Paul Thomas PR fortran/82586 diff --git a/gcc/fortran/match.c b/gcc/fortran/match.c index 4d657e0..624fdf5 100644 --- a/gcc/fortran/match.c +++ b/gcc/fortran/match.c @@ -6204,7 +6204,7 @@ gfc_match_type_is (void) return MATCH_YES; syntax: - gfc_error ("Ssyntax error in TYPE IS specification at %C"); + gfc_error ("Syntax error in TYPE IS specification at %C"); cleanup: if (c != NULL) -- cgit v1.1 From a4fe6139ab2e39d8b264befaf38f748e5c88d76a Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Wed, 25 Oct 2017 21:11:41 +0200 Subject: i386.c (ix86_builtin_vectorization_cost): Compute scatter/gather cost correctly. * i386.c (ix86_builtin_vectorization_cost): Compute scatter/gather cost correctly. * i386.h (processor_costs): Add gather_static, gather_per_elt, scatter_static, scatter_per_elt. * x86-tune-costs.h: Add new cost entries. From-SVN: r254083 --- gcc/ChangeLog | 8 ++++++ gcc/config/i386/i386.c | 18 +++++++++++-- gcc/config/i386/i386.h | 4 +++ gcc/config/i386/x86-tune-costs.h | 56 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 84 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c5dfcb7..5985d9e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2017-10-23 Jan Hubicka + + * i386.c (ix86_builtin_vectorization_cost): Compute scatter/gather + cost correctly. + * i386.h (processor_costs): Add gather_static, gather_per_elt, + scatter_static, scatter_per_elt. + * x86-tune-costs.h: Add new cost entries. + 2017-10-25 Richard Biener * tree-ssa-sccvn.h (vn_eliminate): Declare. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 367cade..56486e0 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -44490,7 +44490,6 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, /* We should have separate costs for unaligned loads and gather/scatter. Do that incrementally. 
*/ case unaligned_load: - case vector_gather_load: index = sse_store_index (mode); return ix86_vec_cost (mode, COSTS_N_INSNS @@ -44498,13 +44497,28 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, true); case unaligned_store: - case vector_scatter_store: index = sse_store_index (mode); return ix86_vec_cost (mode, COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2, true); + case vector_gather_load: + return ix86_vec_cost (mode, + COSTS_N_INSNS + (ix86_cost->gather_static + + ix86_cost->gather_per_elt + * TYPE_VECTOR_SUBPARTS (vectype)) / 2, + true); + + case vector_scatter_store: + return ix86_vec_cost (mode, + COSTS_N_INSNS + (ix86_cost->scatter_static + + ix86_cost->scatter_per_elt + * TYPE_VECTOR_SUBPARTS (vectype)) / 2, + true); + case cond_branch_taken: return ix86_cost->cond_taken_branch_cost; diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 27fc9f0..837906b 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -253,6 +253,10 @@ struct processor_costs { const int mmxsse_to_integer; /* cost of moving mmxsse register to integer. */ const int ssemmx_to_integer; /* cost of moving integer to mmxsse register. */ + const int gather_static, gather_per_elt; /* Cost of gather load is computed + as static + per_elt * nelts. */ + const int scatter_static, scatter_per_elt; /* Cost of scatter store is + computed as static + per_elt * nelts. */ const int l1_cache_size; /* size of l1 cache, in kilobytes. */ const int l2_cache_size; /* size of l2 cache, in kilobytes. */ const int prefetch_block; /* bytes moved to cache for prefetch.
*/ diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index e31d7ce..c7ac70e 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -82,6 +82,8 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */ {3, 3, 3, 3, 3}, /* cost of unaligned SSE store in 128bit, 256bit and 512bit */ 3, 3, /* SSE->integer and integer->SSE moves */ + 5, 0, /* Gather load static, per_elt. */ + 5, 0, /* Gather store static, per_elt. */ 0, /* size of l1 cache */ 0, /* size of l2 cache */ 0, /* size of prefetch block */ @@ -166,6 +168,8 @@ struct processor_costs i386_cost = { /* 386 specific costs */ in 32,64,128,256 and 512-bit */ {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ 3, 3, /* SSE->integer and integer->SSE moves */ + 4, 4, /* Gather load static, per_elt. */ + 4, 4, /* Gather store static, per_elt. */ 0, /* size of l1 cache */ 0, /* size of l2 cache */ 0, /* size of prefetch block */ @@ -249,6 +253,8 @@ struct processor_costs i486_cost = { /* 486 specific costs */ in 32,64,128,256 and 512-bit */ {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ 3, 3, /* SSE->integer and integer->SSE moves */ + 4, 4, /* Gather load static, per_elt. */ + 4, 4, /* Gather store static, per_elt. */ 4, /* size of l1 cache. 486 has 8kB cache shared for code and data, so 4kB is not really precise. */ @@ -334,6 +340,8 @@ struct processor_costs pentium_cost = { in 32,64,128,256 and 512-bit */ {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ 3, 3, /* SSE->integer and integer->SSE moves */ + 4, 4, /* Gather load static, per_elt. */ + 4, 4, /* Gather store static, per_elt. */ 8, /* size of l1 cache. */ 8, /* size of l2 cache */ 0, /* size of prefetch block */ @@ -410,6 +418,8 @@ struct processor_costs lakemont_cost = { in 32,64,128,256 and 512-bit */ {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ 3, 3, /* SSE->integer and integer->SSE moves */ + 4, 4, /* Gather load static, per_elt. 
*/ + 4, 4, /* Gather store static, per_elt. */ 8, /* size of l1 cache. */ 8, /* size of l2 cache */ 0, /* size of prefetch block */ @@ -501,6 +511,8 @@ struct processor_costs pentiumpro_cost = { in 32,64,128,256 and 512-bit */ {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ 3, 3, /* SSE->integer and integer->SSE moves */ + 4, 4, /* Gather load static, per_elt. */ + 4, 4, /* Gather store static, per_elt. */ 8, /* size of l1 cache. */ 256, /* size of l2 cache */ 32, /* size of prefetch block */ @@ -584,6 +596,8 @@ struct processor_costs geode_cost = { in 32,64,128,256 and 512-bit */ {2, 2, 8, 16, 32}, /* cost of unaligned stores. */ 6, 6, /* SSE->integer and integer->SSE moves */ + 2, 2, /* Gather load static, per_elt. */ + 2, 2, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ 128, /* size of l2 cache. */ 32, /* size of prefetch block */ @@ -666,6 +680,8 @@ struct processor_costs k6_cost = { in 32,64,128,256 and 512-bit */ {2, 2, 8, 16, 32}, /* cost of unaligned stores. */ 6, 6, /* SSE->integer and integer->SSE moves */ + 2, 2, /* Gather load static, per_elt. */ + 2, 2, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ 32, /* size of l2 cache. Some models have integrated l2 cache, but @@ -754,6 +770,8 @@ struct processor_costs athlon_cost = { in 32,64,128,256 and 512-bit */ {4, 4, 5, 10, 20}, /* cost of unaligned stores. */ 5, 5, /* SSE->integer and integer->SSE moves */ + 4, 4, /* Gather load static, per_elt. */ + 4, 4, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ 256, /* size of l2 cache. */ 64, /* size of prefetch block */ @@ -844,6 +862,8 @@ struct processor_costs k8_cost = { in 32,64,128,256 and 512-bit */ {4, 4, 5, 10, 20}, /* cost of unaligned stores. */ 5, 5, /* SSE->integer and integer->SSE moves */ + 4, 4, /* Gather load static, per_elt. */ + 4, 4, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ 512, /* size of l2 cache. 
*/ 64, /* size of prefetch block */ @@ -946,6 +966,8 @@ struct processor_costs amdfam10_cost = { 1/1 1/1 MOVD reg32, xmmreg Double FADD 3 1/1 1/1 */ + 4, 4, /* Gather load static, per_elt. */ + 4, 4, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ 512, /* size of l2 cache. */ 64, /* size of prefetch block */ @@ -1041,6 +1063,8 @@ const struct processor_costs bdver1_cost = { in 32,64,128,256 and 512-bit */ {10, 10, 10, 20, 30}, /* cost of unaligned stores. */ 16, 20, /* SSE->integer and integer->SSE moves */ + 12, 12, /* Gather load static, per_elt. */ + 10, 10, /* Gather store static, per_elt. */ 16, /* size of l1 cache. */ 2048, /* size of l2 cache. */ 64, /* size of prefetch block */ @@ -1138,6 +1162,8 @@ const struct processor_costs bdver2_cost = { in 32,64,128,256 and 512-bit */ {10, 10, 10, 20, 30}, /* cost of unaligned stores. */ 16, 20, /* SSE->integer and integer->SSE moves */ + 12, 12, /* Gather load static, per_elt. */ + 10, 10, /* Gather store static, per_elt. */ 16, /* size of l1 cache. */ 2048, /* size of l2 cache. */ 64, /* size of prefetch block */ @@ -1234,6 +1260,8 @@ struct processor_costs bdver3_cost = { in 32,64,128,256 and 512-bit */ {10, 10, 10, 20, 30}, /* cost of unaligned stores. */ 16, 20, /* SSE->integer and integer->SSE moves */ + 12, 12, /* Gather load static, per_elt. */ + 10, 10, /* Gather store static, per_elt. */ 16, /* size of l1 cache. */ 2048, /* size of l2 cache. */ 64, /* size of prefetch block */ @@ -1329,6 +1357,8 @@ struct processor_costs bdver4_cost = { in 32,64,128,256 and 512-bit */ {10, 10, 10, 20, 30}, /* cost of unaligned stores. */ 16, 20, /* SSE->integer and integer->SSE moves */ + 12, 12, /* Gather load static, per_elt. */ + 10, 10, /* Gather store static, per_elt. */ 16, /* size of l1 cache. */ 2048, /* size of l2 cache. */ 64, /* size of prefetch block */ @@ -1435,6 +1465,11 @@ struct processor_costs znver1_cost = { in 32,64,128,256 and 512-bit. */ {8, 8, 8, 8, 16}, /* cost of unaligned stores. 
*/ 6, 6, /* SSE->integer and integer->SSE moves. */ + /* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops, + throughput 12. Approx 9 uops do not depend on vector size and every load + is 7 uops. */ + 18, 8, /* Gather load static, per_elt. */ + 18, 10, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ 512, /* size of l2 cache. */ 64, /* size of prefetch block. */ @@ -1539,6 +1574,8 @@ const struct processor_costs btver1_cost = { in 32,64,128,256 and 512-bit */ {10, 10, 12, 24, 48}, /* cost of unaligned stores. */ 14, 14, /* SSE->integer and integer->SSE moves */ + 10, 10, /* Gather load static, per_elt. */ + 10, 10, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ 512, /* size of l2 cache. */ 64, /* size of prefetch block */ @@ -1624,6 +1661,8 @@ const struct processor_costs btver2_cost = { in 32,64,128,256 and 512-bit */ {10, 10, 12, 24, 48}, /* cost of unaligned stores. */ 14, 14, /* SSE->integer and integer->SSE moves */ + 10, 10, /* Gather load static, per_elt. */ + 10, 10, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ 2048, /* size of l2 cache. */ 64, /* size of prefetch block */ @@ -1708,6 +1747,8 @@ struct processor_costs pentium4_cost = { in 32,64,128,256 and 512-bit */ {32, 32, 32, 64, 128}, /* cost of unaligned stores. */ 20, 12, /* SSE->integer and integer->SSE moves */ + 16, 16, /* Gather load static, per_elt. */ + 16, 16, /* Gather store static, per_elt. */ 8, /* size of l1 cache. */ 256, /* size of l2 cache. */ 64, /* size of prefetch block */ @@ -1795,6 +1836,8 @@ struct processor_costs nocona_cost = { in 32,64,128,256 and 512-bit */ {24, 24, 24, 48, 96}, /* cost of unaligned stores. */ 20, 12, /* SSE->integer and integer->SSE moves */ + 12, 12, /* Gather load static, per_elt. */ + 12, 12, /* Gather store static, per_elt. */ 8, /* size of l1 cache. */ 1024, /* size of l2 cache. 
*/ 64, /* size of prefetch block */ @@ -1880,6 +1923,8 @@ struct processor_costs atom_cost = { in 32,64,128,256 and 512-bit */ {16, 16, 16, 32, 64}, /* cost of unaligned stores. */ 8, 6, /* SSE->integer and integer->SSE moves */ + 8, 8, /* Gather load static, per_elt. */ + 8, 8, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ 256, /* size of l2 cache. */ 64, /* size of prefetch block */ @@ -1965,6 +2010,8 @@ struct processor_costs slm_cost = { in 32,64,128,256 and 512-bit */ {16, 16, 16, 32, 64}, /* cost of unaligned stores. */ 8, 6, /* SSE->integer and integer->SSE moves */ + 8, 8, /* Gather load static, per_elt. */ + 8, 8, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ 256, /* size of l2 cache. */ 64, /* size of prefetch block */ @@ -2050,6 +2097,8 @@ struct processor_costs intel_cost = { in 32,64,128,256 and 512-bit */ {10, 10, 10, 10, 10}, /* cost of unaligned loads. */ 4, 4, /* SSE->integer and integer->SSE moves */ + 6, 6, /* Gather load static, per_elt. */ + 6, 6, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ 256, /* size of l2 cache. */ 64, /* size of prefetch block */ @@ -2142,6 +2191,8 @@ struct processor_costs generic_cost = { in 32,64,128,256 and 512-bit */ {10, 10, 10, 15, 20}, /* cost of unaligned storess. */ 20, 20, /* SSE->integer and integer->SSE moves */ + 6, 6, /* Gather load static, per_elt. */ + 6, 6, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ 512, /* size of l2 cache. */ 64, /* size of prefetch block */ @@ -2239,6 +2290,11 @@ struct processor_costs core_cost = { in 32,64,128,256 and 512-bit */ {6, 6, 6, 6, 12}, /* cost of unaligned stores. */ 2, 2, /* SSE->integer and integer->SSE moves */ + /* VGATHERDPD is 7 uops, rec throughput 5, while VGATHERDPD is 9 uops, + rec. throughput 6. + So 5 uops statically and one uops per load. */ + 10, 6, /* Gather load static, per_elt. */ + 10, 6, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ 512, /* size of l2 cache. 
*/ 64, /* size of prefetch block */ -- cgit v1.1 From 733ba9b9a347d8a2ac2dedec820c51481c5c6338 Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Wed, 25 Oct 2017 20:52:54 +0000 Subject: [C++ PATCH] Label checking cleanups https://gcc.gnu.org/ml/gcc-patches/2017-10/msg01847.html * decl.c (identify_goto): Reduce duplication. (check_previous_goto_1): Likewise. (check_goto): Move var decls to initialization. (check_omp_return, define_label_1, define_label): Likewise. From-SVN: r254086 --- gcc/cp/ChangeLog | 7 +++ gcc/cp/decl.c | 149 ++++++++++++++++++++++++++----------------------------- 2 files changed, 76 insertions(+), 80 deletions(-) (limited to 'gcc') diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index fdb2c9b..9788518 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,10 @@ +2017-10-25 Nathan Sidwell + + * decl.c (identify_goto): Reduce duplication. + (check_previous_goto_1): Likewise. + (check_goto): Move var decls to initialization. + (check_omp_return, define_label_1, define_label): Likewise. + 2017-10-25 Jakub Jelinek PR libstdc++/81706 diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index c9c3d0a..45c3482 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -3093,8 +3093,9 @@ identify_goto (tree decl, location_t loc, const location_t *locus, diagnostic_t diag_kind) { bool complained - = (decl ? emit_diagnostic (diag_kind, loc, 0, "jump to label %qD", decl) - : emit_diagnostic (diag_kind, loc, 0, "jump to case label")); + = emit_diagnostic (diag_kind, loc, 0, + decl ?
"jump to label %qD" : "jump to case label", + decl); if (complained && locus) inform (*locus, " from here"); return complained; @@ -3149,68 +3150,62 @@ check_previous_goto_1 (tree decl, cp_binding_level* level, tree names, " crosses initialization of %q#D", new_decls); else inform (DECL_SOURCE_LOCATION (new_decls), - " enters scope of %q#D which has " + " enters scope of %q#D, which has " "non-trivial destructor", new_decls); } } if (b == level) break; - if ((b->kind == sk_try || b->kind == sk_catch) && !saw_eh) + + const char *inf = NULL; + location_t loc = input_location; + switch (b->kind) { - if (identified < 2) - { - complained = identify_goto (decl, input_location, locus, - DK_ERROR); - identified = 2; - } - if (complained) - { - if (b->kind == sk_try) - inform (input_location, " enters try block"); - else - inform (input_location, " enters catch block"); - } + case sk_try: + if (!saw_eh) + inf = "enters try block"; saw_eh = true; - } - if (b->kind == sk_omp && !saw_omp) - { - if (identified < 2) - { - complained = identify_goto (decl, input_location, locus, - DK_ERROR); - identified = 2; - } - if (complained) - inform (input_location, " enters OpenMP structured block"); + break; + + case sk_catch: + if (!saw_eh) + inf = "enters catch block"; + saw_eh = true; + break; + + case sk_omp: + if (!saw_omp) + inf = "enters OpenMP structured block"; saw_omp = true; - } - if (b->kind == sk_transaction && !saw_tm) - { - if (identified < 2) + break; + + case sk_transaction: + if (!saw_tm) + inf = "enters synchronized or atomic statement"; + saw_tm = true; + break; + + case sk_block: + if (!saw_cxif && level_for_constexpr_if (b->level_chain)) { - complained = identify_goto (decl, input_location, locus, - DK_ERROR); - identified = 2; + inf = "enters constexpr if statement"; + loc = EXPR_LOCATION (b->level_chain->this_entity); + saw_cxif = true; } - if (complained) - inform (input_location, - " enters synchronized or atomic statement"); - saw_tm = true; + break; + + 
default: + break; } - if (!saw_cxif && b->kind == sk_block - && level_for_constexpr_if (b->level_chain)) + + if (inf) { if (identified < 2) - { - complained = identify_goto (decl, input_location, locus, - DK_ERROR); - identified = 2; - } + complained = identify_goto (decl, input_location, locus, DK_ERROR); + identified = 2; if (complained) - inform (EXPR_LOCATION (b->level_chain->this_entity), - " enters constexpr if statement"); - saw_cxif = true; + inform (loc, " %s", inf); } } @@ -3238,10 +3233,6 @@ void check_goto (tree decl) { struct named_label_entry *ent, dummy; - bool saw_catch = false, complained = false; - int identified = 0; - tree bad; - unsigned ix; /* We can't know where a computed goto is jumping. So we assume that it's OK. */ @@ -3279,6 +3270,11 @@ check_goto (tree decl) return; } + bool saw_catch = false, complained = false; + int identified = 0; + tree bad; + unsigned ix; + if (ent->in_try_scope || ent->in_catch_scope || ent->in_transaction_scope || ent->in_constexpr_if || ent->in_omp_scope || !vec_safe_is_empty (ent->bad_decls)) @@ -3339,27 +3335,24 @@ check_goto (tree decl) inform (input_location, " enters OpenMP structured block"); } else if (flag_openmp) - { - cp_binding_level *b; - for (b = current_binding_level; b ; b = b->level_chain) - { - if (b == ent->binding_level) + for (cp_binding_level *b = current_binding_level; b ; b = b->level_chain) + { + if (b == ent->binding_level) + break; + if (b->kind == sk_omp) + { + if (identified < 2) + { + complained = identify_goto (decl, + DECL_SOURCE_LOCATION (decl), + &input_location, DK_ERROR); + identified = 2; + } + if (complained) + inform (input_location, " exits OpenMP structured block"); break; - if (b->kind == sk_omp) - { - if (identified < 2) - { - complained = identify_goto (decl, - DECL_SOURCE_LOCATION (decl), - &input_location, DK_ERROR); - identified = 2; - } - if (complained) - inform (input_location, " exits OpenMP structured block"); - break; - } - } - } + } + } } /* Check that a 
return is ok wrt OpenMP structured blocks. @@ -3368,8 +3361,7 @@ check_goto (tree decl) bool check_omp_return (void) { - cp_binding_level *b; - for (b = current_binding_level; b ; b = b->level_chain) + for (cp_binding_level *b = current_binding_level; b ; b = b->level_chain) if (b->kind == sk_omp) { error ("invalid exit from OpenMP structured block"); @@ -3413,8 +3405,6 @@ define_label_1 (location_t location, tree name) } else { - struct named_label_use_entry *use; - /* Mark label as having been defined. */ DECL_INITIAL (decl) = error_mark_node; /* Say where in the source. */ @@ -3423,7 +3413,7 @@ define_label_1 (location_t location, tree name) ent->binding_level = current_binding_level; ent->names_in_scope = current_binding_level->names; - for (use = ent->uses; use ; use = use->next) + for (named_label_use_entry *use = ent->uses; use; use = use->next) check_previous_goto (decl, use); ent->uses = NULL; } @@ -3436,9 +3426,8 @@ define_label_1 (location_t location, tree name) tree define_label (location_t location, tree name) { - tree ret; bool running = timevar_cond_start (TV_NAME_LOOKUP); - ret = define_label_1 (location, name); + tree ret = define_label_1 (location, name); timevar_cond_stop (TV_NAME_LOOKUP, running); return ret; } -- cgit v1.1 From a2af967e9718d3d15461dbc5c52008ceab63a2f0 Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Wed, 25 Oct 2017 21:37:33 +0000 Subject: [C++ PATCH] Kill IDENTIFIER_LABEL_VALUE https://gcc.gnu.org/ml/gcc-patches/2017-10/msg01854.html Kill IDENTIFIER_LABEL_VALUE. * cp-tree.h (lang_identifier): Delete label_value slot. (IDENTIFIER_LABEL_VALUE, SET_IDENTIFIER_LABEL_VALUE): Delete. (struct named_label_hasher): Rename to ... (struct named_label_hash): ... here. Reimplement. (struct language_function): Adjust x_named_labels. * name-lookup.h (struct cp_label_binding): Delete. (struct cp_binding_level): Delete shadowed_labels slot. * decl.c (struct named_label_entry): Add name and outer slots. (pop_label): Rename to ... 
(check_label_used): ... here. Don't pop. (note_label, sort_labels): Delete. (pop_labels, pop_local_label): Reimplement. (poplevel): Pop local labels as any other decl. Remove shadowed_labels handling. (named_label_hash::hash, named_label_hash::equal): New. (make_label_decl): Absorb into ... (lookup_label_1): ... here. Add making_local_p arg, reimplement. (lookup_label, declare_local_label): Adjust. (check_goto, define_label): Adjust. * lex.c (make_conv_op_name): Don't clear IDENTIFIER_LABEL_VALUE. * ptree.c (cxx_print_identifier): Don't print identifier binding. From-SVN: r254087 --- gcc/cp/ChangeLog | 23 +++++ gcc/cp/cp-tree.h | 27 ++--- gcc/cp/decl.c | 272 ++++++++++++++++++++++----------------------------- gcc/cp/lex.c | 1 - gcc/cp/name-lookup.h | 13 --- gcc/cp/ptree.c | 1 - 6 files changed, 155 insertions(+), 182 deletions(-) (limited to 'gcc') diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 9788518..8177664 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,5 +1,28 @@ 2017-10-25 Nathan Sidwell + Kill IDENTIFIER_LABEL_VALUE. + * cp-tree.h (lang_identifier): Delete label_value slot. + (IDENTIFIER_LABEL_VALUE, SET_IDENTIFIER_LABEL_VALUE): Delete. + (struct named_label_hasher): Rename to ... + (struct named_label_hash): ... here. Reimplement. + (struct language_function): Adjust x_named_labels. + * name-lookup.h (struct cp_label_binding): Delete. + (struct cp_binding_level): Delete shadowed_labels slot. + * decl.c (struct named_label_entry): Add name and outer slots. + (pop_label): Rename to ... + (check_label_used): ... here. Don't pop. + (note_label, sort_labels): Delete. + (pop_labels, pop_local_label): Reimplement. + (poplevel): Pop local labels as any other decl. Remove + shadowed_labels handling. + (named_label_hash::hash, named_label_hash::equal): New. + (make_label_decl): Absorb into ... + (lookup_label_1): ... here. Add making_local_p arg, reimplement. + (lookup_label, declare_local_label): Adjust. + (check_goto, define_label): Adjust. 
+ * lex.c (make_conv_op_name): Don't clear IDENTIFIER_LABEL_VALUE. + * ptree.c (cxx_print_identifier): Don't print identifier binding. + * decl.c (identifier_goto): Reduce duplication. (check_previous_goto_1): Likewise. (check_goto): Move var decls to initialization. diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index b74b6d9..f2570b003 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -561,7 +561,6 @@ extern GTY(()) tree cp_global_trees[CPTI_MAX]; struct GTY(()) lang_identifier { struct c_common_identifier c_common; cxx_binding *bindings; - tree label_value; }; /* Return a typed pointer version of T if it designates a @@ -996,11 +995,6 @@ enum GTY(()) abstract_class_use { #define SET_IDENTIFIER_TYPE_VALUE(NODE,TYPE) (TREE_TYPE (NODE) = (TYPE)) #define IDENTIFIER_HAS_TYPE_VALUE(NODE) (IDENTIFIER_TYPE_VALUE (NODE) ? 1 : 0) -#define IDENTIFIER_LABEL_VALUE(NODE) \ - (LANG_IDENTIFIER_CAST (NODE)->label_value) -#define SET_IDENTIFIER_LABEL_VALUE(NODE, VALUE) \ - IDENTIFIER_LABEL_VALUE (NODE) = (VALUE) - /* Kinds of identifiers. Values are carefully chosen. */ enum cp_identifier_kind { cik_normal = 0, /* Not a special identifier. */ @@ -1662,12 +1656,22 @@ struct cxx_int_tree_map_hasher : ggc_ptr_hash<cxx_int_tree_map> static bool equal (cxx_int_tree_map *, cxx_int_tree_map *); }; -struct named_label_entry; +struct named_label_entry; /* Defined in decl.c. */ -struct named_label_hasher : ggc_ptr_hash<named_label_entry> +struct named_label_hash : ggc_remove <named_label_entry *> { - static hashval_t hash (named_label_entry *); - static bool equal (named_label_entry *, named_label_entry *); + typedef named_label_entry *value_type; + typedef tree compare_type; /* An identifier. */ + + inline static hashval_t hash (value_type); + inline static bool equal (const value_type, compare_type); + + inline static void mark_empty (value_type &p) {p = NULL;} + inline static bool is_empty (value_type p) {return !p;} + + /* Nothing is deletable. Everything is insertable.
*/ + inline static bool is_deleted (value_type) { return false; } + inline static void mark_deleted (value_type) { gcc_unreachable (); } }; /* Global state pertinent to the current function. */ @@ -1696,7 +1700,8 @@ struct GTY(()) language_function { BOOL_BITFIELD invalid_constexpr : 1; - hash_table<named_label_hasher> *x_named_labels; + hash_table<named_label_hash> *x_named_labels; + cp_binding_level *bindings; vec<tree, va_gc> *x_local_names; /* Tracking possibly infinite loops. This is a vec only because diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index 45c3482..42b5274 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -189,27 +189,33 @@ struct GTY((chain_next ("%h.next"))) named_label_use_entry { function, and so we can check the validity of jumps to these labels. */ struct GTY((for_user)) named_label_entry { - /* The decl itself. */ - tree label_decl; + + tree name; /* Name of decl. */ + + tree label_decl; /* LABEL_DECL, unless deleted local label. */ + + named_label_entry *outer; /* Outer shadowed chain. */ /* The binding level to which the label is *currently* attached. This is initially set to the binding level in which the label is defined, but is modified as scopes are closed. */ cp_binding_level *binding_level; + /* The head of the names list that was current when the label was defined, or the inner scope popped. These are the decls that will be skipped when jumping to the label. */ tree names_in_scope; + /* A vector of all decls from all binding levels that would be crossed by a backward branch to the label. */ vec<tree, va_gc> *bad_decls; /* A list of uses of the label, before the label is defined. */ - struct named_label_use_entry *uses; + named_label_use_entry *uses; /* The following bits are set after the label is defined, and are - updated as scopes are popped. They indicate that a backward jump - to the label will illegally enter a scope of the given flavor. */ + updated as scopes are popped. They indicate that a jump to the + label will illegally enter a scope of the given flavor.
*/ bool in_try_scope; bool in_catch_scope; bool in_omp_scope; @@ -347,7 +353,7 @@ finish_scope (void) in a valid manner, and issue any appropriate warnings or errors. */ static void -pop_label (tree label, tree old_value) +check_label_used (tree label) { if (!processing_template_decl) { @@ -364,32 +370,6 @@ pop_label (tree label, tree old_value) else warn_for_unused_label (label); } - - SET_IDENTIFIER_LABEL_VALUE (DECL_NAME (label), old_value); -} - -/* Push all named labels into a vector, so that we can sort it on DECL_UID - to avoid code generation differences. */ - -int -note_label (named_label_entry **slot, vec &labels) -{ - labels.quick_push (slot); - return 1; -} - -/* Helper function to sort named label entries in a vector by DECL_UID. */ - -static int -sort_labels (const void *a, const void *b) -{ - named_label_entry **slot1 = *(named_label_entry **const *) a; - named_label_entry **slot2 = *(named_label_entry **const *) b; - if (DECL_UID ((*slot1)->label_decl) < DECL_UID ((*slot2)->label_decl)) - return -1; - if (DECL_UID ((*slot1)->label_decl) > DECL_UID ((*slot2)->label_decl)) - return 1; - return 0; } /* At the end of a function, all labels declared within the function @@ -399,46 +379,49 @@ sort_labels (const void *a, const void *b) static void pop_labels (tree block) { - if (named_labels) + if (!named_labels) + return; + + hash_table::iterator end (named_labels->end ()); + for (hash_table::iterator iter + (named_labels->begin ()); iter != end; ++iter) { - auto_vec labels; - named_label_entry **slot; - unsigned int i; + named_label_entry *ent = *iter; - /* Push all the labels into a vector and sort them by DECL_UID, - so that gaps between DECL_UIDs don't affect code generation. 
*/ - labels.reserve_exact (named_labels->elements ()); - named_labels->traverse &, note_label> (labels); - labels.qsort (sort_labels); - FOR_EACH_VEC_ELT (labels, i, slot) + gcc_checking_assert (!ent->outer); + if (ent->label_decl) { - struct named_label_entry *ent = *slot; - - pop_label (ent->label_decl, NULL_TREE); + check_label_used (ent->label_decl); /* Put the labels into the "variables" of the top-level block, so debugger can see them. */ DECL_CHAIN (ent->label_decl) = BLOCK_VARS (block); BLOCK_VARS (block) = ent->label_decl; - - named_labels->clear_slot (slot); } - named_labels = NULL; + ggc_free (ent); } + + named_labels = NULL; } /* At the end of a block with local labels, restore the outer definition. */ static void -pop_local_label (tree label, tree old_value) +pop_local_label (tree id, tree label) { - struct named_label_entry dummy; - - pop_label (label, old_value); + check_label_used (label); + named_label_entry **slot = named_labels->find_slot_with_hash + (id, IDENTIFIER_HASH_VALUE (id), NO_INSERT); + named_label_entry *ent = *slot; - dummy.label_decl = label; - named_label_entry **slot = named_labels->find_slot (&dummy, NO_INSERT); - named_labels->clear_slot (slot); + if (ent->outer) + ent = ent->outer; + else + { + ent = ggc_cleared_alloc (); + ent->name = id; + } + *slot = ent; } /* The following two routines are used to interface to Objective-C++. @@ -579,7 +562,6 @@ poplevel (int keep, int reverse, int functionbody) int leaving_for_scope; scope_kind kind; unsigned ix; - cp_label_binding *label_bind; bool subtime = timevar_cond_start (TV_NAME_LOOKUP); restart: @@ -613,11 +595,12 @@ poplevel (int keep, int reverse, int functionbody) Usually current_binding_level->names is in reverse order. But parameter decls were previously put in forward order. 
*/ + decls = current_binding_level->names; if (reverse) - current_binding_level->names - = decls = nreverse (current_binding_level->names); - else - decls = current_binding_level->names; + { + decls = nreverse (decls); + current_binding_level->names = decls; + } /* If there were any declarations or structure tags in that level, or if this level is a function body, @@ -770,7 +753,10 @@ poplevel (int keep, int reverse, int functionbody) } } /* Remove the binding. */ - pop_local_binding (name, decl); + if (TREE_CODE (decl) == LABEL_DECL) + pop_local_label (name, decl); + else + pop_local_binding (name, decl); } /* Remove declarations for any `for' variables from inner scopes @@ -784,11 +770,6 @@ poplevel (int keep, int reverse, int functionbody) link; link = TREE_CHAIN (link)) SET_IDENTIFIER_TYPE_VALUE (TREE_PURPOSE (link), TREE_VALUE (link)); - /* Restore the IDENTIFIER_LABEL_VALUEs for local labels. */ - FOR_EACH_VEC_SAFE_ELT_REVERSE (current_binding_level->shadowed_labels, - ix, label_bind) - pop_local_label (label_bind->label, label_bind->prev_value); - /* There may be OVERLOADs (wrapped in TREE_LISTs) on the BLOCK_VARs list if a `using' declaration put them there. The debugging back ends won't understand OVERLOAD, so we remove them here. @@ -2949,81 +2930,83 @@ redeclaration_error_message (tree newdecl, tree olddecl) } } + /* Hash and equality functions for the named_label table. */ hashval_t -named_label_hasher::hash (named_label_entry *ent) +named_label_hash::hash (const value_type entry) { - return DECL_UID (ent->label_decl); + return IDENTIFIER_HASH_VALUE (entry->name); } bool -named_label_hasher::equal (named_label_entry *a, named_label_entry *b) +named_label_hash::equal (const value_type entry, compare_type name) { - return a->label_decl == b->label_decl; + return name == entry->name; } -/* Create a new label, named ID. */ +/* Look for a label named ID in the current function. If one cannot + be found, create one. 
Return the named_label_entry, or NULL on + failure. */ -static tree -make_label_decl (tree id, int local_p) +static named_label_entry * +lookup_label_1 (tree id, bool making_local_p) { - struct named_label_entry *ent; - tree decl; - - decl = build_decl (input_location, LABEL_DECL, id, void_type_node); - - DECL_CONTEXT (decl) = current_function_decl; - SET_DECL_MODE (decl, VOIDmode); - C_DECLARED_LABEL_FLAG (decl) = local_p; - - /* Say where one reference is to the label, for the sake of the - error if it is not defined. */ - DECL_SOURCE_LOCATION (decl) = input_location; - - /* Record the fact that this identifier is bound to this label. */ - SET_IDENTIFIER_LABEL_VALUE (id, decl); + /* You can't use labels at global scope. */ + if (current_function_decl == NULL_TREE) + { + error ("label %qE referenced outside of any function", id); + return NULL; + } - /* Create the label htab for the function on demand. */ if (!named_labels) - named_labels = hash_table::create_ggc (13); + named_labels = hash_table::create_ggc (13); - /* Record this label on the list of labels used in this function. - We do this before calling make_label_decl so that we get the - IDENTIFIER_LABEL_VALUE before the new label is declared. */ - ent = ggc_cleared_alloc (); - ent->label_decl = decl; - - named_label_entry **slot = named_labels->find_slot (ent, INSERT); - gcc_assert (*slot == NULL); - *slot = ent; + hashval_t hash = IDENTIFIER_HASH_VALUE (id); + named_label_entry **slot + = named_labels->find_slot_with_hash (id, hash, INSERT); + named_label_entry *old = *slot; + + if (old && old->label_decl) + { + if (!making_local_p) + return old; - return decl; -} + if (old->binding_level == current_binding_level) + { + error ("local label %qE conflicts with existing label", id); + inform (DECL_SOURCE_LOCATION (old->label_decl), "previous label"); + return NULL; + } + } -/* Look for a label named ID in the current function. If one cannot - be found, create one. 
(We keep track of used, but undefined, - labels, and complain about them at the end of a function.) */ + /* We are making a new decl, create or reuse the named_label_entry */ + named_label_entry *ent = NULL; + if (old && !old->label_decl) + ent = old; + else + { + ent = ggc_cleared_alloc (); + ent->name = id; + ent->outer = old; + *slot = ent; + } -static tree -lookup_label_1 (tree id) -{ - tree decl; + /* Now create the LABEL_DECL. */ + tree decl = build_decl (input_location, LABEL_DECL, id, void_type_node); - /* You can't use labels at global scope. */ - if (current_function_decl == NULL_TREE) + DECL_CONTEXT (decl) = current_function_decl; + SET_DECL_MODE (decl, VOIDmode); + if (making_local_p) { - error ("label %qE referenced outside of any function", id); - return NULL_TREE; + C_DECLARED_LABEL_FLAG (decl) = true; + DECL_CHAIN (decl) = current_binding_level->names; + current_binding_level->names = decl; } - /* See if we've already got this label. */ - decl = IDENTIFIER_LABEL_VALUE (id); - if (decl != NULL_TREE && DECL_CONTEXT (decl) == current_function_decl) - return decl; + ent->label_decl = decl; - decl = make_label_decl (id, /*local_p=*/0); - return decl; + return ent; } /* Wrapper for lookup_label_1. */ @@ -3031,30 +3014,19 @@ lookup_label_1 (tree id) tree lookup_label (tree id) { - tree ret; bool subtime = timevar_cond_start (TV_NAME_LOOKUP); - ret = lookup_label_1 (id); + named_label_entry *ent = lookup_label_1 (id, false); timevar_cond_stop (TV_NAME_LOOKUP, subtime); - return ret; + return ent ? ent->label_decl : NULL_TREE; } -/* Declare a local label named ID. */ - tree declare_local_label (tree id) { - tree decl; - cp_label_binding bind; - - /* Add a new entry to the SHADOWED_LABELS list so that when we leave - this scope we can restore the old value of IDENTIFIER_TYPE_VALUE. 
*/ - bind.prev_value = IDENTIFIER_LABEL_VALUE (id); - - decl = make_label_decl (id, /*local_p=*/1); - bind.label = decl; - vec_safe_push (current_binding_level->shadowed_labels, bind); - - return decl; + bool subtime = timevar_cond_start (TV_NAME_LOOKUP); + named_label_entry *ent = lookup_label_1 (id, true); + timevar_cond_stop (TV_NAME_LOOKUP, subtime); + return ent ? ent->label_decl : NULL_TREE; } /* Returns nonzero if it is ill-formed to jump past the declaration of @@ -3232,8 +3204,6 @@ check_switch_goto (cp_binding_level* level) void check_goto (tree decl) { - struct named_label_entry *ent, dummy; - /* We can't know where a computed goto is jumping. So we assume that it's OK. */ if (TREE_CODE (decl) != LABEL_DECL) @@ -3244,22 +3214,22 @@ check_goto (tree decl) if (decl == cdtor_label) return; - dummy.label_decl = decl; - ent = named_labels->find (&dummy); - gcc_assert (ent != NULL); + hashval_t hash = IDENTIFIER_HASH_VALUE (DECL_NAME (decl)); + named_label_entry **slot + = named_labels->find_slot_with_hash (DECL_NAME (decl), hash, NO_INSERT); + named_label_entry *ent = *slot; /* If the label hasn't been defined yet, defer checking. */ if (! DECL_INITIAL (decl)) { - struct named_label_use_entry *new_use; - /* Don't bother creating another use if the last goto had the same data, and will therefore create the same set of errors. 
*/ if (ent->uses && ent->uses->names_in_scope == current_binding_level->names) return; - new_use = ggc_alloc (); + named_label_use_entry *new_use + = ggc_alloc (); new_use->binding_level = current_binding_level; new_use->names_in_scope = current_binding_level->names; new_use->o_goto_locus = input_location; @@ -3378,25 +3348,15 @@ check_omp_return (void) static tree define_label_1 (location_t location, tree name) { - struct named_label_entry *ent, dummy; - cp_binding_level *p; - tree decl; - - decl = lookup_label (name); - - dummy.label_decl = decl; - ent = named_labels->find (&dummy); - gcc_assert (ent != NULL); - /* After labels, make any new cleanups in the function go into their own new (temporary) binding contour. */ - for (p = current_binding_level; + for (cp_binding_level *p = current_binding_level; p->kind != sk_function_parms; p = p->level_chain) p->more_cleanups_ok = 0; - if (name == get_identifier ("wchar_t")) - permerror (input_location, "label named wchar_t"); + named_label_entry *ent = lookup_label_1 (name, false); + tree decl = ent->label_decl; if (DECL_INITIAL (decl) != NULL_TREE) { diff --git a/gcc/cp/lex.c b/gcc/cp/lex.c index fd93401..da9187d 100644 --- a/gcc/cp/lex.c +++ b/gcc/cp/lex.c @@ -585,7 +585,6 @@ make_conv_op_name (tree type) /* Just in case something managed to bind. */ IDENTIFIER_BINDING (identifier) = NULL; - IDENTIFIER_LABEL_VALUE (identifier) = NULL_TREE; /* Hang TYPE off the identifier so it can be found easily later when performing conversions. */ diff --git a/gcc/cp/name-lookup.h b/gcc/cp/name-lookup.h index bf0bf85..1fc1280 100644 --- a/gcc/cp/name-lookup.h +++ b/gcc/cp/name-lookup.h @@ -148,15 +148,6 @@ struct GTY(()) cp_class_binding { tree identifier; }; - -struct GTY(()) cp_label_binding { - /* The bound LABEL_DECL. */ - tree label; - /* The previous IDENTIFIER_LABEL_VALUE. */ - tree prev_value; -}; - - /* For each binding contour we allocate a binding_level structure which records the names defined in that contour. 
Contours include: @@ -202,10 +193,6 @@ struct GTY(()) cp_binding_level { the class. */ tree type_shadowed; - /* Similar to class_shadowed, but for IDENTIFIER_LABEL_VALUE, and - used for all binding levels. */ - vec *shadowed_labels; - /* For each level (except not the global one), a chain of BLOCK nodes for all the levels that were entered and exited one level down. */ diff --git a/gcc/cp/ptree.c b/gcc/cp/ptree.c index 50c717e..90bae2a 100644 --- a/gcc/cp/ptree.c +++ b/gcc/cp/ptree.c @@ -177,7 +177,6 @@ cxx_print_identifier (FILE *file, tree node, int indent) indent_to (file, indent + 4); fprintf (file, "%s local bindings <%p>", get_identifier_kind_name (node), (void *) IDENTIFIER_BINDING (node)); - print_node (file, "label", IDENTIFIER_LABEL_VALUE (node), indent + 4); } void -- cgit v1.1 From c0b24017eb8fee727bf1244e269e4b6b435e7952 Mon Sep 17 00:00:00 2001 From: Eric Botcazou Date: Wed, 25 Oct 2017 21:53:21 +0000 Subject: re PR middle-end/82062 (simple conditional expressions no longer folded) PR middle-end/82062 * fold-const.c (operand_equal_for_comparison_p): Also return true if ARG0 is a simple variant of ARG1 with narrower precision. (fold_ternary_loc): Always pass unstripped operands to the predicate. 
From-SVN: r254089 --- gcc/ChangeLog | 9 ++++++- gcc/fold-const.c | 24 ++++++++++++----- gcc/testsuite/ChangeLog | 6 +++++ gcc/testsuite/gcc.dg/fold-cond-2.c | 47 +++++++++++++++++++++++++++++++++ gcc/testsuite/gcc.dg/fold-cond-3.c | 35 ++++++++++++++++++++++++ gcc/testsuite/gcc.dg/fold-cond_expr-1.c | 47 --------------------------------- 6 files changed, 114 insertions(+), 54 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/fold-cond-2.c create mode 100644 gcc/testsuite/gcc.dg/fold-cond-3.c delete mode 100644 gcc/testsuite/gcc.dg/fold-cond_expr-1.c (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5985d9e..796a7e8 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,4 +1,11 @@ -2017-10-23 Jan Hubicka +2017-10-25 Eric Botcazou + + PR middle-end/82062 + * fold-const.c (operand_equal_for_comparison_p): Also return true + if ARG0 is a simple variant of ARG1 with narrower precision. + (fold_ternary_loc): Always pass unstripped operands to the predicate. + +2017-10-25 Jan Hubicka * i386.c (ix86_builtin_vectorization_cost): Compute scatter/gather cost correctly. diff --git a/gcc/fold-const.c b/gcc/fold-const.c index c16959b..9db5aeb 100644 --- a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -3366,7 +3366,8 @@ operand_equal_p (const_tree arg0, const_tree arg1, unsigned int flags) #undef OP_SAME_WITH_NULL } -/* Similar to operand_equal_p, but strip nops first. */ +/* Similar to operand_equal_p, but see if ARG0 might be a variant of ARG1 + with a different signedness or a narrower precision. */ static bool operand_equal_for_comparison_p (tree arg0, tree arg1) @@ -3381,9 +3382,20 @@ operand_equal_for_comparison_p (tree arg0, tree arg1) /* Discard any conversions that don't change the modes of ARG0 and ARG1 and see if the inner values are the same. This removes any signedness comparison, which doesn't matter here. 
*/ - STRIP_NOPS (arg0); - STRIP_NOPS (arg1); - if (operand_equal_p (arg0, arg1, 0)) + tree op0 = arg0; + tree op1 = arg1; + STRIP_NOPS (op0); + STRIP_NOPS (op1); + if (operand_equal_p (op0, op1, 0)) + return true; + + /* Discard a single widening conversion from ARG1 and see if the inner + value is the same as ARG0. */ + if (CONVERT_EXPR_P (arg1) + && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (arg1, 0))) + && TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (arg1, 0))) + < TYPE_PRECISION (TREE_TYPE (arg1)) + && operand_equal_p (arg0, TREE_OPERAND (arg1, 0), 0)) return true; return false; @@ -11169,8 +11181,8 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type, Also try swapping the arguments and inverting the conditional. */ if (COMPARISON_CLASS_P (arg0) - && operand_equal_for_comparison_p (TREE_OPERAND (arg0, 0), arg1) - && !HONOR_SIGNED_ZEROS (element_mode (arg1))) + && operand_equal_for_comparison_p (TREE_OPERAND (arg0, 0), op1) + && !HONOR_SIGNED_ZEROS (element_mode (op1))) { tem = fold_cond_expr_with_comparison (loc, type, arg0, op1, op2); if (tem) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 0e38439..443e7c7 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2017-10-25 Eric Botcazou + + * gcc.dg/fold-cond_expr-1.c: Rename to... + * gcc.dg/fold-cond-2.c: ...this. + * gcc.dg/fold-cond-3.c: New test. + 2017-10-25 Richard Biener PR tree-optimization/82436 diff --git a/gcc/testsuite/gcc.dg/fold-cond-2.c b/gcc/testsuite/gcc.dg/fold-cond-2.c new file mode 100644 index 0000000..68ec754 --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-cond-2.c @@ -0,0 +1,47 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +int +min1 (signed char op1, signed char op2) +{ + return (op1 < 25) ? (int)op1 : 24; +} +int +min2 (signed char op1, signed char op2) +{ + return (op1 <= 24) ? (int)op1 : 25; +} +int +min3 (unsigned char op1, unsigned char op2) +{ + return (op1 < 25) ? 
(unsigned int)op1 : 24; +} +int +min4 (unsigned char op1, unsigned char op2) +{ + return (op1 <= 24) ? (unsigned int)op1 : 25; +} +int +max1 (signed char op1, signed char op2) +{ + return (op1 > 24) ? (int)op1 : 25; +} +int +max2 (signed char op1, signed char op2) +{ + return (op1 >= 25) ? (int)op1 : 24; +} +int +max3 (unsigned char op1, unsigned char op2) +{ + return (op1 > 24) ? (unsigned int)op1 : 25; +} +int +max4 (unsigned char op1, unsigned char op2) +{ + return (op1 >= 25) ? (unsigned int)op1 : 24; +} + +/* { dg-final { scan-tree-dump-times "MIN_EXPR" 4 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "MAX_EXPR" 4 "optimized" } } */ + diff --git a/gcc/testsuite/gcc.dg/fold-cond-3.c b/gcc/testsuite/gcc.dg/fold-cond-3.c new file mode 100644 index 0000000..fe0ba65 --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-cond-3.c @@ -0,0 +1,35 @@ +/* { dg-do compile } */ +/* { dg-options "-fdump-tree-original" } */ + +unsigned long f1 (int x) +{ + return x > 0 ? (unsigned long) x : 0; +} + +unsigned long f2 (int x, int y) +{ + return x > y ? (unsigned long) x : (unsigned long) y; +} + +unsigned long f3 (int x) +{ + return x < 0 ? (unsigned long) x : 0; +} + +unsigned long f4 (int x, int y) +{ + return x < y ? (unsigned long) x : (unsigned long) y; +} + +unsigned long f5 (unsigned int x, unsigned int y) +{ + return x > y ? (unsigned long) x : (unsigned long) y; +} + +unsigned long f6 (unsigned int x, unsigned int y) +{ + return x < y ? 
(unsigned long) x : (unsigned long) y; +} + +/* { dg-final { scan-tree-dump-times "MAX_EXPR" 3 "original"} } */ +/* { dg-final { scan-tree-dump-times "MIN_EXPR" 3 "original"} } */ diff --git a/gcc/testsuite/gcc.dg/fold-cond_expr-1.c b/gcc/testsuite/gcc.dg/fold-cond_expr-1.c deleted file mode 100644 index 68ec754..0000000 --- a/gcc/testsuite/gcc.dg/fold-cond_expr-1.c +++ /dev/null @@ -1,47 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-optimized" } */ - -int -min1 (signed char op1, signed char op2) -{ - return (op1 < 25) ? (int)op1 : 24; -} -int -min2 (signed char op1, signed char op2) -{ - return (op1 <= 24) ? (int)op1 : 25; -} -int -min3 (unsigned char op1, unsigned char op2) -{ - return (op1 < 25) ? (unsigned int)op1 : 24; -} -int -min4 (unsigned char op1, unsigned char op2) -{ - return (op1 <= 24) ? (unsigned int)op1 : 25; -} -int -max1 (signed char op1, signed char op2) -{ - return (op1 > 24) ? (int)op1 : 25; -} -int -max2 (signed char op1, signed char op2) -{ - return (op1 >= 25) ? (int)op1 : 24; -} -int -max3 (unsigned char op1, unsigned char op2) -{ - return (op1 > 24) ? (unsigned int)op1 : 25; -} -int -max4 (unsigned char op1, unsigned char op2) -{ - return (op1 >= 25) ? (unsigned int)op1 : 24; -} - -/* { dg-final { scan-tree-dump-times "MIN_EXPR" 4 "optimized" } } */ -/* { dg-final { scan-tree-dump-times "MAX_EXPR" 4 "optimized" } } */ - -- cgit v1.1 From 0b661358bcd72a70bbf4b903db1f0f8de98a6bbd Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Wed, 25 Oct 2017 22:45:55 +0000 Subject: RISC-V: Add Sign/Zero extend patterns for PIC loads Loads on RISC-V are sign-extending by default, but we weren't telling GCC this in our PIC load patterns. This corrects the problem, and adds a zero-extending pattern as well. gcc/ChangeLog 2017-10-25 Palmer Dabbelt * config/riscv/riscv.md (ZERO_EXTEND_LOAD): Define. * config/riscv/pic.md (local_pic_load): Rename to local_pic_load_s, mark as a sign-extending load. (local_pic_load_u): Define. 
From-SVN: r254092 --- gcc/ChangeLog | 7 +++++++ gcc/config/riscv/pic.md | 11 +++++++++-- gcc/config/riscv/riscv.md | 3 +++ 3 files changed, 19 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 796a7e8..5e297fb 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,10 @@ +2017-10-25 Palmer Dabbelt + + * config/riscv/riscv.md (ZERO_EXTEND_LOAD): Define. + * config/riscv/pic.md (local_pic_load): Rename to local_pic_load_s, + mark as a sign-extending load. + (local_pic_load_u): Define. + 2017-10-25 Eric Botcazou PR middle-end/82062 diff --git a/gcc/config/riscv/pic.md b/gcc/config/riscv/pic.md index 6a29ead..03b8f9b 100644 --- a/gcc/config/riscv/pic.md +++ b/gcc/config/riscv/pic.md @@ -22,13 +22,20 @@ ;; Simplify PIC loads to static variables. ;; These should go away once we figure out how to emit auipc discretely. -(define_insn "*local_pic_load" +(define_insn "*local_pic_load_s" [(set (match_operand:ANYI 0 "register_operand" "=r") - (mem:ANYI (match_operand 1 "absolute_symbolic_operand" "")))] + (sign_extend:ANYI (mem:ANYI (match_operand 1 "absolute_symbolic_operand" ""))))] "USE_LOAD_ADDRESS_MACRO (operands[1])" "\t%0,%1" [(set (attr "length") (const_int 8))]) +(define_insn "*local_pic_load_u" + [(set (match_operand:ZERO_EXTEND_LOAD 0 "register_operand" "=r") + (zero_extend:ZERO_EXTEND_LOAD (mem:ZERO_EXTEND_LOAD (match_operand 1 "absolute_symbolic_operand" ""))))] + "USE_LOAD_ADDRESS_MACRO (operands[1])" + "u\t%0,%1" + [(set (attr "length") (const_int 8))]) + (define_insn "*local_pic_load" [(set (match_operand:ANYF 0 "register_operand" "=f") (mem:ANYF (match_operand 1 "absolute_symbolic_operand" ""))) diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index fd9236c..9f056bb 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -259,6 +259,9 @@ ;; Iterator for QImode extension patterns. (define_mode_iterator SUPERQI [HI SI (DI "TARGET_64BIT")]) +;; Iterator for extending loads. 
+(define_mode_iterator ZERO_EXTEND_LOAD [QI HI (SI "TARGET_64BIT")]) + ;; Iterator for hardware integer modes narrower than XLEN. (define_mode_iterator SUBX [QI HI (SI "TARGET_64BIT")]) -- cgit v1.1 From 1a59ccf25df49415490423382d31db15fa9c7796 Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Wed, 25 Oct 2017 23:53:41 +0000 Subject: C: detect more missing semicolons (PR c/7356) c_parser_declaration_or_fndef has logic for parsing what might be either a declaration or a function definition. This patch adds a test to detect cases where a semicolon would have terminated the decls as a declaration, where the token that follows would start a new declaration specifier, and updates the error message accordingly, with a fix-it hint. This addresses PR c/7356, fixing the case of a stray token before a #include that previously gave inscrutable output, and improving e.g.: int i int j; from: t.c:2:1: error: expected '=', ',', ';', 'asm' or '__attribute__' before 'int' int j; ^~~ to: t.c:1:6: error: expected ';' before 'int' int i ^ ; int j; ~~~ gcc.dg/noncompile/920923-1.c needs a slight update, as the output for the first line changes from: 920923-1.c:2:14: error: expected '=', ',', ';', 'asm' or '__attribute__' before 'unsigned' typedef BYTE unsigned char; /* { dg-error "expected" } */ ^~~~~~~~ to: 920923-1.c:2:13: error: expected ';' before 'unsigned' typedef BYTE unsigned char; /* { dg-error "expected" } */ ^~~~~~~~~ ; 920923-1.c:2:1: warning: useless type name in empty declaration typedef BYTE unsigned char; /* { dg-error "expected" } */ ^~~~~~~ The patch also adds a test for PR c/44515 as a baseline. gcc/c/ChangeLog: PR c/7356 * c-parser.c (c_parser_declaration_or_fndef): Detect missing semicolons. gcc/testsuite/ChangeLog: PR c/7356 PR c/44515 * c-c++-common/pr44515.c: New test case. * gcc.dg/pr7356-2.c: New test case. * gcc.dg/pr7356.c: New test case. * gcc.dg/spellcheck-typenames.c: Update the "singed" char "TODO" case to reflect changes to output. 
* gcc.dg/noncompile/920923-1.c: Add dg-warning to reflect changes to output. From-SVN: r254093 --- gcc/c/ChangeLog | 6 +++++ gcc/c/c-parser.c | 36 +++++++++++++++++++++++++---- gcc/testsuite/ChangeLog | 12 ++++++++++ gcc/testsuite/c-c++-common/pr44515.c | 14 +++++++++++ gcc/testsuite/gcc.dg/noncompile/920923-1.c | 1 + gcc/testsuite/gcc.dg/pr7356-2.c | 33 ++++++++++++++++++++++++++ gcc/testsuite/gcc.dg/pr7356.c | 17 ++++++++++++++ gcc/testsuite/gcc.dg/spellcheck-typenames.c | 5 ++-- 8 files changed, 117 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/c-c++-common/pr44515.c create mode 100644 gcc/testsuite/gcc.dg/pr7356-2.c create mode 100644 gcc/testsuite/gcc.dg/pr7356.c (limited to 'gcc') diff --git a/gcc/c/ChangeLog b/gcc/c/ChangeLog index b4fde0d..5d028b4 100644 --- a/gcc/c/ChangeLog +++ b/gcc/c/ChangeLog @@ -1,3 +1,9 @@ +2017-10-25 David Malcolm + + PR c/7356 + * c-parser.c (c_parser_declaration_or_fndef): Detect missing + semicolons. + 2017-10-25 Jakub Jelinek PR libstdc++/81706 diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c index 6b84324..68c45fd 100644 --- a/gcc/c/c-parser.c +++ b/gcc/c/c-parser.c @@ -2241,11 +2241,37 @@ c_parser_declaration_or_fndef (c_parser *parser, bool fndef_ok, } if (!start_function (specs, declarator, all_prefix_attrs)) { - /* This can appear in many cases looking nothing like a - function definition, so we don't give a more specific - error suggesting there was one. */ - c_parser_error (parser, "expected %<=%>, %<,%>, %<;%>, %<asm%> " - "or %<__attribute__%>"); + /* At this point we've consumed: + declaration-specifiers declarator + and the next token isn't CPP_EQ, CPP_COMMA, CPP_SEMICOLON, + RID_ASM, RID_ATTRIBUTE, or RID_IN, + but the + declaration-specifiers declarator + aren't grokkable as a function definition, so we have + an error. 
*/ + gcc_assert (!c_parser_next_token_is (parser, CPP_SEMICOLON)); + if (c_parser_next_token_starts_declspecs (parser)) + { + /* If we have + declaration-specifiers declarator decl-specs + then assume we have a missing semicolon, which would + give us: + declaration-specifiers declarator decl-specs + ^ + ; + <~~~~~~~~~ declaration ~~~~~~~~~~> + Use c_parser_require to get an error with a fix-it hint. */ + c_parser_require (parser, CPP_SEMICOLON, "expected %<;%>"); + parser->error = false; + } + else + { + /* This can appear in many cases looking nothing like a + function definition, so we don't give a more specific + error suggesting there was one. */ + c_parser_error (parser, "expected %<=%>, %<,%>, %<;%>, %<asm%> " + "or %<__attribute__%>"); + } if (nested) c_pop_function_context (); break; diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 443e7c7..e2f7e4d 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,15 @@ +2017-10-25 David Malcolm + + PR c/7356 + PR c/44515 + * c-c++-common/pr44515.c: New test case. + * gcc.dg/pr7356-2.c: New test case. + * gcc.dg/pr7356.c: New test case. + * gcc.dg/spellcheck-typenames.c: Update the "singed" char "TODO" + case to reflect changes to output. + * gcc.dg/noncompile/920923-1.c: Add dg-warning to reflect changes + to output. + 2017-10-25 Eric Botcazou * gcc.dg/fold-cond_expr-1.c: Rename to... diff --git a/gcc/testsuite/c-c++-common/pr44515.c b/gcc/testsuite/c-c++-common/pr44515.c new file mode 100644 index 0000000..dbb77509 --- /dev/null +++ b/gcc/testsuite/c-c++-common/pr44515.c @@ -0,0 +1,14 @@ +/* { dg-options "-fdiagnostics-show-caret" } */ + +void bar(void); +void foo(void) +{ + bar() /* { dg-error "expected ';' before '.' 
token" } */ +} +/* { dg-begin-multiline-output "" } + bar() + ^ + ; + } + ~ + { dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/gcc.dg/noncompile/920923-1.c b/gcc/testsuite/gcc.dg/noncompile/920923-1.c index 1cb140e..006a071 100644 --- a/gcc/testsuite/gcc.dg/noncompile/920923-1.c +++ b/gcc/testsuite/gcc.dg/noncompile/920923-1.c @@ -1,5 +1,6 @@ /* { dg-message "undeclared identifier is reported only once" "reminder for mmu_base" { target *-*-* } 0 } */ typedef BYTE unsigned char; /* { dg-error "expected" } */ +/* { dg-warning "useless type name in empty declaration" "" { target *-*-* } .-1 } */ typedef int item_n; typedef int perm_set; struct PENT { caddr_t v_addr; };/* { dg-error "unknown type name" } */ diff --git a/gcc/testsuite/gcc.dg/pr7356-2.c b/gcc/testsuite/gcc.dg/pr7356-2.c new file mode 100644 index 0000000..ad67975 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr7356-2.c @@ -0,0 +1,33 @@ +/* { dg-options "-fdiagnostics-show-caret" } */ + +int i /* { dg-error "6: expected ';' before 'int'" } */ +int j; +/* { dg-begin-multiline-output "" } + int i + ^ + ; + int j; + ~~~ + { dg-end-multiline-output "" } */ + + +void test (void) +{ + int i /* { dg-error "8: expected ';' before 'int'" } */ + int j; + + /* { dg-begin-multiline-output "" } + int i + ^ + ; + int j; + ~~~ + { dg-end-multiline-output "" } */ +} + +int old_style_params (first, second) + int first; + int second; +{ + return first + second; +} diff --git a/gcc/testsuite/gcc.dg/pr7356.c b/gcc/testsuite/gcc.dg/pr7356.c new file mode 100644 index 0000000..84baf07 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr7356.c @@ -0,0 +1,17 @@ +/* { dg-options "-fdiagnostics-show-caret" } */ + +a /* { dg-line stray_token } */ +#include +#include +#include +int main(int argc, char** argv) +{ + return 0; +} + +/* { dg-error "expected ';' before '.*'" "" { target *-*-* } stray_token } */ +/* { dg-begin-multiline-output "" } + a + ^ + ; + { dg-end-multiline-output "" } */ diff --git 
a/gcc/testsuite/gcc.dg/spellcheck-typenames.c b/gcc/testsuite/gcc.dg/spellcheck-typenames.c index f3b8102..3717ad8 100644 --- a/gcc/testsuite/gcc.dg/spellcheck-typenames.c +++ b/gcc/testsuite/gcc.dg/spellcheck-typenames.c @@ -100,8 +100,9 @@ baz value; /* { dg-error "1: unknown type name .baz.; use .enum. keyword to refe { dg-end-multiline-output "" } */ /* TODO: it would be better to detect the "singed" vs "signed" typo here. */ -singed char ch; /* { dg-error "8: before .char." } */ +singed char ch; /* { dg-error "7: before .char." } */ /* { dg-begin-multiline-output "" } singed char ch; - ^~~~ + ^~~~~ + ; { dg-end-multiline-output "" } */ -- cgit v1.1 From 62fb4acb486f2c17ccba52f60f3288dcfc7c30ce Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Thu, 26 Oct 2017 00:16:12 +0000 Subject: Daily bump. From-SVN: r254096 --- gcc/DATESTAMP | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 7e02f03..882f5ad 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20171025 +20171026 -- cgit v1.1 From c3ef5fda143585ca19dff5a977b6aa87b7a17566 Mon Sep 17 00:00:00 2001 From: Tamar Christina Date: Thu, 26 Oct 2017 06:39:56 +0000 Subject: vect-dot-qi.h: New. 2017-10-26 Tamar Christina * gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h: New. * gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c: New. * gcc.target/aarch64/advsimd-intrinsics/vect-dot-s8.c: New. * gcc.target/aarch64/advsimd-intrinsics/vect-dot-u8.c: New. 
From-SVN: r254097 --- gcc/testsuite/ChangeLog | 7 +++ .../aarch64/advsimd-intrinsics/vdot-compile.c | 73 ++++++++++++++++++++++ .../aarch64/advsimd-intrinsics/vect-dot-qi.h | 15 +++++ .../aarch64/advsimd-intrinsics/vect-dot-s8.c | 9 +++ .../aarch64/advsimd-intrinsics/vect-dot-u8.c | 9 +++ 5 files changed, 113 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-s8.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-u8.c (limited to 'gcc') diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index e2f7e4d..cf7095e 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2017-10-26 Tamar Christina + + * gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h: New. + * gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c: New. + * gcc.target/aarch64/advsimd-intrinsics/vect-dot-s8.c: New. + * gcc.target/aarch64/advsimd-intrinsics/vect-dot-u8.c: New. + 2017-10-25 David Malcolm PR c/7356 diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c new file mode 100644 index 0000000..b7378ad --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c @@ -0,0 +1,73 @@ +/* { dg-do compile { target { aarch64*-*-* } } } */ +/* { dg-additional-options "-O3 -march=armv8.2-a+dotprod" } */ + +#include <arm_neon.h> + +/* Unsigned Dot Product instructions. 
*/ + +uint32x2_t ufoo (uint32x2_t r, uint8x8_t x, uint8x8_t y) +{ + return vdot_u32 (r, x, y); +} + +uint32x4_t ufooq (uint32x4_t r, uint8x16_t x, uint8x16_t y) +{ + return vdotq_u32 (r, x, y); +} + +uint32x2_t ufoo_lane (uint32x2_t r, uint8x8_t x, uint8x8_t y) +{ + return vdot_lane_u32 (r, x, y, 0); +} + +uint32x2_t ufoo_laneq (uint32x2_t r, uint8x8_t x, uint8x16_t y) +{ + return vdot_laneq_u32 (r, x, y, 0); +} + +uint32x4_t ufooq_lane (uint32x4_t r, uint8x16_t x, uint8x8_t y) +{ + return vdotq_lane_u32 (r, x, y, 0); +} + +uint32x4_t ufooq_laneq (uint32x4_t r, uint8x16_t x, uint8x16_t y) +{ + return vdotq_laneq_u32 (r, x, y, 0); +} + +/* Signed Dot Product instructions. */ + +int32x2_t sfoo (int32x2_t r, int8x8_t x, int8x8_t y) +{ + return vdot_s32 (r, x, y); +} + +int32x4_t sfooq (int32x4_t r, int8x16_t x, int8x16_t y) +{ + return vdotq_s32 (r, x, y); +} + +int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, int8x8_t y) +{ + return vdot_lane_s32 (r, x, y, 0); +} + +int32x2_t sfoo_laneq (int32x2_t r, int8x8_t x, int8x16_t y) +{ + return vdot_laneq_s32 (r, x, y, 0); +} + +int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, int8x8_t y) +{ + return vdotq_lane_s32 (r, x, y, 0); +} + +int32x4_t sfooq_laneq (int32x4_t r, int8x16_t x, int8x16_t y) +{ + return vdotq_laneq_s32 (r, x, y, 0); +} + +/* { dg-final { scan-assembler-times {[us]dot\tv[0-9]+\.2s, v[0-9]+\.8b, v[0-9]+\.8b} 2 } } */ +/* { dg-final { scan-assembler-times {[us]dot\tv[0-9]+\.2s, v[0-9]+\.8b, v[0-9]+\.4b\[[0-9]+\]} 4 } } */ +/* { dg-final { scan-assembler-times {[us]dot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.16b} 2 } } */ +/* { dg-final { scan-assembler-times {[us]dot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.4b\[[0-9]+\]} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h new file mode 100644 index 0000000..90b00af --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h @@ -0,0 +1,15 @@ 
+TYPE char X[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); +TYPE char Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); + +__attribute__ ((noinline)) int +foo1(int len) { + int i; + TYPE int result = 0; + TYPE short prod; + + for (i=0; i Date: Thu, 26 Oct 2017 06:42:41 +0000 Subject: re PR target/81800 (On aarch64 ilp32 lrint should not be inlined as two instructions) 2017-10-26 Tamar Christina PR target/81800 * config/aarch64/aarch64.md (lrint2): Add flag_trapping_math and flag_fp_int_builtin_inexact. gcc/testsuite/ 2017-10-26 Tamar Christina * gcc.target/aarch64/inline-lrint_2.c (dg-options): Add -fno-trapping-math. From-SVN: r254098 --- gcc/ChangeLog | 6 ++++++ gcc/config/aarch64/aarch64.md | 4 +++- gcc/testsuite/ChangeLog | 4 ++++ gcc/testsuite/gcc.target/aarch64/inline-lrint_2.c | 2 +- 4 files changed, 14 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5e297fb..ad54a57 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2017-10-26 Tamar Christina + + PR target/81800 + * config/aarch64/aarch64.md (lrint2): Add flag_trapping_math + and flag_fp_int_builtin_inexact. + 2017-10-25 Palmer Dabbelt * config/riscv/riscv.md (ZERO_EXTEND_LOAD): Define. 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 389f2f9..eee836b 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -5155,7 +5155,9 @@ (define_expand "lrint<GPF:mode><GPI:mode>2" [(match_operand:GPI 0 "register_operand") (match_operand:GPF 1 "register_operand")] - "TARGET_FLOAT" + "TARGET_FLOAT + && ((GET_MODE_SIZE (<GPF:MODE>mode) <= GET_MODE_SIZE (<GPI:MODE>mode)) + || !flag_trapping_math || flag_fp_int_builtin_inexact)" { rtx cvt = gen_reg_rtx (<GPF:MODE>mode); emit_insn (gen_rint<GPF:mode>2 (cvt, operands[1])); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index cf7095e..8dcf461 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,9 @@ 2017-10-26 Tamar Christina + * gcc.target/aarch64/inline-lrint_2.c (dg-options): Add -fno-trapping-math. + +2017-10-26 Tamar Christina + * gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h: New. * gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c: New. * gcc.target/aarch64/advsimd-intrinsics/vect-dot-s8.c: New. diff --git a/gcc/testsuite/gcc.target/aarch64/inline-lrint_2.c b/gcc/testsuite/gcc.target/aarch64/inline-lrint_2.c index 6080e18..bd0c73c 100644 --- a/gcc/testsuite/gcc.target/aarch64/inline-lrint_2.c +++ b/gcc/testsuite/gcc.target/aarch64/inline-lrint_2.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-require-effective-target ilp32 } */ -/* { dg-options "-O3 -fno-math-errno" } */ +/* { dg-options "-O3 -fno-math-errno -fno-trapping-math" } */ #include "lrint-matherr.h" -- cgit v1.1 From c5a92111082b32711635a61a602f1495effe65ff Mon Sep 17 00:00:00 2001 From: Tamar Christina Date: Thu, 26 Oct 2017 06:56:31 +0000 Subject: vect-multitypes-1.c: Correct target selector. 2017-10-26 Tamar Christina * gcc.dg/vect/vect-multitypes-1.c: Correct target selector. 
From-SVN: r254099 --- gcc/testsuite/ChangeLog | 4 ++++ gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 8dcf461..786c085 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,9 @@ 2017-10-26 Tamar Christina + * gcc.dg/vect/vect-multitypes-1.c: Correct target selector. + +2017-10-26 Tamar Christina + * gcc.target/aarch64/inline-lrint_2.c (dg-options): Add -fno-trapping-math. 2017-10-26 Tamar Christina diff --git a/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c b/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c index 836fa76..1afdb46 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c +++ b/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c @@ -83,5 +83,5 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail {{ vect_no_align && { ! vect_hw_misalign } } || {vect_sizes_32B_16B }}} } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { target {{ vect_no_align && { ! vect_hw_misalign } } || {vect_sizes_32B_16B }}} } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { target { vect_no_align && { { ! vect_hw_misalign } && vect_sizes_32B_16B } } }} } */ -- cgit v1.1 From 2b5de01437baf9417c4e7d0831037ed09a0557d5 Mon Sep 17 00:00:00 2001 From: Tamar Christina Date: Thu, 26 Oct 2017 09:57:09 +0000 Subject: 2017-10-26 Tamar Christina * lib/target-supports.exp (check_effective_target_arm_v8_2a_dotprod_neon_ok_nocache): New. (check_effective_target_arm_v8_2a_dotprod_neon_ok): New. (add_options_for_arm_v8_2a_dotprod_neon): New. (check_effective_target_arm_v8_2a_dotprod_neon_hw): New. (check_effective_target_vect_sdot_qi): Add ARM && AArch64. 
(check_effective_target_vect_udot_qi): Likewise. * gcc.target/arm/simd/vdot-exec.c: New. * gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c: New. * gcc/doc/sourcebuild.texi: Document arm_v8_2a_dotprod_neon. From-SVN: r254100 --- gcc/doc/sourcebuild.texi | 16 +++++ gcc/testsuite/ChangeLog | 13 ++++ .../aarch64/advsimd-intrinsics/vdot-exec.c | 81 +++++++++++++++++++++ gcc/testsuite/lib/target-supports.exp | 84 +++++++++++++++++++++- 4 files changed, 193 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c (limited to 'gcc') diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index 7d6d4a3..c00aece 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -1702,6 +1702,17 @@ ARM target supports executing instructions from ARMv8.2 with the FP16 extension. Some multilibs may be incompatible with these options. Implies arm_v8_2a_fp16_neon_ok and arm_v8_2a_fp16_scalar_hw. +@item arm_v8_2a_dotprod_neon_ok +@anchor{arm_v8_2a_dotprod_neon_ok} +ARM target supports options to generate instructions from ARMv8.2 with +the Dot Product extension. Some multilibs may be incompatible with these +options. + +@item arm_v8_2a_dotprod_neon_hw +ARM target supports executing instructions from ARMv8.2 with the Dot +Product extension. Some multilibs may be incompatible with these options. +Implies arm_v8_2a_dotprod_neon_ok. + @item arm_prefer_ldrd_strd ARM target prefers @code{LDRD} and @code{STRD} instructions over @code{LDM} and @code{STM} instructions. @@ -2308,6 +2319,11 @@ supported by the target; see the @ref{arm_v8_2a_fp16_neon_ok,,arm_v8_2a_fp16_neon_ok} effective target keyword. +@item arm_v8_2a_dotprod_neon +Add options for ARMv8.2 with Adv.SIMD Dot Product support, if this is +supported by the target; see the +@ref{arm_v8_2a_dotprod_neon_ok} effective target keyword. 
+ @item bind_pic_locally Add the target-specific flags needed to enable functions to bind locally when using pic/PIC passes in the testsuite. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 786c085..6d42152 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,18 @@ 2017-10-26 Tamar Christina + * lib/target-supports.exp + (check_effective_target_arm_v8_2a_dotprod_neon_ok_nocache): New. + (check_effective_target_arm_v8_2a_dotprod_neon_ok): New. + (add_options_for_arm_v8_2a_dotprod_neon): New. + (check_effective_target_arm_v8_2a_dotprod_neon_hw): New. + (check_effective_target_vect_sdot_qi): Add ARM && AArch64. + (check_effective_target_vect_udot_qi): Likewise. + * gcc.target/arm/simd/vdot-exec.c: New. + * gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c: New. + * gcc/doc/sourcebuild.texi: Document arm_v8_2a_dotprod_neon. + +2017-10-26 Tamar Christina + * gcc.dg/vect/vect-multitypes-1.c: Correct target selector. 2017-10-26 Tamar Christina diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c new file mode 100644 index 0000000..3e7cd6c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c @@ -0,0 +1,81 @@ +/* { dg-skip-if "can't compile on arm." { arm*-*-* } } */ +/* { dg-do run { target { aarch64*-*-* } } } */ +/* { dg-additional-options "-O3 -march=armv8.2-a+dotprod" } */ +/* { dg-require-effective-target arm_v8_2a_dotprod_neon_hw } */ + +#include + +extern void abort(); + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# define ORDER(x, y) y +#else +# define ORDER(x, y) x - y +#endif + +#define P(n1,n2) n1,n1,n1,n1,n2,n2,n2,n2 +#define ARR(nm, p, ty, ...) 
ty nm##_##p = { __VA_ARGS__ } +#define TEST(t1, t2, t3, f, r1, r2, n1, n2) \ + ARR(f, x, t1, r1); \ + ARR(f, y, t2, r2); \ + t3 f##_##r = {0}; \ + f##_##r = f (f##_##r, f##_##x, f##_##y); \ + if (f##_##r[0] != n1 || f##_##r[1] != n2) \ + abort (); + +#define TEST_LANE(t1, t2, t3, f, r1, r2, n1, n2, n3, n4) \ + ARR(f, x, t1, r1); \ + ARR(f, y, t2, r2); \ + t3 f##_##rx = {0}; \ + f##_##rx = f (f##_##rx, f##_##x, f##_##y, ORDER (1, 0)); \ + if (f##_##rx[0] != n1 || f##_##rx[1] != n2) \ + abort (); \ + t3 f##_##rx1 = {0}; \ + f##_##rx1 = f (f##_##rx1, f##_##x, f##_##y, ORDER (1, 1)); \ + if (f##_##rx1[0] != n3 || f##_##rx1[1] != n4) \ + abort (); + +#define Px(n1,n2,n3,n4) P(n1,n2),P(n3,n4) +#define TEST_LANEQ(t1, t2, t3, f, r1, r2, n1, n2, n3, n4, n5, n6, n7, n8) \ + ARR(f, x, t1, r1); \ + ARR(f, y, t2, r2); \ + t3 f##_##rx = {0}; \ + f##_##rx = f (f##_##rx, f##_##x, f##_##y, ORDER (3, 0)); \ + if (f##_##rx[0] != n1 || f##_##rx[1] != n2) \ + abort (); \ + t3 f##_##rx1 = {0}; \ + f##_##rx1 = f (f##_##rx1, f##_##x, f##_##y, ORDER (3, 1)); \ + if (f##_##rx1[0] != n3 || f##_##rx1[1] != n4) \ + abort (); \ + t3 f##_##rx2 = {0}; \ + f##_##rx2 = f (f##_##rx2, f##_##x, f##_##y, ORDER (3, 2)); \ + if (f##_##rx2[0] != n5 || f##_##rx2[1] != n6) \ + abort (); \ + t3 f##_##rx3 = {0}; \ + f##_##rx3 = f (f##_##rx3, f##_##x, f##_##y, ORDER (3, 3)); \ + if (f##_##rx3[0] != n7 || f##_##rx3[1] != n8) \ + abort (); + +int +main() +{ + TEST (uint8x8_t, uint8x8_t, uint32x2_t, vdot_u32, P(1,2), P(2,3), 8, 24); + TEST (int8x8_t, int8x8_t, int32x2_t, vdot_s32, P(1,2), P(-2,-3), -8, -24); + + TEST (uint8x16_t, uint8x16_t, uint32x4_t, vdotq_u32, P(1,2), P(2,3), 8, 24); + TEST (int8x16_t, int8x16_t, int32x4_t, vdotq_s32, P(1,2), P(-2,-3), -8, -24); + + TEST_LANE (uint8x8_t, uint8x8_t, uint32x2_t, vdot_lane_u32, P(1,2), P(2,3), 8, 16, 12, 24); + TEST_LANE (int8x8_t, int8x8_t, int32x2_t, vdot_lane_s32, P(1,2), P(-2,-3), -8, -16, -12, -24); + + TEST_LANE (uint8x16_t, uint8x8_t, uint32x4_t, 
vdotq_lane_u32, P(1,2), P(2,3), 8, 16, 12, 24); + TEST_LANE (int8x16_t, int8x8_t, int32x4_t, vdotq_lane_s32, P(1,2), P(-2,-3), -8, -16, -12, -24); + + TEST_LANEQ (uint8x8_t, uint8x16_t, uint32x2_t, vdot_laneq_u32, P(1,2), Px(2,3,1,4), 8, 16, 12, 24, 4, 8, 16, 32); + TEST_LANEQ (int8x8_t, int8x16_t, int32x2_t, vdot_laneq_s32, P(1,2), Px(-2,-3,-1,-4), -8, -16, -12, -24, -4, -8, -16, -32); + + TEST_LANEQ (uint8x16_t, uint8x16_t, uint32x4_t, vdotq_laneq_u32, Px(1,2,2,1), Px(2,3,1,4), 8, 16, 12, 24, 4, 8, 16, 32); + TEST_LANEQ (int8x16_t, int8x16_t, int32x4_t, vdotq_laneq_s32, Px(1,2,2,1), Px(-2,-3,-1,-4), -8, -16, -12, -24, -4, -8, -16, -32); + + return 0; +} diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 56ac221..d7ef04f 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -4396,6 +4396,48 @@ proc check_effective_target_arm_v8_2a_fp16_neon_ok { } { check_effective_target_arm_v8_2a_fp16_neon_ok_nocache] } +# Return 1 if the target supports ARMv8.2 Adv.SIMD Dot Product +# instructions, 0 otherwise. The test is valid for ARM and for AArch64. +# Record the command line options needed. + +proc check_effective_target_arm_v8_2a_dotprod_neon_ok_nocache { } { + global et_arm_v8_2a_dotprod_neon_flags + set et_arm_v8_2a_dotprod_neon_flags "" + + if { ![istarget arm*-*-*] && ![istarget aarch64*-*-*] } { + return 0; + } + + # Iterate through sets of options to find the compiler flags that + # need to be added to the -march option. 
+ foreach flags {"" "-mfloat-abi=softfp -mfpu=neon-fp-armv8" "-mfloat-abi=hard -mfpu=neon-fp-armv8"} { + if { [check_no_compiler_messages_nocache \ + arm_v8_2a_dotprod_neon_ok object { + #if !defined (__ARM_FEATURE_DOTPROD) + #error "__ARM_FEATURE_DOTPROD not defined" + #endif + } "$flags -march=armv8.2-a+dotprod"] } { + set et_arm_v8_2a_dotprod_neon_flags "$flags -march=armv8.2-a+dotprod" + return 1 + } + } + + return 0; +} + +proc check_effective_target_arm_v8_2a_dotprod_neon_ok { } { + return [check_cached_effective_target arm_v8_2a_dotprod_neon_ok \ + check_effective_target_arm_v8_2a_dotprod_neon_ok_nocache] +} + +proc add_options_for_arm_v8_2a_dotprod_neon { flags } { + if { ! [check_effective_target_arm_v8_2a_dotprod_neon_ok] } { + return "$flags" + } + global et_arm_v8_2a_dotprod_neon_flags + return "$flags $et_arm_v8_2a_dotprod_neon_flags" +} + # Return 1 if the target supports executing ARMv8 NEON instructions, 0 # otherwise. @@ -4533,6 +4575,42 @@ proc check_effective_target_arm_v8_2a_fp16_neon_hw { } { } [add_options_for_arm_v8_2a_fp16_neon ""]] } +# Return 1 if the target supports executing AdvSIMD instructions from ARMv8.2 +# with the Dot Product extension, 0 otherwise. The test is valid for ARM and for +# AArch64. + +proc check_effective_target_arm_v8_2a_dotprod_neon_hw { } { + if { ![check_effective_target_arm_v8_2a_dotprod_neon_ok] } { + return 0; + } + return [check_runtime arm_v8_2a_dotprod_neon_hw_available { + #include "arm_neon.h" + int + main (void) + { + + uint32x2_t results = {0,0}; + uint8x8_t a = {1,1,1,1,2,2,2,2}; + uint8x8_t b = {2,2,2,2,3,3,3,3}; + + #ifdef __ARM_ARCH_ISA_A64 + asm ("udot %0.2s, %1.8b, %2.8b" + : "=w"(results) + : "w"(a), "w"(b) + : /* No clobbers. */); + + #else + asm ("vudot.u8 %P0, %P1, %P2" + : "=w"(results) + : "w"(a), "w"(b) + : /* No clobbers. */); + #endif + + return (results[0] == 8 && results[1] == 24) ? 
1 : 0; + } + } [add_options_for_arm_v8_2a_dotprod_neon ""]] +} + # Return 1 if this is a ARM target with NEON enabled. proc check_effective_target_arm_neon { } { @@ -5850,6 +5928,8 @@ proc check_effective_target_vect_sdot_qi { } { } else { set et_vect_sdot_qi_saved($et_index) 0 if { [istarget ia64-*-*] + || [istarget aarch64*-*-*] + || [istarget arm*-*-*] || ([istarget mips*-*-*] && [et-is-effective-target mips_msa]) } { set et_vect_udot_qi_saved 1 @@ -5874,6 +5954,8 @@ proc check_effective_target_vect_udot_qi { } { } else { set et_vect_udot_qi_saved($et_index) 0 if { [istarget powerpc*-*-*] + || [istarget aarch64*-*-*] + || [istarget arm*-*-*] || [istarget ia64-*-*] || ([istarget mips*-*-*] && [et-is-effective-target mips_msa]) } { @@ -8290,7 +8372,7 @@ proc check_effective_target_aarch64_tiny { } { # Create functions to check that the AArch64 assembler supports the # various architecture extensions via the .arch_extension pseudo-op. -foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse"} { +foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod"} { eval [string map [list FUNC $aarch64_ext] { proc check_effective_target_aarch64_asm_FUNC_ok { } { if { [istarget aarch64*-*-*] } { -- cgit v1.1 From 4bc19a3b1a9fe74e515b7c54082bbad27decc227 Mon Sep 17 00:00:00 2001 From: Tamar Christina Date: Thu, 26 Oct 2017 09:59:14 +0000 Subject: 2017-10-26 Tamar Christina * gcc.dg/vect/vect-reduc-dot-s8a.c (dg-additional-options, dg-require-effective-target): Add +dotprod. * gcc.dg/vect/vect-reduc-dot-u8a.c (dg-additional-options, dg-require-effective-target): Add +dotprod. 
From-SVN: r254101 --- gcc/testsuite/ChangeLog | 7 ++++ gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c | 3 ++ gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c | 3 ++ gcc/testsuite/gcc.target/arm/simd/vdot-exec.c | 55 ++++++++++++++++++++++++++ 4 files changed, 68 insertions(+) create mode 100644 gcc/testsuite/gcc.target/arm/simd/vdot-exec.c (limited to 'gcc') diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 6d42152..d71ab16 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,12 @@ 2017-10-26 Tamar Christina + * gcc.dg/vect/vect-reduc-dot-s8a.c + (dg-additional-options, dg-require-effective-target): Add +dotprod. + * gcc.dg/vect/vect-reduc-dot-u8a.c + (dg-additional-options, dg-require-effective-target): Add +dotprod. + +2017-10-26 Tamar Christina + * lib/target-supports.exp (check_effective_target_arm_v8_2a_dotprod_neon_ok_nocache): New. (check_effective_target_arm_v8_2a_dotprod_neon_ok): New. diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c index dc4f520..ac67474 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c @@ -1,4 +1,7 @@ /* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_dotprod_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-additional-options "-march=armv8.2-a+dotprod" { target { aarch64*-*-* } } } */ +/* { dg-add-options arm_v8_2a_dotprod_neon } */ #include #include "tree-vect.h" diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c index f3cc6c7..d020f64 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c @@ -1,4 +1,7 @@ /* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_dotprod_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-additional-options "-march=armv8.2-a+dotprod" 
{ target { aarch64*-*-* } } } */ +/* { dg-add-options arm_v8_2a_dotprod_neon } */ #include #include "tree-vect.h" diff --git a/gcc/testsuite/gcc.target/arm/simd/vdot-exec.c b/gcc/testsuite/gcc.target/arm/simd/vdot-exec.c new file mode 100644 index 0000000..054f470 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vdot-exec.c @@ -0,0 +1,55 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O3" } */ +/* { dg-require-effective-target arm_v8_2a_dotprod_neon_hw } */ +/* { dg-add-options arm_v8_2a_dotprod_neon } */ + +#include + +extern void abort(); + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# define ORDER(x, y) y +#else +# define ORDER(x, y) x - y +#endif + +#define P(n1,n2) n1,n1,n1,n1,n2,n2,n2,n2 +#define ARR(nm, p, ty, ...) ty nm##_##p = { __VA_ARGS__ } +#define TEST(t1, t2, t3, f, r1, r2, n1, n2) \ + ARR(f, x, t1, r1); \ + ARR(f, y, t2, r2); \ + t3 f##_##r = {0}; \ + f##_##r = f (f##_##r, f##_##x, f##_##y); \ + if (f##_##r[0] != n1 || f##_##r[1] != n2) \ + abort (); + +#define TEST_LANE(t1, t2, t3, f, r1, r2, n1, n2, n3, n4) \ + ARR(f, x, t1, r1); \ + ARR(f, y, t2, r2); \ + t3 f##_##rx = {0}; \ + f##_##rx = f (f##_##rx, f##_##x, f##_##y, ORDER (1, 0)); \ + if (f##_##rx[0] != n1 || f##_##rx[1] != n2) \ + abort (); \ + t3 f##_##rx1 = {0}; \ + f##_##rx1 = f (f##_##rx1, f##_##x, f##_##y, ORDER (1, 1)); \ + if (f##_##rx1[0] != n3 || f##_##rx1[1] != n4) \ + abort (); \ + +int +main() +{ + TEST (uint8x8_t, uint8x8_t, uint32x2_t, vdot_u32, P(1,2), P(2,3), 8, 24); + TEST (int8x8_t, int8x8_t, int32x2_t, vdot_s32, P(1,2), P(-2,-3), -8, -24); + + TEST (uint8x16_t, uint8x16_t, uint32x4_t, vdotq_u32, P(1,2), P(2,3), 8, 24); + TEST (int8x16_t, int8x16_t, int32x4_t, vdotq_s32, P(1,2), P(-2,-3), -8, -24); + + TEST_LANE (uint8x8_t, uint8x8_t, uint32x2_t, vdot_lane_u32, P(1,2), P(2,3), 8, 16, 12, 24); + + TEST_LANE (int8x8_t, int8x8_t, int32x2_t, vdot_lane_s32, P(1,2), P(-2,-3), -8, -16, -12, -24); + + TEST_LANE (uint8x16_t, uint8x8_t, uint32x4_t, vdotq_lane_u32, 
P(1,2), P(2,3), 8, 16, 12, 24); + TEST_LANE (int8x16_t, int8x8_t, int32x4_t, vdotq_lane_s32, P(1,2), P(-2,-3), -8, -16, -12, -24); + + return 0; +} -- cgit v1.1 From f073de07ad00d4be604bdbaeab14786850932601 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 26 Oct 2017 11:28:25 +0000 Subject: This patch adds a new hook that gives the preferred alignment for a static rtx... TARGET_STATIC_RTX_ALIGNMENT This patch adds a new hook that gives the preferred alignment for a static rtx, so that we don't need to query the front end in force_const_mem. 2017-10-26 Richard Sandiford gcc/ * target.def (static_rtx_alignment): New hook. * targhooks.h (default_static_rtx_alignment): Declare. * targhooks.c (default_static_rtx_alignment): New function. * doc/tm.texi.in (TARGET_STATIC_RTX_ALIGNMENT): New hook. * doc/tm.texi: Regenerate. * varasm.c (force_const_mem): Use targetm.static_rtx_alignment instead of targetm.constant_alignment. Remove call to set_mem_attributes. * config/cris/cris.c (TARGET_STATIC_RTX_ALIGNMENT): Redefine. (cris_preferred_mininum_alignment): New function, split out from... (cris_constant_alignment): ...here. (cris_static_rtx_alignment): New function. * config/i386/i386.c (ix86_static_rtx_alignment): New function, split out from... (ix86_constant_alignment): ...here. (TARGET_STATIC_RTX_ALIGNMENT): Redefine. * config/mmix/mmix.c (TARGET_STATIC_RTX_ALIGNMENT): Redefine. (mmix_static_rtx_alignment): New function. * config/spu/spu.c (spu_static_rtx_alignment): New function. (TARGET_STATIC_RTX_ALIGNMENT): Redefine. 
From-SVN: r254102 --- gcc/ChangeLog | 23 +++++++++++++++++++++++ gcc/config/cris/cris.c | 29 ++++++++++++++++++++++++----- gcc/config/i386/i386.c | 21 +++++++++++++++++---- gcc/config/mmix/mmix.c | 11 +++++++++++ gcc/config/spu/spu.c | 14 ++++++++++++++ gcc/doc/tm.texi | 7 +++++++ gcc/doc/tm.texi.in | 2 ++ gcc/target.def | 9 +++++++++ gcc/targhooks.c | 8 ++++++++ gcc/targhooks.h | 1 + gcc/varasm.c | 8 ++------ 11 files changed, 118 insertions(+), 15 deletions(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index ad54a57..3033a7f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,26 @@ +2017-10-26 Richard Sandiford + + * target.def (static_rtx_alignment): New hook. + * targhooks.h (default_static_rtx_alignment): Declare. + * targhooks.c (default_static_rtx_alignment): New function. + * doc/tm.texi.in (TARGET_STATIC_RTX_ALIGNMENT): New hook. + * doc/tm.texi: Regenerate. + * varasm.c (force_const_mem): Use targetm.static_rtx_alignment + instead of targetm.constant_alignment. Remove call to + set_mem_attributes. + * config/cris/cris.c (TARGET_STATIC_RTX_ALIGNMENT): Redefine. + (cris_preferred_mininum_alignment): New function, split out from... + (cris_constant_alignment): ...here. + (cris_static_rtx_alignment): New function. + * config/i386/i386.c (ix86_static_rtx_alignment): New function, + split out from... + (ix86_constant_alignment): ...here. + (TARGET_STATIC_RTX_ALIGNMENT): Redefine. + * config/mmix/mmix.c (TARGET_STATIC_RTX_ALIGNMENT): Redefine. + (mmix_static_rtx_alignment): New function. + * config/spu/spu.c (spu_static_rtx_alignment): New function. + (TARGET_STATIC_RTX_ALIGNMENT): Redefine. 
+ 2017-10-26 Tamar Christina PR target/81800 diff --git a/gcc/config/cris/cris.c b/gcc/config/cris/cris.c index fe80a27..8fa234f 100644 --- a/gcc/config/cris/cris.c +++ b/gcc/config/cris/cris.c @@ -165,6 +165,7 @@ static bool cris_function_value_regno_p (const unsigned int); static void cris_file_end (void); static unsigned int cris_hard_regno_nregs (unsigned int, machine_mode); static bool cris_hard_regno_mode_ok (unsigned int, machine_mode); +static HOST_WIDE_INT cris_static_rtx_alignment (machine_mode); static HOST_WIDE_INT cris_constant_alignment (const_tree, HOST_WIDE_INT); /* This is the parsed result of the "-max-stack-stackframe=" option. If @@ -288,6 +289,8 @@ int cris_cpu_version = CRIS_DEFAULT_CPU_VERSION; #undef TARGET_HARD_REGNO_MODE_OK #define TARGET_HARD_REGNO_MODE_OK cris_hard_regno_mode_ok +#undef TARGET_STATIC_RTX_ALIGNMENT +#define TARGET_STATIC_RTX_ALIGNMENT cris_static_rtx_alignment #undef TARGET_CONSTANT_ALIGNMENT #define TARGET_CONSTANT_ALIGNMENT cris_constant_alignment @@ -4329,6 +4332,26 @@ cris_hard_regno_mode_ok (unsigned int regno, machine_mode mode) || (regno != CRIS_MOF_REGNUM && regno != CRIS_ACR_REGNUM))); } +/* Return the preferred minimum alignment for a static object. */ + +static HOST_WIDE_INT +cris_preferred_mininum_alignment (void) +{ + if (!TARGET_CONST_ALIGN) + return 8; + if (TARGET_ALIGN_BY_32) + return 32; + return 16; +} + +/* Implement TARGET_STATIC_RTX_ALIGNMENT. */ + +static HOST_WIDE_INT +cris_static_rtx_alignment (machine_mode mode) +{ + return MAX (cris_preferred_mininum_alignment (), GET_MODE_ALIGNMENT (mode)); +} + /* Implement TARGET_CONSTANT_ALIGNMENT. Note that this hook has the effect of making gcc believe that ALL references to constant stuff (in code segment, like strings) have this alignment. 
That is a rather @@ -4339,11 +4362,7 @@ cris_hard_regno_mode_ok (unsigned int regno, machine_mode mode) static HOST_WIDE_INT cris_constant_alignment (const_tree, HOST_WIDE_INT basic_align) { - if (!TARGET_CONST_ALIGN) - return basic_align; - if (TARGET_ALIGN_BY_32) - return MAX (basic_align, 32); - return MAX (basic_align, 16); + return MAX (cris_preferred_mininum_alignment (), basic_align); } #if 0 diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 56486e0..1facf12 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -28741,6 +28741,18 @@ ix86_sched_init_global (FILE *, int, int) } +/* Implement TARGET_STATIC_RTX_ALIGNMENT. */ + +static HOST_WIDE_INT +ix86_static_rtx_alignment (machine_mode mode) +{ + if (mode == DFmode) + return 64; + if (ALIGN_MODE_128 (mode)) + return MAX (128, GET_MODE_ALIGNMENT (mode)); + return GET_MODE_ALIGNMENT (mode); +} + /* Implement TARGET_CONSTANT_ALIGNMENT. */ static HOST_WIDE_INT @@ -28749,10 +28761,9 @@ ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align) if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST || TREE_CODE (exp) == INTEGER_CST) { - if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64) - return 64; - else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128) - return 128; + machine_mode mode = TYPE_MODE (TREE_TYPE (exp)); + HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode); + return MAX (mode_align, align); } else if (!optimize_size && TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD) @@ -50295,6 +50306,8 @@ ix86_run_selftests (void) #undef TARGET_CAN_CHANGE_MODE_CLASS #define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class +#undef TARGET_STATIC_RTX_ALIGNMENT +#define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment #undef TARGET_CONSTANT_ALIGNMENT #define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment diff --git a/gcc/config/mmix/mmix.c b/gcc/config/mmix/mmix.c index e911594..4a73162 
100644 --- a/gcc/config/mmix/mmix.c +++ b/gcc/config/mmix/mmix.c @@ -168,6 +168,7 @@ static void mmix_print_operand (FILE *, rtx, int); static void mmix_print_operand_address (FILE *, machine_mode, rtx); static bool mmix_print_operand_punct_valid_p (unsigned char); static void mmix_conditional_register_usage (void); +static HOST_WIDE_INT mmix_static_rtx_alignment (machine_mode); static HOST_WIDE_INT mmix_constant_alignment (const_tree, HOST_WIDE_INT); static HOST_WIDE_INT mmix_starting_frame_offset (void); @@ -284,6 +285,8 @@ static HOST_WIDE_INT mmix_starting_frame_offset (void); #undef TARGET_OPTION_OVERRIDE #define TARGET_OPTION_OVERRIDE mmix_option_override +#undef TARGET_STATIC_RTX_ALIGNMENT +#define TARGET_STATIC_RTX_ALIGNMENT mmix_static_rtx_alignment #undef TARGET_CONSTANT_ALIGNMENT #define TARGET_CONSTANT_ALIGNMENT mmix_constant_alignment @@ -342,6 +345,14 @@ mmix_data_alignment (tree type ATTRIBUTE_UNUSED, int basic_align) return basic_align; } +/* Implement TARGET_STATIC_RTX_ALIGNMENT. */ + +static HOST_WIDE_INT +mmix_static_rtx_alignment (machine_mode mode) +{ + return MAX (GET_MODE_ALIGNMENT (mode), 32); +} + /* Implement tARGET_CONSTANT_ALIGNMENT. */ static HOST_WIDE_INT diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c index eda7fca..bf21cca 100644 --- a/gcc/config/spu/spu.c +++ b/gcc/config/spu/spu.c @@ -7196,6 +7196,18 @@ spu_truly_noop_truncation (unsigned int outprec, unsigned int inprec) return inprec <= 32 && outprec <= inprec; } +/* Implement TARGET_STATIC_RTX_ALIGNMENT. + + Make all static objects 16-byte aligned. This allows us to assume + they are also padded to 16 bytes, which means we can use a single + load or store instruction to access them. */ + +static HOST_WIDE_INT +spu_static_rtx_alignment (machine_mode mode) +{ + return MAX (GET_MODE_ALIGNMENT (mode), 128); +} + /* Implement TARGET_CONSTANT_ALIGNMENT. Make all static objects 16-byte aligned. 
This allows us to assume @@ -7447,6 +7459,8 @@ static const struct attribute_spec spu_attribute_table[] = #undef TARGET_TRULY_NOOP_TRUNCATION #define TARGET_TRULY_NOOP_TRUNCATION spu_truly_noop_truncation +#undef TARGET_STATIC_RTX_ALIGNMENT +#define TARGET_STATIC_RTX_ALIGNMENT spu_static_rtx_alignment #undef TARGET_CONSTANT_ALIGNMENT #define TARGET_CONSTANT_ALIGNMENT spu_constant_alignment diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 8484c1d..c02f4d3 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -1078,6 +1078,13 @@ On 32-bit ELF the largest supported section alignment in bits is @samp{(0x80000000 * 8)}, but this is not representable on 32-bit hosts. @end defmac +@deftypefn {Target Hook} HOST_WIDE_INT TARGET_STATIC_RTX_ALIGNMENT (machine_mode @var{mode}) +This hook returns the preferred alignment in bits for a +statically-allocated rtx, such as a constant pool entry. @var{mode} +is the mode of the rtx. The default implementation returns +@samp{GET_MODE_ALIGNMENT (@var{mode})}. +@end deftypefn + @defmac DATA_ALIGNMENT (@var{type}, @var{basic-align}) If defined, a C expression to compute the alignment for a variable in the static store. @var{type} is the data type, and @var{basic-align} is diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index 015f59e..37308e1 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -1026,6 +1026,8 @@ On 32-bit ELF the largest supported section alignment in bits is @samp{(0x80000000 * 8)}, but this is not representable on 32-bit hosts. @end defmac +@hook TARGET_STATIC_RTX_ALIGNMENT + @defmac DATA_ALIGNMENT (@var{type}, @var{basic-align}) If defined, a C expression to compute the alignment for a variable in the static store. 
@var{type} is the data type, and @var{basic-align} is diff --git a/gcc/target.def b/gcc/target.def index 435849c..6a1cd31 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -3336,6 +3336,15 @@ HOOK_VECTOR_END (addr_space) #define HOOK_PREFIX "TARGET_" DEFHOOK +(static_rtx_alignment, + "This hook returns the preferred alignment in bits for a\n\ +statically-allocated rtx, such as a constant pool entry. @var{mode}\n\ +is the mode of the rtx. The default implementation returns\n\ +@samp{GET_MODE_ALIGNMENT (@var{mode})}.", + HOST_WIDE_INT, (machine_mode mode), + default_static_rtx_alignment) + +DEFHOOK (constant_alignment, "This hook returns the alignment in bits of a constant that is being\n\ placed in memory. @var{constant} is the constant and @var{basic_align}\n\ diff --git a/gcc/targhooks.c b/gcc/targhooks.c index 41cab38..92ecc90 100644 --- a/gcc/targhooks.c +++ b/gcc/targhooks.c @@ -1173,6 +1173,14 @@ tree default_mangle_decl_assembler_name (tree decl ATTRIBUTE_UNUSED, return id; } +/* The default implementation of TARGET_STATIC_RTX_ALIGNMENT. */ + +HOST_WIDE_INT +default_static_rtx_alignment (machine_mode mode) +{ + return GET_MODE_ALIGNMENT (mode); +} + /* The default implementation of TARGET_CONSTANT_ALIGNMENT. */ HOST_WIDE_INT diff --git a/gcc/targhooks.h b/gcc/targhooks.h index 1510bb9..f60bca2 100644 --- a/gcc/targhooks.h +++ b/gcc/targhooks.h @@ -94,6 +94,7 @@ extern int default_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, in extern tree default_builtin_reciprocal (tree); +extern HOST_WIDE_INT default_static_rtx_alignment (machine_mode); extern HOST_WIDE_INT default_constant_alignment (const_tree, HOST_WIDE_INT); extern HOST_WIDE_INT constant_alignment_word_strings (const_tree, HOST_WIDE_INT); diff --git a/gcc/varasm.c b/gcc/varasm.c index d324ca03..a139151 100644 --- a/gcc/varasm.c +++ b/gcc/varasm.c @@ -3783,11 +3783,8 @@ force_const_mem (machine_mode mode, rtx x) *slot = desc; /* Align the location counter as required by EXP's data type. 
*/ - align = GET_MODE_ALIGNMENT (mode == VOIDmode ? word_mode : mode); - - tree type = lang_hooks.types.type_for_mode (mode, 0); - if (type != NULL_TREE) - align = targetm.constant_alignment (make_tree (type, x), align); + machine_mode align_mode = (mode == VOIDmode ? word_mode : mode); + align = targetm.static_rtx_alignment (align_mode); pool->offset += (align / BITS_PER_UNIT) - 1; pool->offset &= ~ ((align / BITS_PER_UNIT) - 1); @@ -3829,7 +3826,6 @@ force_const_mem (machine_mode mode, rtx x) /* Construct the MEM. */ desc->mem = def = gen_const_mem (mode, symbol); - set_mem_attributes (def, lang_hooks.types.type_for_mode (mode, 0), 1); set_mem_align (def, align); /* If we're dropping a label to the constant pool, make sure we -- cgit v1.1 From 8c2e5ecf991b9427afab0679b26a7a99dae2e078 Mon Sep 17 00:00:00 2001 From: Rainer Orth Date: Thu, 26 Oct 2017 12:22:21 +0000 Subject: Use -xbrace_comment=no with recent Solaris/x86 as * configure.ac (gcc_cv_as_ix86_xbrace_comment): Check if assembler supports -xbrace_comment option. * configure: Regenerate. * config.in: Regenerate. * config/i386/sol2.h (ASM_XBRACE_COMMENT_SPEC): Define. (ASM_CPU_SPEC): Use it. From-SVN: r254103 --- gcc/ChangeLog | 9 +++++++++ gcc/config.in | 6 ++++++ gcc/config/i386/sol2.h | 10 +++++++++- gcc/configure | 32 ++++++++++++++++++++++++++++++++ gcc/configure.ac | 5 +++++ 5 files changed, 61 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 3033a7f..95d9e39 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2017-10-26 Rainer Orth + + * configure.ac (gcc_cv_as_ix86_xbrace_comment): Check if assembler + supports -xbrace_comment option. + * configure: Regenerate. + * config.in: Regenerate. + * config/i386/sol2.h (ASM_XBRACE_COMMENT_SPEC): Define. + (ASM_CPU_SPEC): Use it. + 2017-10-26 Richard Sandiford * target.def (static_rtx_alignment): New hook. 
diff --git a/gcc/config.in b/gcc/config.in index 89d7108..5651bcb 100644 --- a/gcc/config.in +++ b/gcc/config.in @@ -717,6 +717,12 @@ #endif +/* Define if your assembler supports -xbrace_comment option. */ +#ifndef USED_FOR_TARGET +#undef HAVE_AS_XBRACE_COMMENT_OPTION +#endif + + /* Define to 1 if you have the `atoq' function. */ #ifndef USED_FOR_TARGET #undef HAVE_ATOQ diff --git a/gcc/config/i386/sol2.h b/gcc/config/i386/sol2.h index 6173360..05e5e1a 100644 --- a/gcc/config/i386/sol2.h +++ b/gcc/config/i386/sol2.h @@ -65,8 +65,16 @@ along with GCC; see the file COPYING3. If not see #define ASM_CPU64_DEFAULT_SPEC "-xarch=generic64" #endif +/* Since Studio 12.6, as needs -xbrace_comment=no so its AVX512 syntax is + fully compatible with gas. */ +#ifdef HAVE_AS_XBRACE_COMMENT_OPTION +#define ASM_XBRACE_COMMENT_SPEC "-xbrace_comment=no" +#else +#define ASM_XBRACE_COMMENT_SPEC "" +#endif + #undef ASM_CPU_SPEC -#define ASM_CPU_SPEC "%(asm_cpu_default)" +#define ASM_CPU_SPEC "%(asm_cpu_default) " ASM_XBRACE_COMMENT_SPEC /* Don't include ASM_PIC_SPEC. While the Solaris 10+ assembler accepts -K PIC, it gives many warnings: diff --git a/gcc/configure b/gcc/configure index aa5937d..c49e665 100755 --- a/gcc/configure +++ b/gcc/configure @@ -25552,6 +25552,38 @@ $as_echo "$as_me: WARNING: LTO for $target requires binutils >= 2.20.1, but vers ;; esac + { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for -xbrace_comment" >&5 +$as_echo_n "checking assembler for -xbrace_comment... " >&6; } +if test "${gcc_cv_as_ix86_xbrace_comment+set}" = set; then : + $as_echo_n "(cached) " >&6 +else + gcc_cv_as_ix86_xbrace_comment=no + if test x$gcc_cv_as != x; then + $as_echo '.text' > conftest.s + if { ac_try='$gcc_cv_as $gcc_cv_as_flags -xbrace_comment=no -o conftest.o conftest.s >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; } + then + gcc_cv_as_ix86_xbrace_comment=yes + else + echo "configure: failed program was" >&5 + cat conftest.s >&5 + fi + rm -f conftest.o conftest.s + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_ix86_xbrace_comment" >&5 +$as_echo "$gcc_cv_as_ix86_xbrace_comment" >&6; } +if test $gcc_cv_as_ix86_xbrace_comment = yes; then + +$as_echo "#define HAVE_AS_XBRACE_COMMENT_OPTION 1" >>confdefs.h + +fi + + # Test if the assembler supports the section flag 'e' for specifying # an excluded section. { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for .section with e" >&5 diff --git a/gcc/configure.ac b/gcc/configure.ac index d905d0d..7c0a903 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -4103,6 +4103,11 @@ foo: nop ;; esac + gcc_GAS_CHECK_FEATURE([-xbrace_comment], gcc_cv_as_ix86_xbrace_comment,, + [-xbrace_comment=no], [.text],, + [AC_DEFINE(HAVE_AS_XBRACE_COMMENT_OPTION, 1, + [Define if your assembler supports -xbrace_comment option.])]) + # Test if the assembler supports the section flag 'e' for specifying # an excluded section. gcc_GAS_CHECK_FEATURE([.section with e], gcc_cv_as_section_has_e, -- cgit v1.1 From 75bafecbd14e68a0f94b3bbc9e414b214c2e7805 Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Thu, 26 Oct 2017 12:47:14 +0000 Subject: [C++ PATCH] Kill IDENTIFIER_LABEL_VALUE https://gcc.gnu.org/ml/gcc-patches/2017-10/msg01935.html * decl.c (sort_labels): Restore function. (pop_labels): Sort labels (identify_goto): Add translation markup. From-SVN: r254104 --- gcc/cp/ChangeLog | 6 ++++++ gcc/cp/decl.c | 53 +++++++++++++++++++++++++++++++++++++---------------- 2 files changed, 43 insertions(+), 16 deletions(-) (limited to 'gcc') diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 8177664..29139c5 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,9 @@ +2017-10-26 Nathan Sidwell + + * decl.c (sort_labels): Restore function. 
+ (pop_labels): Sort labels + (identify_goto): Add translation markup. + 2017-10-25 Nathan Sidwell Kill IDENTIFIER_LABEL_VALUE. diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index 42b5274..519aa06 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -372,6 +372,18 @@ check_label_used (tree label) } } +/* Helper function to sort named label entries in a vector by DECL_UID. */ + +static int +sort_labels (const void *a, const void *b) +{ + tree label1 = *(tree const *) a; + tree label2 = *(tree const *) b; + + /* DECL_UIDs can never be equal. */ + return DECL_UID (label1) > DECL_UID (label2) ? -1 : +1; +} + /* At the end of a function, all labels declared within the function go out of scope. BLOCK is the top-level block for the function. */ @@ -382,6 +394,12 @@ pop_labels (tree block) if (!named_labels) return; + /* We need to add the labels to the block chain, so debug + information is emitted. But, we want the order to be stable so + need to sort them first. Otherwise the debug output could be + randomly ordered. I guess it's mostly stable, unless the hash + table implementation changes. */ + auto_vec labels (named_labels->elements ()); hash_table::iterator end (named_labels->end ()); for (hash_table::iterator iter (named_labels->begin ()); iter != end; ++iter) @@ -390,18 +408,21 @@ pop_labels (tree block) gcc_checking_assert (!ent->outer); if (ent->label_decl) - { - check_label_used (ent->label_decl); - - /* Put the labels into the "variables" of the top-level block, - so debugger can see them. */ - DECL_CHAIN (ent->label_decl) = BLOCK_VARS (block); - BLOCK_VARS (block) = ent->label_decl; - } + labels.quick_push (ent->label_decl); ggc_free (ent); } - named_labels = NULL; + labels.qsort (sort_labels); + + while (labels.length ()) + { + tree label = labels.pop (); + + DECL_CHAIN (label) = BLOCK_VARS (block); + BLOCK_VARS (block) = label; + + check_label_used (label); + } } /* At the end of a block with local labels, restore the outer definition. 
*/ @@ -3066,8 +3087,8 @@ identify_goto (tree decl, location_t loc, const location_t *locus, { bool complained = emit_diagnostic (diag_kind, loc, 0, - decl ? "jump to label %qD" : "jump to case label", - decl); + decl ? N_("jump to label %qD") + : N_("jump to case label"), decl); if (complained && locus) inform (*locus, " from here"); return complained; @@ -3136,32 +3157,32 @@ check_previous_goto_1 (tree decl, cp_binding_level* level, tree names, { case sk_try: if (!saw_eh) - inf = "enters try block"; + inf = N_("enters try block"); saw_eh = true; break; case sk_catch: if (!saw_eh) - inf = "enters catch block"; + inf = N_("enters catch block"); saw_eh = true; break; case sk_omp: if (!saw_omp) - inf = "enters OpenMP structured block"; + inf = N_("enters OpenMP structured block"); saw_omp = true; break; case sk_transaction: if (!saw_tm) - inf = "enters synchronized or atomic statement"; + inf = N_("enters synchronized or atomic statement"); saw_tm = true; break; case sk_block: if (!saw_cxif && level_for_constexpr_if (b->level_chain)) { - inf = "enters constexpr if statement"; + inf = N_("enters constexpr if statement"); loc = EXPR_LOCATION (b->level_chain->this_entity); saw_cxif = true; } -- cgit v1.1 From 18b279715c298992748426a41083ec76f3875bfa Mon Sep 17 00:00:00 2001 From: James Greenhalgh Date: Thu, 26 Oct 2017 14:17:40 +0000 Subject: [obvious][arm testsuite] Fixup expected location in require-pic-register-loc.c After r254010 we now add -gcolumn-info by default, that means the tests in gcc.target/arm/require-pic-register-loc.c need adjusting to not expect to see column zero. gcc/testsuite/ * gcc.target/arm/require-pic-register-loc.c: Use wider regex for column information. 
From-SVN: r254106 --- gcc/testsuite/ChangeLog | 5 +++++ gcc/testsuite/gcc.target/arm/require-pic-register-loc.c | 10 +++++----- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index d71ab16..01824b3 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2017-10-26 James Greenhalgh + + * gcc.target/arm/require-pic-register-loc.c: Use wider regex for + column information. + 2017-10-26 Tamar Christina * gcc.dg/vect/vect-reduc-dot-s8a.c diff --git a/gcc/testsuite/gcc.target/arm/require-pic-register-loc.c b/gcc/testsuite/gcc.target/arm/require-pic-register-loc.c index bd85e86..268e9e4 100644 --- a/gcc/testsuite/gcc.target/arm/require-pic-register-loc.c +++ b/gcc/testsuite/gcc.target/arm/require-pic-register-loc.c @@ -18,12 +18,12 @@ main (int argc) /* line 9. */ return 0; } -/* { dg-final { scan-assembler-not "\.loc 1 7 0" } } */ -/* { dg-final { scan-assembler-not "\.loc 1 8 0" } } */ -/* { dg-final { scan-assembler-not "\.loc 1 9 0" } } */ +/* { dg-final { scan-assembler-not "\.loc 1 7 \[0-9\]\+" } } */ +/* { dg-final { scan-assembler-not "\.loc 1 8 \[0-9\]\+" } } */ +/* { dg-final { scan-assembler-not "\.loc 1 9 \[0-9\]\+" } } */ /* The loc at the start of the prologue. */ -/* { dg-final { scan-assembler-times "\.loc 1 10 0" 1 } } */ +/* { dg-final { scan-assembler-times "\.loc 1 10 \[0-9\]\+" 1 } } */ /* The loc at the end of the prologue, with the first user line. */ -/* { dg-final { scan-assembler-times "\.loc 1 11 0" 1 } } */ +/* { dg-final { scan-assembler-times "\.loc 1 11 \[0-9\]\+" 1 } } */ -- cgit v1.1 From 7984457f8295811880c37e7861aa7c0454ce9845 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 26 Oct 2017 16:09:17 +0000 Subject: Stop print_hex from printing bits above the precision 2017-10-26 Richard Sandiford gcc/ * wide-int-print.cc (print_hex): Loop based on extract_uhwi. Don't print any bits outside the precision of the value. 
* wide-int.cc (test_printing): Add some new tests. From-SVN: r254109 --- gcc/ChangeLog | 6 ++++++ gcc/wide-int-print.cc | 34 ++++++++++++++++------------------ gcc/wide-int.cc | 11 +++++++++++ 3 files changed, 33 insertions(+), 18 deletions(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 95d9e39..9cf528c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2017-10-26 Richard Sandiford + + * wide-int-print.cc (print_hex): Loop based on extract_uhwi. + Don't print any bits outside the precision of the value. + * wide-int.cc (test_printing): Add some new tests. + 2017-10-26 Rainer Orth * configure.ac (gcc_cv_as_ix86_xbrace_comment): Check if assembler diff --git a/gcc/wide-int-print.cc b/gcc/wide-int-print.cc index 36d8ad8..8874e81 100644 --- a/gcc/wide-int-print.cc +++ b/gcc/wide-int-print.cc @@ -103,30 +103,28 @@ print_decu (const wide_int_ref &wi, FILE *file) } void -print_hex (const wide_int_ref &wi, char *buf) +print_hex (const wide_int_ref &val, char *buf) { - int i = wi.get_len (); - - if (wi == 0) + if (val == 0) buf += sprintf (buf, "0x0"); else { - if (wi::neg_p (wi)) + buf += sprintf (buf, "0x"); + int start = ROUND_DOWN (val.get_precision (), HOST_BITS_PER_WIDE_INT); + int width = val.get_precision () - start; + bool first_p = true; + for (int i = start; i >= 0; i -= HOST_BITS_PER_WIDE_INT) { - int j; - /* If the number is negative, we may need to pad value with - 0xFFF... because the leading elements may be missing and - we do not print a '-' with hex. 
*/ - buf += sprintf (buf, "0x"); - for (j = BLOCKS_NEEDED (wi.get_precision ()); j > i; j--) - buf += sprintf (buf, HOST_WIDE_INT_PRINT_PADDED_HEX, HOST_WIDE_INT_M1); - + unsigned HOST_WIDE_INT uhwi = wi::extract_uhwi (val, i, width); + if (!first_p) + buf += sprintf (buf, HOST_WIDE_INT_PRINT_PADDED_HEX, uhwi); + else if (uhwi != 0) + { + buf += sprintf (buf, HOST_WIDE_INT_PRINT_HEX_PURE, uhwi); + first_p = false; + } + width = HOST_BITS_PER_WIDE_INT; } - else - buf += sprintf (buf, "0x" HOST_WIDE_INT_PRINT_HEX_PURE, wi.elt (--i)); - - while (--i >= 0) - buf += sprintf (buf, HOST_WIDE_INT_PRINT_PADDED_HEX, wi.elt (i)); } } diff --git a/gcc/wide-int.cc b/gcc/wide-int.cc index 1a1a68c..ba0fd25 100644 --- a/gcc/wide-int.cc +++ b/gcc/wide-int.cc @@ -2253,6 +2253,17 @@ test_printing () VALUE_TYPE a = from_int (42); assert_deceq ("42", a, SIGNED); assert_hexeq ("0x2a", a); + assert_hexeq ("0x1fffffffffffffffff", wi::shwi (-1, 69)); + assert_hexeq ("0xffffffffffffffff", wi::mask (64, false, 69)); + assert_hexeq ("0xffffffffffffffff", wi::mask (64, false)); + if (WIDE_INT_MAX_PRECISION > 128) + { + assert_hexeq ("0x20000000000000000fffffffffffffffe", + wi::lshift (1, 129) + wi::lshift (1, 64) - 2); + assert_hexeq ("0x200000000000004000123456789abcdef", + wi::lshift (1, 129) + wi::lshift (1, 74) + + wi::lshift (0x1234567, 32) + 0x89abcdef); + } } /* Verify that various operations work correctly for VALUE_TYPE, -- cgit v1.1 From 9eaf97d6d7f1511638fb9209b7acf30e8f26a060 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 26 Oct 2017 16:12:09 +0000 Subject: Make more use of df_read_modify_subreg_p This patch uses df_read_modify_subreg_p to check whether writing to a subreg would preserve some of the existing contents. This has the effect of putting more emphasis on the REGMODE_NATURAL_SIZE-based definition of whether something can be partially modified, instead of using UNITS_PER_WORD unconditionally. 
This becomes important for SVE, where UNITS_PER_WORD has no significance for subregs of multi-register LD2/ST2, LD3/ST3 and LD4/ST4 tuples. 2017-10-26 Richard Sandiford Alan Hayward David Sherwood gcc/ * caller-save.c (mark_referenced_regs): Use read_modify_subreg_p. * combine.c (find_single_use_1): Likewise. (expand_field_assignment): Likewise. (move_deaths): Likewise. * lra-constraints.c (simplify_operand_subreg): Likewise. (curr_insn_transform): Likewise. * lra.c (collect_non_operand_hard_regs): Likewise. (add_regs_to_insn_regno_info): Likewise. * rtlanal.c (reg_referenced_p): Likewise. (covers_regno_no_parallel_p): Likewise. Co-Authored-By: Alan Hayward Co-Authored-By: David Sherwood From-SVN: r254110 --- gcc/ChangeLog | 15 +++++++++++++++ gcc/caller-save.c | 5 +---- gcc/combine.c | 19 +++++-------------- gcc/lra-constraints.c | 6 ++---- gcc/lra.c | 16 ++++++---------- gcc/rtlanal.c | 17 +++++------------ 6 files changed, 34 insertions(+), 44 deletions(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9cf528c..4d5c9d2 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,4 +1,19 @@ 2017-10-26 Richard Sandiford + Alan Hayward + David Sherwood + + * caller-save.c (mark_referenced_regs): Use read_modify_subreg_p. + * combine.c (find_single_use_1): Likewise. + (expand_field_assignment): Likewise. + (move_deaths): Likewise. + * lra-constraints.c (simplify_operand_subreg): Likewise. + (curr_insn_transform): Likewise. + * lra.c (collect_non_operand_hard_regs): Likewise. + (add_regs_to_insn_regno_info): Likewise. + * rtlanal.c (reg_referenced_p): Likewise. + (covers_regno_no_parallel_p): Likewise. + +2017-10-26 Richard Sandiford * wide-int-print.cc (print_hex): Loop based on extract_uhwi. Don't print any bits outside the precision of the value. 
diff --git a/gcc/caller-save.c b/gcc/caller-save.c index 7c787f7..576a023 100644 --- a/gcc/caller-save.c +++ b/gcc/caller-save.c @@ -1034,10 +1034,7 @@ mark_referenced_regs (rtx *loc, refmarker_fn *mark, void *arg) /* If we're setting only part of a multi-word register, we shall mark it as referenced, because the words that are not being set should be restored. */ - && ((GET_MODE_SIZE (GET_MODE (*loc)) - >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (*loc)))) - || (GET_MODE_SIZE (GET_MODE (SUBREG_REG (*loc))) - <= UNITS_PER_WORD)))) + && !read_modify_subreg_p (*loc))) return; } if (code == MEM || code == SUBREG) diff --git a/gcc/combine.c b/gcc/combine.c index d71e50f..93adfc1 100644 --- a/gcc/combine.c +++ b/gcc/combine.c @@ -579,10 +579,7 @@ find_single_use_1 (rtx dest, rtx *loc) && !REG_P (SET_DEST (x)) && ! (GET_CODE (SET_DEST (x)) == SUBREG && REG_P (SUBREG_REG (SET_DEST (x))) - && (((GET_MODE_SIZE (GET_MODE (SUBREG_REG (SET_DEST (x)))) - + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD) - == ((GET_MODE_SIZE (GET_MODE (SET_DEST (x))) - + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD)))) + && !read_modify_subreg_p (SET_DEST (x)))) break; return find_single_use_1 (dest, &SET_SRC (x)); @@ -7361,15 +7358,12 @@ expand_field_assignment (const_rtx x) } } - /* A SUBREG between two modes that occupy the same numbers of words - can be done by moving the SUBREG to the source. */ + /* If the destination is a subreg that overwrites the whole of the inner + register, we can move the subreg to the source. */ else if (GET_CODE (SET_DEST (x)) == SUBREG /* We need SUBREGs to compute nonzero_bits properly. 
*/ && nonzero_sign_valid - && (((GET_MODE_SIZE (GET_MODE (SET_DEST (x))) - + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD) - == ((GET_MODE_SIZE (GET_MODE (SUBREG_REG (SET_DEST (x)))) - + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD))) + && !read_modify_subreg_p (SET_DEST (x))) { x = gen_rtx_SET (SUBREG_REG (SET_DEST (x)), gen_lowpart @@ -13993,10 +13987,7 @@ move_deaths (rtx x, rtx maybe_kill_insn, int from_luid, rtx_insn *to_insn, if (GET_CODE (dest) == ZERO_EXTRACT || GET_CODE (dest) == STRICT_LOW_PART || (GET_CODE (dest) == SUBREG - && (((GET_MODE_SIZE (GET_MODE (dest)) - + UNITS_PER_WORD - 1) / UNITS_PER_WORD) - == ((GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))) - + UNITS_PER_WORD - 1) / UNITS_PER_WORD)))) + && !read_modify_subreg_p (dest))) { move_deaths (dest, maybe_kill_insn, from_luid, to_insn, pnotes); return; diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c index 6163d7d..c3bbfd7 100644 --- a/gcc/lra-constraints.c +++ b/gcc/lra-constraints.c @@ -1679,7 +1679,7 @@ simplify_operand_subreg (int nop, machine_mode reg_mode) bitmap_set_bit (&lra_subreg_reload_pseudos, REGNO (new_reg)); insert_before = (type != OP_OUT - || GET_MODE_SIZE (innermode) > GET_MODE_SIZE (mode)); + || read_modify_subreg_p (operand)); insert_after = (type != OP_IN); insert_move_for_subreg (insert_before ? &before : NULL, insert_after ? &after : NULL, @@ -4232,9 +4232,7 @@ curr_insn_transform (bool check_only_p) constraints. 
*/ if (type == OP_OUT && (curr_static_id->operand[i].strict_low - || (GET_MODE_SIZE (GET_MODE (reg)) > UNITS_PER_WORD - && (GET_MODE_SIZE (mode) - < GET_MODE_SIZE (GET_MODE (reg)))))) + || read_modify_subreg_p (*loc))) type = OP_INOUT; loc = &SUBREG_REG (*loc); mode = GET_MODE (*loc); diff --git a/gcc/lra.c b/gcc/lra.c index 3122f2c..04acf88 100644 --- a/gcc/lra.c +++ b/gcc/lra.c @@ -832,14 +832,12 @@ collect_non_operand_hard_regs (rtx *x, lra_insn_recog_data_t data, subreg_p = false; if (code == SUBREG) { + if (read_modify_subreg_p (op)) + subreg_p = true; op = SUBREG_REG (op); code = GET_CODE (op); if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (op))) - { - mode = GET_MODE (op); - if (GET_MODE_SIZE (mode) > REGMODE_NATURAL_SIZE (mode)) - subreg_p = true; - } + mode = GET_MODE (op); } if (REG_P (op)) { @@ -1427,14 +1425,12 @@ add_regs_to_insn_regno_info (lra_insn_recog_data_t data, rtx x, int uid, subreg_p = false; if (GET_CODE (x) == SUBREG) { + if (read_modify_subreg_p (x)) + subreg_p = true; x = SUBREG_REG (x); code = GET_CODE (x); if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (x))) - { - mode = GET_MODE (x); - if (GET_MODE_SIZE (mode) > REGMODE_NATURAL_SIZE (mode)) - subreg_p = true; - } + mode = GET_MODE (x); } if (REG_P (x)) { diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c index 560bfd4..beb24ba 100644 --- a/gcc/rtlanal.c +++ b/gcc/rtlanal.c @@ -1124,10 +1124,7 @@ reg_referenced_p (const_rtx x, const_rtx body) && !REG_P (SET_DEST (body)) && ! 
(GET_CODE (SET_DEST (body)) == SUBREG && REG_P (SUBREG_REG (SET_DEST (body))) - && (((GET_MODE_SIZE (GET_MODE (SUBREG_REG (SET_DEST (body)))) - + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD) - == ((GET_MODE_SIZE (GET_MODE (SET_DEST (body))) - + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD))) + && !read_modify_subreg_p (SET_DEST (body))) && reg_overlap_mentioned_p (x, SET_DEST (body))) return 1; return 0; @@ -2017,20 +2014,16 @@ dead_or_set_p (const rtx_insn *insn, const_rtx x) return 1; } -/* Return TRUE iff DEST is a register or subreg of a register and - doesn't change the number of words of the inner register, and any - part of the register is TEST_REGNO. */ +/* Return TRUE iff DEST is a register or subreg of a register, is a + complete rather than read-modify-write destination, and contains + register TEST_REGNO. */ static bool covers_regno_no_parallel_p (const_rtx dest, unsigned int test_regno) { unsigned int regno, endregno; - if (GET_CODE (dest) == SUBREG - && (((GET_MODE_SIZE (GET_MODE (dest)) - + UNITS_PER_WORD - 1) / UNITS_PER_WORD) - == ((GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))) - + UNITS_PER_WORD - 1) / UNITS_PER_WORD))) + if (GET_CODE (dest) == SUBREG && !read_modify_subreg_p (dest)) dest = SUBREG_REG (dest); if (!REG_P (dest)) -- cgit v1.1 From 37e4d57b99efe65710bb4a000093c596ab3f5124 Mon Sep 17 00:00:00 2001 From: Wilco Dijkstra Date: Thu, 26 Oct 2017 16:34:03 +0000 Subject: Improve addressing of TI/TFmode In https://gcc.gnu.org/ml/gcc-patches/2017-06/msg01125.html Jiong pointed out some addressing inefficiencies due to a recent change in regcprop (https://gcc.gnu.org/ml/gcc-patches/2017-04/msg00775.html). This patch improves aarch64_legitimize_address_displacement to split unaligned offsets of TImode and TFmode accesses. The resulting code is better and no longer relies on the original regcprop optimization. 
For the test we now produce: add x1, sp, 4 stp xzr, xzr, [x1, 24] rather than: mov x1, sp add x1, x1, 28 stp xzr, xzr, [x1] gcc/ * config/aarch64/aarch64.c (aarch64_legitimize_address_displacement): Improve unaligned TImode/TFmode base/offset split. testsuite/ * gcc.target/aarch64/ldp_stp_unaligned_2.c: New file. From-SVN: r254111 --- gcc/ChangeLog | 5 +++++ gcc/config/aarch64/aarch64.c | 12 ++++++++---- gcc/testsuite/ChangeLog | 4 ++++ gcc/testsuite/gcc.target/aarch64/ldp_stp_unaligned_2.c | 18 ++++++++++++++++++ 4 files changed, 35 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_stp_unaligned_2.c (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 4d5c9d2..7e0417b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,8 @@ +2017-10-26 Wilco Dijkstra + + * config/aarch64/aarch64.c (aarch64_legitimize_address_displacement): + Improve unaligned TImode/TFmode base/offset split. + 2017-10-26 Richard Sandiford Alan Hayward David Sherwood diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index d1aaf19..83630fc 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -4727,16 +4727,20 @@ aarch64_legitimate_address_p (machine_mode mode, rtx x, /* Split an out-of-range address displacement into a base and offset. Use 4KB range for 1- and 2-byte accesses and a 16KB range otherwise to increase opportunities for sharing the base address of different sizes. - For unaligned accesses and TI/TF mode use the signed 9-bit range. */ + Unaligned accesses use the signed 9-bit range, TImode/TFmode use + the intersection of signed scaled 7-bit and signed 9-bit offset. */ static bool aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode) { HOST_WIDE_INT offset = INTVAL (*disp); - HOST_WIDE_INT base = offset & ~(GET_MODE_SIZE (mode) < 4 ? 
0xfff : 0x3ffc); + HOST_WIDE_INT base; - if (mode == TImode || mode == TFmode - || (offset & (GET_MODE_SIZE (mode) - 1)) != 0) + if (mode == TImode || mode == TFmode) + base = (offset + 0x100) & ~0x1f8; + else if ((offset & (GET_MODE_SIZE (mode) - 1)) != 0) base = (offset + 0x100) & ~0x1ff; + else + base = offset & ~(GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3ffc); *off = GEN_INT (base); *disp = GEN_INT (offset - base); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 01824b3..283c787 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2017-10-26 Wilco Dijkstra + + * gcc.target/aarch64/ldp_stp_unaligned_2.c: New file. + 2017-10-26 James Greenhalgh * gcc.target/arm/require-pic-register-loc.c: Use wider regex for diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_unaligned_2.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_unaligned_2.c new file mode 100644 index 0000000..1e46755 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_unaligned_2.c @@ -0,0 +1,18 @@ +/* { dg-options "-O2 -fomit-frame-pointer" } */ + +/* Check that we split unaligned LDP/STP into base and aligned offset. */ + +typedef struct +{ + int a, b, c, d, e; +} S; + +void foo (S *); + +void test (int x) +{ + S s = { .a = x }; + foo (&s); +} + +/* { dg-final { scan-assembler-not "mov\tx\[0-9\]+, sp" } } */ -- cgit v1.1 From 1f7bffd09412d801016ecd014f34db77da2cc63e Mon Sep 17 00:00:00 2001 From: Wilco Dijkstra Date: Thu, 26 Oct 2017 16:40:25 +0000 Subject: Simplify frame layout for stack probing This patch makes some changes to the frame layout in order to simplify stack probing. We want to use the save of LR as a probe in any non-leaf function. With shrinkwrapping we may only save LR before a call, so it is useful to define a fixed location in the callee-saves. So force LR at the bottom of the callee-saves even with -fomit-frame-pointer. Also remove a rarely used frame layout that saves the callee-saves first with -fomit-frame-pointer. 
Doing so allows the store of LR to be used as a valid stack probe in all frames. gcc/ * config/aarch64/aarch64.c (aarch64_layout_frame): Ensure LR is always stored at the bottom of the callee-saves. Remove rarely used frame layout which saves callee-saves at top of frame, so the store of LR can be used as a valid probe in all cases. From-SVN: r254112 --- gcc/ChangeLog | 7 +++++++ gcc/config/aarch64/aarch64.c | 26 ++++++++++---------------- 2 files changed, 17 insertions(+), 16 deletions(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 7e0417b..f32a30b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,12 @@ 2017-10-26 Wilco Dijkstra + * config/aarch64/aarch64.c (aarch64_layout_frame): + Ensure LR is always stored at the bottom of the callee-saves. + Remove rarely used frame layout which saves callee-saves at top of + frame, so the store of LR can be used as a valid probe in all cases. + +2017-10-26 Wilco Dijkstra + * config/aarch64/aarch64.c (aarch64_legitimize_address_displacement): Improve unaligned TImode/TFmode base/offset split. diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 83630fc..ed97e2d 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -2885,7 +2885,8 @@ aarch64_frame_pointer_required (void) /* Mark the registers that need to be saved by the callee and calculate the size of the callee-saved registers area and frame record (both FP - and LR may be omitted). */ + and LR may be omitted). If the function is not a leaf, ensure LR is + saved at the bottom of the callee-save area. 
*/ static void aarch64_layout_frame (void) { @@ -2936,7 +2937,14 @@ aarch64_layout_frame (void) cfun->machine->frame.wb_candidate1 = R29_REGNUM; cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD; cfun->machine->frame.wb_candidate2 = R30_REGNUM; - offset += 2 * UNITS_PER_WORD; + offset = 2 * UNITS_PER_WORD; + } + else if (!crtl->is_leaf) + { + /* Ensure LR is saved at the bottom of the callee-saves. */ + cfun->machine->frame.reg_offset[R30_REGNUM] = 0; + cfun->machine->frame.wb_candidate1 = R30_REGNUM; + offset = UNITS_PER_WORD; } /* Now assign stack slots for them. */ @@ -3035,20 +3043,6 @@ aarch64_layout_frame (void) cfun->machine->frame.final_adjust = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust; } - else if (!frame_pointer_needed - && varargs_and_saved_regs_size < max_push_offset) - { - /* Frame with large local area and outgoing arguments (this pushes the - callee-saves first, followed by the locals and outgoing area): - stp reg1, reg2, [sp, -varargs_and_saved_regs_size]! - stp reg3, reg4, [sp, 16] - sub sp, sp, frame_size - varargs_and_saved_regs_size */ - cfun->machine->frame.callee_adjust = varargs_and_saved_regs_size; - cfun->machine->frame.final_adjust - = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust; - cfun->machine->frame.hard_fp_offset = cfun->machine->frame.callee_adjust; - cfun->machine->frame.locals_offset = cfun->machine->frame.hard_fp_offset; - } else { /* Frame with large local area and outgoing arguments using frame pointer: -- cgit v1.1 From 204d2c03acff4bf3b73cb5d2c9578b50c2aac703 Mon Sep 17 00:00:00 2001 From: Wilco Dijkstra Date: Thu, 26 Oct 2017 16:51:37 +0000 Subject: Introduce emit_frame_chain The current frame code combines the separate concepts of a frame chain (saving old FP,LR in a record and pointing new FP to it) and a frame pointer used to access locals. Add emit_frame_chain to the aarch64_frame descriptor and use it in the prolog and epilog code. 
For now just initialize it as before, so generated code is identical. Also correctly set EXIT_IGNORE_STACK. The current AArch64 epilog code restores SP from FP if alloca is used. If a frame pointer is used but there is no alloca, SP must remain valid for the epilog to work correctly. gcc/ * config/aarch64/aarch64.h (EXIT_IGNORE_STACK): Set if alloca is used. (aarch64_frame): Add emit_frame_chain boolean. * config/aarch64/aarch64.c (aarch64_frame_pointer_required): Move eh_return case to aarch64_layout_frame. (aarch64_layout_frame): Initialize emit_frame_chain. (aarch64_expand_prologue): Use emit_frame_chain. From-SVN: r254114 --- gcc/ChangeLog | 9 +++++++++ gcc/config/aarch64/aarch64.c | 19 ++++++++++--------- gcc/config/aarch64/aarch64.h | 9 ++++++--- 3 files changed, 25 insertions(+), 12 deletions(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index f32a30b..81e5bba 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,14 @@ 2017-10-26 Wilco Dijkstra + * config/aarch64/aarch64.h (EXIT_IGNORE_STACK): Set if alloca is used. + (aarch64_frame): Add emit_frame_chain boolean. + * config/aarch64/aarch64.c (aarch64_frame_pointer_required) + Move eh_return case to aarch64_layout_frame. + (aarch64_layout_frame): Initialize emit_frame_chain. + (aarch64_expand_prologue): Use emit_frame_chain. + +2017-10-26 Wilco Dijkstra + * config/aarch64/aarch64.c (aarch64_layout_frame): Ensure LR is always stored at the bottom of the callee-saves. Remove rarely used frame layout which saves callee-saves at top of diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index ed97e2d..1cc1043 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -2876,10 +2876,6 @@ aarch64_frame_pointer_required (void) && !df_regs_ever_live_p (LR_REGNUM))) return true; - /* Force a frame pointer for EH returns so the return address is at FP+8. 
*/ - if (crtl->calls_eh_return) - return true; - return false; } @@ -2896,6 +2892,10 @@ aarch64_layout_frame (void) if (reload_completed && cfun->machine->frame.laid_out) return; + /* Force a frame chain for EH returns so the return address is at FP+8. */ + cfun->machine->frame.emit_frame_chain + = frame_pointer_needed || crtl->calls_eh_return; + #define SLOT_NOT_REQUIRED (-2) #define SLOT_REQUIRED (-1) @@ -2930,7 +2930,7 @@ aarch64_layout_frame (void) last_fp_reg = regno; } - if (frame_pointer_needed) + if (cfun->machine->frame.emit_frame_chain) { /* FP and LR are placed in the linkage record. */ cfun->machine->frame.reg_offset[R29_REGNUM] = 0; @@ -3659,6 +3659,7 @@ aarch64_expand_prologue (void) HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset; unsigned reg1 = cfun->machine->frame.wb_candidate1; unsigned reg2 = cfun->machine->frame.wb_candidate2; + bool emit_frame_chain = cfun->machine->frame.emit_frame_chain; rtx_insn *insn; /* Sign return address for functions. */ @@ -3691,7 +3692,7 @@ aarch64_expand_prologue (void) if (callee_adjust != 0) aarch64_push_regs (reg1, reg2, callee_adjust); - if (frame_pointer_needed) + if (emit_frame_chain) { if (callee_adjust == 0) aarch64_save_callee_saves (DImode, callee_offset, R29_REGNUM, @@ -3699,14 +3700,14 @@ aarch64_expand_prologue (void) insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx, GEN_INT (callee_offset))); - RTX_FRAME_RELATED_P (insn) = 1; + RTX_FRAME_RELATED_P (insn) = frame_pointer_needed; emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); } aarch64_save_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM, - callee_adjust != 0 || frame_pointer_needed); + callee_adjust != 0 || emit_frame_chain); aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM, - callee_adjust != 0 || frame_pointer_needed); + callee_adjust != 0 || emit_frame_chain); aarch64_sub_sp (IP1_REGNUM, final_adjust, !frame_pointer_needed); } diff --git 
a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 75fda01..bc1ccc3 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -343,9 +343,9 @@ extern unsigned aarch64_architecture_version; (epilogue_completed && (REGNO) == LR_REGNUM) /* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, - the stack pointer does not matter. The value is tested only in - functions that have frame pointers. */ -#define EXIT_IGNORE_STACK 1 + the stack pointer does not matter. This is only true if the function + uses alloca. */ +#define EXIT_IGNORE_STACK (cfun->calls_alloca) #define STATIC_CHAIN_REGNUM R18_REGNUM #define HARD_FRAME_POINTER_REGNUM R29_REGNUM @@ -595,6 +595,9 @@ struct GTY (()) aarch64_frame /* The size of the stack adjustment after saving callee-saves. */ HOST_WIDE_INT final_adjust; + /* Store FP,LR and setup a frame pointer. */ + bool emit_frame_chain; + unsigned wb_candidate1; unsigned wb_candidate2; -- cgit v1.1 From bd5a2c67cfd636b6c78f213c8ee6dac62323eff9 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 26 Oct 2017 16:53:43 +0000 Subject: Add wider_subreg_mode helper functions This patch adds helper functions that say which of the two modes involved in a subreg is the larger, preferring the outer mode in the event of a tie. It also converts IRA and reload to track modes instead of byte sizes, since this is slightly more convenient when variable-sized modes are added later. 2017-10-26 Richard Sandiford Alan Hayward David Sherwood gcc/ * rtl.h (wider_subreg_mode): New function. * ira.h (ira_sort_regnos_for_alter_reg): Take a machine_mode * rather than an unsigned int *. * ira-color.c (regno_max_ref_width): Replace with... (regno_max_ref_mode): ...this new variable. (coalesced_pseudo_reg_slot_compare): Update accordingly. Use wider_subreg_mode. (ira_sort_regnos_for_alter_reg): Likewise. Take a machine_mode * rather than an unsigned int *. 
* lra-constraints.c (uses_hard_regs_p): Use wider_subreg_mode. (process_alt_operands): Likewise. (invariant_p): Likewise. * lra-spills.c (assign_mem_slot): Likewise. (add_pseudo_to_slot): Likewise. * lra.c (collect_non_operand_hard_regs): Likewise. (add_regs_to_insn_regno_info): Likewise. * reload1.c (regno_max_ref_width): Replace with... (regno_max_ref_mode): ...this new variable. (reload): Update accordingly. Update call to ira_sort_regnos_for_alter_reg. (alter_reg): Update to use regno_max_ref_mode. Call wider_subreg_mode. (init_eliminable_invariants): Update to use regno_max_ref_mode. (scan_paradoxical_subregs): Likewise. Co-Authored-By: Alan Hayward Co-Authored-By: David Sherwood From-SVN: r254115 --- gcc/ChangeLog | 28 ++++++++++++++++++++++++++++ gcc/ira-color.c | 30 +++++++++++++++++------------- gcc/ira.h | 2 +- gcc/lra-constraints.c | 10 +++------- gcc/lra-spills.c | 9 +++------ gcc/lra.c | 6 ++---- gcc/reload1.c | 32 +++++++++++++++++--------------- gcc/rtl.h | 18 ++++++++++++++++++ 8 files changed, 89 insertions(+), 46 deletions(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 81e5bba..4312ac2 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,31 @@ +2017-10-26 Richard Sandiford + Alan Hayward + David Sherwood + + * rtl.h (wider_subreg_mode): New function. + * ira.h (ira_sort_regnos_for_alter_reg): Take a machine_mode * + rather than an unsigned int *. + * ira-color.c (regno_max_ref_width): Replace with... + (regno_max_ref_mode): ...this new variable. + (coalesced_pseudo_reg_slot_compare): Update accordingly. + Use wider_subreg_mode. + (ira_sort_regnos_for_alter_reg): Likewise. Take a machine_mode * + rather than an unsigned int *. + * lra-constraints.c (uses_hard_regs_p): Use wider_subreg_mode. + (process_alt_operands): Likewise. + (invariant_p): Likewise. + * lra-spills.c (assign_mem_slot): Likewise. + (add_pseudo_to_slot): Likewise. + * lra.c (collect_non_operand_hard_regs): Likewise. 
+ (add_regs_to_insn_regno_info): Likewise. + * reload1.c (regno_max_ref_width): Replace with... + (regno_max_ref_mode): ...this new variable. + (reload): Update accordingly. Update call to + ira_sort_regnos_for_alter_reg. + (alter_reg): Update to use regno_max_ref_mode. Call wider_subreg_mode. + (init_eliminable_invariants): Update to use regno_max_ref_mode. + (scan_paradoxical_subregs): Likewise. + 2017-10-26 Wilco Dijkstra * config/aarch64/aarch64.h (EXIT_IGNORE_STACK): Set if alloca is used. diff --git a/gcc/ira-color.c b/gcc/ira-color.c index 31a4a80..240eb48 100644 --- a/gcc/ira-color.c +++ b/gcc/ira-color.c @@ -3908,7 +3908,7 @@ coalesced_pseudo_reg_freq_compare (const void *v1p, const void *v2p) /* Widest width in which each pseudo reg is referred to (via subreg). It is used for sorting pseudo registers. */ -static unsigned int *regno_max_ref_width; +static machine_mode *regno_max_ref_mode; /* Sort pseudos according their slot numbers (putting ones with smaller numbers first, or last when the frame pointer is not @@ -3921,7 +3921,7 @@ coalesced_pseudo_reg_slot_compare (const void *v1p, const void *v2p) ira_allocno_t a1 = ira_regno_allocno_map[regno1]; ira_allocno_t a2 = ira_regno_allocno_map[regno2]; int diff, slot_num1, slot_num2; - int total_size1, total_size2; + machine_mode mode1, mode2; if (a1 == NULL || ALLOCNO_HARD_REGNO (a1) >= 0) { @@ -3936,11 +3936,11 @@ coalesced_pseudo_reg_slot_compare (const void *v1p, const void *v2p) if ((diff = slot_num1 - slot_num2) != 0) return (frame_pointer_needed || (!FRAME_GROWS_DOWNWARD) == STACK_GROWS_DOWNWARD ? 
diff : -diff); - total_size1 = MAX (PSEUDO_REGNO_BYTES (regno1), - regno_max_ref_width[regno1]); - total_size2 = MAX (PSEUDO_REGNO_BYTES (regno2), - regno_max_ref_width[regno2]); - if ((diff = total_size2 - total_size1) != 0) + mode1 = wider_subreg_mode (PSEUDO_REGNO_MODE (regno1), + regno_max_ref_mode[regno1]); + mode2 = wider_subreg_mode (PSEUDO_REGNO_MODE (regno2), + regno_max_ref_mode[regno2]); + if ((diff = GET_MODE_SIZE (mode2) - GET_MODE_SIZE (mode1)) != 0) return diff; return regno1 - regno2; } @@ -4144,7 +4144,7 @@ coalesce_spill_slots (ira_allocno_t *spilled_coalesced_allocnos, int num) reload. */ void ira_sort_regnos_for_alter_reg (int *pseudo_regnos, int n, - unsigned int *reg_max_ref_width) + machine_mode *reg_max_ref_mode) { int max_regno = max_reg_num (); int i, regno, num, slot_num; @@ -4225,10 +4225,14 @@ ira_sort_regnos_for_alter_reg (int *pseudo_regnos, int n, ira_assert (ALLOCNO_HARD_REGNO (a) < 0); ALLOCNO_HARD_REGNO (a) = -slot_num; if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL) - fprintf (ira_dump_file, " a%dr%d(%d,%d)", - ALLOCNO_NUM (a), ALLOCNO_REGNO (a), ALLOCNO_FREQ (a), - MAX (PSEUDO_REGNO_BYTES (ALLOCNO_REGNO (a)), - reg_max_ref_width[ALLOCNO_REGNO (a)])); + { + machine_mode mode = wider_subreg_mode + (PSEUDO_REGNO_MODE (ALLOCNO_REGNO (a)), + reg_max_ref_mode[ALLOCNO_REGNO (a)]); + fprintf (ira_dump_file, " a%dr%d(%d,%d)", + ALLOCNO_NUM (a), ALLOCNO_REGNO (a), ALLOCNO_FREQ (a), + GET_MODE_SIZE (mode)); + } if (a == allocno) break; @@ -4239,7 +4243,7 @@ ira_sort_regnos_for_alter_reg (int *pseudo_regnos, int n, ira_spilled_reg_stack_slots_num = slot_num - 1; ira_free (spilled_coalesced_allocnos); /* Sort regnos according the slot numbers. 
*/ - regno_max_ref_width = reg_max_ref_width; + regno_max_ref_mode = reg_max_ref_mode; qsort (pseudo_regnos, n, sizeof (int), coalesced_pseudo_reg_slot_compare); FOR_EACH_ALLOCNO (a, ai) ALLOCNO_ADD_DATA (a) = NULL; diff --git a/gcc/ira.h b/gcc/ira.h index 69021c4..fba2ca5 100644 --- a/gcc/ira.h +++ b/gcc/ira.h @@ -195,7 +195,7 @@ extern void ira_set_pseudo_classes (bool, FILE *); extern void ira_expand_reg_equiv (void); extern void ira_update_equiv_info_by_shuffle_insn (int, int, rtx_insn *); -extern void ira_sort_regnos_for_alter_reg (int *, int, unsigned int *); +extern void ira_sort_regnos_for_alter_reg (int *, int, machine_mode *); extern void ira_mark_allocation_change (int); extern void ira_mark_memory_move_deletion (int, int); extern bool ira_reassign_pseudos (int *, int, HARD_REG_SET, HARD_REG_SET *, diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c index c3bbfd7..a423f06 100644 --- a/gcc/lra-constraints.c +++ b/gcc/lra-constraints.c @@ -1772,10 +1772,9 @@ uses_hard_regs_p (rtx x, HARD_REG_SET set) mode = GET_MODE (x); if (code == SUBREG) { + mode = wider_subreg_mode (x); x = SUBREG_REG (x); code = GET_CODE (x); - if (GET_MODE_SIZE (GET_MODE (x)) > GET_MODE_SIZE (mode)) - mode = GET_MODE (x); } if (REG_P (x)) @@ -1953,10 +1952,8 @@ process_alt_operands (int only_alternative) biggest_mode[nop] = GET_MODE (op); if (GET_CODE (op) == SUBREG) { + biggest_mode[nop] = wider_subreg_mode (op); operand_reg[nop] = reg = SUBREG_REG (op); - if (GET_MODE_SIZE (biggest_mode[nop]) - < GET_MODE_SIZE (GET_MODE (reg))) - biggest_mode[nop] = GET_MODE (reg); } if (! 
REG_P (reg)) operand_reg[nop] = NULL_RTX; @@ -5659,8 +5656,7 @@ invariant_p (const_rtx x) { x = SUBREG_REG (x); code = GET_CODE (x); - if (GET_MODE_SIZE (GET_MODE (x)) > GET_MODE_SIZE (mode)) - mode = GET_MODE (x); + mode = wider_subreg_mode (mode, GET_MODE (x)); } if (MEM_P (x)) diff --git a/gcc/lra-spills.c b/gcc/lra-spills.c index 5997b1e..9abcda4 100644 --- a/gcc/lra-spills.c +++ b/gcc/lra-spills.c @@ -134,8 +134,7 @@ assign_mem_slot (int i) machine_mode mode = GET_MODE (regno_reg_rtx[i]); HOST_WIDE_INT inherent_size = PSEUDO_REGNO_BYTES (i); machine_mode wider_mode - = (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (lra_reg_info[i].biggest_mode) - ? mode : lra_reg_info[i].biggest_mode); + = wider_subreg_mode (mode, lra_reg_info[i].biggest_mode); HOST_WIDE_INT total_size = GET_MODE_SIZE (wider_mode); HOST_WIDE_INT adjust = 0; @@ -312,10 +311,8 @@ add_pseudo_to_slot (int regno, int slot_num) and a total size which provides room for paradoxical subregs. We need to make sure the size and alignment of the slot are sufficient for both. */ - machine_mode mode = (GET_MODE_SIZE (PSEUDO_REGNO_MODE (regno)) - >= GET_MODE_SIZE (lra_reg_info[regno].biggest_mode) - ? 
PSEUDO_REGNO_MODE (regno) - : lra_reg_info[regno].biggest_mode); + machine_mode mode = wider_subreg_mode (PSEUDO_REGNO_MODE (regno), + lra_reg_info[regno].biggest_mode); unsigned int align = spill_slot_alignment (mode); slots[slot_num].align = MAX (slots[slot_num].align, align); slots[slot_num].size = MAX (slots[slot_num].size, GET_MODE_SIZE (mode)); diff --git a/gcc/lra.c b/gcc/lra.c index 04acf88..66fbfd5 100644 --- a/gcc/lra.c +++ b/gcc/lra.c @@ -832,12 +832,11 @@ collect_non_operand_hard_regs (rtx *x, lra_insn_recog_data_t data, subreg_p = false; if (code == SUBREG) { + mode = wider_subreg_mode (op); if (read_modify_subreg_p (op)) subreg_p = true; op = SUBREG_REG (op); code = GET_CODE (op); - if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (op))) - mode = GET_MODE (op); } if (REG_P (op)) { @@ -1425,12 +1424,11 @@ add_regs_to_insn_regno_info (lra_insn_recog_data_t data, rtx x, int uid, subreg_p = false; if (GET_CODE (x) == SUBREG) { + mode = wider_subreg_mode (x); if (read_modify_subreg_p (x)) subreg_p = true; x = SUBREG_REG (x); code = GET_CODE (x); - if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (x))) - mode = GET_MODE (x); } if (REG_P (x)) { diff --git a/gcc/reload1.c b/gcc/reload1.c index e2ee2fe..e15bd8a 100644 --- a/gcc/reload1.c +++ b/gcc/reload1.c @@ -97,8 +97,8 @@ static regset_head reg_has_output_reload; in the current insn. */ static HARD_REG_SET reg_is_output_reload; -/* Widest width in which each pseudo reg is referred to (via subreg). */ -static unsigned int *reg_max_ref_width; +/* Widest mode in which each pseudo reg is referred to (via subreg). */ +static machine_mode *reg_max_ref_mode; /* Vector to remember old contents of reg_renumber before spilling. */ static short *reg_old_renumber; @@ -830,7 +830,7 @@ reload (rtx_insn *first, int global) if (ira_conflicts_p) /* Ask IRA to order pseudo-registers for better stack slot sharing. 
*/ - ira_sort_regnos_for_alter_reg (temp_pseudo_reg_arr, n, reg_max_ref_width); + ira_sort_regnos_for_alter_reg (temp_pseudo_reg_arr, n, reg_max_ref_mode); for (i = 0; i < n; i++) alter_reg (temp_pseudo_reg_arr[i], -1, false); @@ -1252,7 +1252,7 @@ reload (rtx_insn *first, int global) /* Indicate that we no longer have known memory locations or constants. */ free_reg_equiv (); - free (reg_max_ref_width); + free (reg_max_ref_mode); free (reg_old_renumber); free (pseudo_previous_regs); free (pseudo_forbidden_regs); @@ -2142,8 +2142,9 @@ alter_reg (int i, int from_reg, bool dont_share_p) machine_mode mode = GET_MODE (regno_reg_rtx[i]); unsigned int inherent_size = PSEUDO_REGNO_BYTES (i); unsigned int inherent_align = GET_MODE_ALIGNMENT (mode); - unsigned int total_size = MAX (inherent_size, reg_max_ref_width[i]); - unsigned int min_align = reg_max_ref_width[i] * BITS_PER_UNIT; + machine_mode wider_mode = wider_subreg_mode (mode, reg_max_ref_mode[i]); + unsigned int total_size = GET_MODE_SIZE (wider_mode); + unsigned int min_align = GET_MODE_BITSIZE (reg_max_ref_mode[i]); int adjust = 0; something_was_spilled = true; @@ -4083,9 +4084,9 @@ init_eliminable_invariants (rtx_insn *first, bool do_subregs) grow_reg_equivs (); if (do_subregs) - reg_max_ref_width = XCNEWVEC (unsigned int, max_regno); + reg_max_ref_mode = XCNEWVEC (machine_mode, max_regno); else - reg_max_ref_width = NULL; + reg_max_ref_mode = NULL; num_eliminable_invariants = 0; @@ -4404,7 +4405,7 @@ finish_spills (int global) return something_changed; } -/* Find all paradoxical subregs within X and update reg_max_ref_width. */ +/* Find all paradoxical subregs within X and update reg_max_ref_mode. 
*/ static void scan_paradoxical_subregs (rtx x) @@ -4427,13 +4428,14 @@ scan_paradoxical_subregs (rtx x) return; case SUBREG: - if (REG_P (SUBREG_REG (x)) - && (GET_MODE_SIZE (GET_MODE (x)) - > reg_max_ref_width[REGNO (SUBREG_REG (x))])) + if (REG_P (SUBREG_REG (x))) { - reg_max_ref_width[REGNO (SUBREG_REG (x))] - = GET_MODE_SIZE (GET_MODE (x)); - mark_home_live_1 (REGNO (SUBREG_REG (x)), GET_MODE (x)); + unsigned int regno = REGNO (SUBREG_REG (x)); + if (partial_subreg_p (reg_max_ref_mode[regno], GET_MODE (x))) + { + reg_max_ref_mode[regno] = GET_MODE (x); + mark_home_live_1 (regno, GET_MODE (x)); + } } return; diff --git a/gcc/rtl.h b/gcc/rtl.h index f854550..8e82f04 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -2877,6 +2877,24 @@ subreg_lowpart_offset (machine_mode outermode, machine_mode innermode) GET_MODE_SIZE (innermode)); } +/* Given that a subreg has outer mode OUTERMODE and inner mode INNERMODE, + return the mode that is big enough to hold both the outer and inner + values. Prefer the outer mode in the event of a tie. */ + +inline machine_mode +wider_subreg_mode (machine_mode outermode, machine_mode innermode) +{ + return partial_subreg_p (outermode, innermode) ? innermode : outermode; +} + +/* Likewise for subreg X. */ + +inline machine_mode +wider_subreg_mode (const_rtx x) +{ + return wider_subreg_mode (GET_MODE (x), GET_MODE (SUBREG_REG (x))); +} + extern unsigned int subreg_size_highpart_offset (unsigned int, unsigned int); /* Return the SUBREG_BYTE for an OUTERMODE highpart of an INNERMODE value. */ -- cgit v1.1 From 73b0ac0b82fb81dbbab5738d6443b14225c3fe53 Mon Sep 17 00:00:00 2001 From: Michael Meissner Date: Thu, 26 Oct 2017 17:33:38 +0000 Subject: aix.h (TARGET_IEEEQUAD_DEFAULT): Set long double default to IBM. [gcc] 2017-10-26 Michael Meissner * config/rs6000/aix.h (TARGET_IEEEQUAD_DEFAULT): Set long double default to IBM. * config/rs6000/darwin.h (TARGET_IEEEQUAD_DEFAULT): Likewise. 
* config/rs6000/rs6000.opt (-mabi=ieeelongdouble): Move the warning to rs6000.c. Remove the Undocumented flag, since it has been documented. (-mabi=ibmlongdouble): Likewise. * config/rs6000/rs6000.c (TARGET_IEEEQUAD_DEFAULT): If it is not already set, set the default format for long double. (rs6000_debug_reg_global): Print whether long double is IBM or IEEE. (rs6000_option_override_internal): Rework setting long double format. Only warn if the user is changing the long double default and they did not use -Wno-psabi. * doc/invoke.texi (PowerPC options): Update the documentation for -mabi=ieeelongdouble and -mabi=ibmlongdouble. From-SVN: r254116 --- gcc/ChangeLog | 19 ++++++++++++++++++ gcc/config/rs6000/aix.h | 3 +++ gcc/config/rs6000/darwin.h | 3 +++ gcc/config/rs6000/rs6000.c | 46 ++++++++++++++++++++++++++++++++++++++------ gcc/config/rs6000/rs6000.opt | 4 ++-- gcc/doc/invoke.texi | 10 ++++++++-- 6 files changed, 75 insertions(+), 10 deletions(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 4312ac2..c6e0b85 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,22 @@ +2017-10-26 Michael Meissner + + * config/rs6000/aix.h (TARGET_IEEEQUAD_DEFAULT): Set long double + default to IBM. + * config/rs6000/darwin.h (TARGET_IEEEQUAD_DEFAULT): Likewise. + * config/rs6000/rs6000.opt (-mabi=ieeelongdouble): Move the + warning to rs6000.c. Remove the Undocumented flag, since it has + been documented. + (-mabi=ibmlongdouble): Likewise. + * config/rs6000/rs6000.c (TARGET_IEEEQUAD_DEFAULT): If it is not + already set, set the default format for long double. + (rs6000_debug_reg_global): Print whether long double is IBM or + IEEE. + (rs6000_option_override_internal): Rework setting long double + format. Only warn if the user is changing the long double default + and they did not use -Wno-psabi. + * doc/invoke.texi (PowerPC options): Update the documentation for + -mabi=ieeelongdouble and -mabi=ibmlongdouble. 
+ 2017-10-26 Richard Sandiford Alan Hayward David Sherwood diff --git a/gcc/config/rs6000/aix.h b/gcc/config/rs6000/aix.h index 607b42c..7354181 100644 --- a/gcc/config/rs6000/aix.h +++ b/gcc/config/rs6000/aix.h @@ -76,6 +76,9 @@ #undef TARGET_IEEEQUAD #define TARGET_IEEEQUAD 0 +#undef TARGET_IEEEQUAD_DEFAULT +#define TARGET_IEEEQUAD_DEFAULT 0 + /* The AIX linker will discard static constructors in object files before collect has a chance to see them, so scan the object files directly. */ #define COLLECT_EXPORT_LIST diff --git a/gcc/config/rs6000/darwin.h b/gcc/config/rs6000/darwin.h index 9a88a8d..a6a7b2c 100644 --- a/gcc/config/rs6000/darwin.h +++ b/gcc/config/rs6000/darwin.h @@ -272,6 +272,9 @@ extern int darwin_emit_branch_islands; #undef TARGET_IEEEQUAD #define TARGET_IEEEQUAD 0 +#undef TARGET_IEEEQUAD_DEFAULT +#define TARGET_IEEEQUAD_DEFAULT 0 + /* Since Darwin doesn't do TOCs, stub this out. */ #define ASM_OUTPUT_SPECIAL_POOL_ENTRY_P(X, MODE) ((void)X, (void)MODE, 0) diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 3095419..3162d52 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -86,6 +86,20 @@ #define TARGET_NO_PROTOTYPE 0 #endif + /* Set -mabi=ieeelongdouble on some old targets. In the future, power server + systems will also set long double to be IEEE 128-bit. AIX and Darwin + explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so + those systems will not pick up this default. This needs to be after all + of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are + properly defined. */ +#ifndef TARGET_IEEEQUAD_DEFAULT +#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD) +#define TARGET_IEEEQUAD_DEFAULT 1 +#else +#define TARGET_IEEEQUAD_DEFAULT 0 +#endif +#endif + #define min(A,B) ((A) < (B) ? (A) : (B)) #define max(A,B) ((A) > (B) ? 
(A) : (B)) @@ -2878,6 +2892,13 @@ rs6000_debug_reg_global (void) fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size); fprintf (stderr, DEBUG_FMT_D, "long_double_size", rs6000_long_double_type_size); + if (rs6000_long_double_type_size == 128) + { + fprintf (stderr, DEBUG_FMT_S, "long double type", + TARGET_IEEEQUAD ? "IEEE" : "IBM"); + fprintf (stderr, DEBUG_FMT_S, "default long double type", + TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM"); + } fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority", (int)rs6000_sched_restricted_insns_priority); fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins", @@ -4560,13 +4581,26 @@ rs6000_option_override_internal (bool global_init_p) rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE; } - /* Set -mabi=ieeelongdouble on some old targets. Note, AIX and Darwin - explicitly redefine TARGET_IEEEQUAD to 0, so those systems will not - pick up this default. */ -#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD) + /* Set -mabi=ieeelongdouble on some old targets. In the future, power server + systems will also set long double to be IEEE 128-bit. AIX and Darwin + explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so + those systems will not pick up this default. Warn if the user changes the + default unless -Wno-psabi. */ if (!global_options_set.x_rs6000_ieeequad) - rs6000_ieeequad = 1; -#endif + rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT; + + else if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128) + { + static bool warned_change_long_double; + if (!warned_change_long_double) + { + warned_change_long_double = true; + if (TARGET_IEEEQUAD) + warning (OPT_Wpsabi, "Using IEEE extended precision long double"); + else + warning (OPT_Wpsabi, "Using IBM extended precision long double"); + } + } /* Enable the default support for IEEE 128-bit floating point on Linux VSX sytems. 
In GCC 7, we would enable the the IEEE 128-bit floating point diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index c42818f..e7d0829 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -381,10 +381,10 @@ mabi=d32 Target RejectNegative Undocumented Warn(using old darwin ABI) Var(rs6000_darwin64_abi, 0) mabi=ieeelongdouble -Target RejectNegative Undocumented Warn(using IEEE extended precision long double) Var(rs6000_ieeequad) Save +Target RejectNegative Var(rs6000_ieeequad) Save mabi=ibmlongdouble -Target RejectNegative Undocumented Warn(using IBM extended precision long double) Var(rs6000_ieeequad, 0) +Target RejectNegative Var(rs6000_ieeequad, 0) mcpu= Target RejectNegative Joined Var(rs6000_cpu_index) Init(-1) Enum(rs6000_cpu_opt_value) Save diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 71b2445..f950c3c 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -22614,12 +22614,18 @@ Disable Book-E SPE ABI extensions for the current ABI@. @item -mabi=ibmlongdouble @opindex mabi=ibmlongdouble Change the current ABI to use IBM extended-precision long double. -This is a PowerPC 32-bit SYSV ABI option. +This is not likely to work if your system defaults to using IEEE +extended-precision long double. If you change the long double type +from IEEE extended-precision, the compiler will issue a warning unless +you use the @option{-Wno-psabi} option. @item -mabi=ieeelongdouble @opindex mabi=ieeelongdouble Change the current ABI to use IEEE extended-precision long double. -This is a PowerPC 32-bit Linux ABI option. +This is not likely to work if your system defaults to using IBM +extended-precision long double. If you change the long double type +from IBM extended-precision, the compiler will issue a warning unless +you use the @option{-Wno-psabi} option. 
@item -mabi=elfv1 @opindex mabi=elfv1 -- cgit v1.1 From 7e23f4a6f80994e305f35f6de6f79966becaa92d Mon Sep 17 00:00:00 2001 From: Olga Makhotina Date: Thu, 26 Oct 2017 18:18:56 +0000 Subject: Adding missing CMP* intrinsics gcc/ * config/i386/avx512fintrin.h (_mm512_cmpeq_pd_mask, _mm512_cmple_pd_mask, _mm512_cmplt_pd_mask, _mm512_cmpneq_pd_mask, _mm512_cmpnle_pd_mask, _mm512_cmpnlt_pd_mask, _mm512_cmpord_pd_mask, _mm512_cmpunord_pd_mask, _mm512_mask_cmpeq_pd_mask, _mm512_mask_cmple_pd_mask, _mm512_mask_cmplt_pd_mask, _mm512_mask_cmpneq_pd_mask, _mm512_mask_cmpnle_pd_mask, _mm512_mask_cmpnlt_pd_mask, _mm512_mask_cmpord_pd_mask, _mm512_mask_cmpunord_pd_mask, _mm512_cmpeq_ps_mask, _mm512_cmple_ps_mask, _mm512_cmplt_ps_mask, _mm512_cmpneq_ps_mask, _mm512_cmpnle_ps_mask, _mm512_cmpnlt_ps_mask, _mm512_cmpord_ps_mask, _mm512_cmpunord_ps_mask, _mm512_mask_cmpeq_ps_mask, _mm512_mask_cmple_ps_mask, _mm512_mask_cmplt_ps_mask, _mm512_mask_cmpneq_ps_mask, _mm512_mask_cmpnle_ps_mask, _mm512_mask_cmpnlt_ps_mask, _mm512_mask_cmpord_ps_mask, _mm512_mask_cmpunord_ps_mask): New intrinsics. gcc/testsuite/ * gcc.target/i386/avx512f-vcmpps-1.c (_mm512_cmpeq_ps_mask, _mm512_cmple_ps_mask, _mm512_cmplt_ps_mask, _mm512_cmpneq_ps_mask, _mm512_cmpnle_ps_mask, _mm512_cmpnlt_ps_mask, _mm512_cmpord_ps_mask, _mm512_cmpunord_ps_mask, _mm512_mask_cmpeq_ps_mask, _mm512_mask_cmple_ps_mask, _mm512_mask_cmplt_ps_mask, _mm512_mask_cmpneq_ps_mask, _mm512_mask_cmpnle_ps_mask, _mm512_mask_cmpnlt_ps_mask, _mm512_mask_cmpord_ps_mask, _mm512_mask_cmpunord_ps_mask): Test new intrinsics. 
* gcc.target/i386/avx512f-vcmpps-2.c (_mm512_cmpeq_ps_mask, _mm512_cmple_ps_mask, _mm512_cmplt_ps_mask, _mm512_cmpneq_ps_mask, _mm512_cmpnle_ps_mask, _mm512_cmpnlt_ps_mask, _mm512_cmpord_ps_mask, _mm512_cmpunord_ps_mask, _mm512_mask_cmpeq_ps_mask, _mm512_mask_cmple_ps_mask, _mm512_mask_cmplt_ps_mask, _mm512_mask_cmpneq_ps_mask, _mm512_mask_cmpnle_ps_mask, _mm512_mask_cmpnlt_ps_mask, _mm512_mask_cmpord_ps_mask, _mm512_mask_cmpunord_ps_mask): Test new intrinsics. * gcc.target/i386/avx512f-vcmppd-1.c (_mm512_cmpeq_pd_mask, _mm512_cmple_pd_mask, _mm512_cmplt_pd_mask, _mm512_cmpneq_pd_mask, _mm512_cmpnle_pd_mask, _mm512_cmpnlt_pd_mask, _mm512_cmpord_pd_mask, _mm512_cmpunord_pd_mask, _mm512_mask_cmpeq_pd_mask, _mm512_mask_cmple_pd_mask, _mm512_mask_cmplt_pd_mask, _mm512_mask_cmpneq_pd_mask, _mm512_mask_cmpnle_pd_mask, _mm512_mask_cmpnlt_pd_mask, _mm512_mask_cmpord_pd_mask, _mm512_mask_cmpunord_pd_mask): Test new intrinsics. * gcc.target/i386/avx512f-vcmppd-2.c (_mm512_cmpeq_pd_mask, _mm512_cmple_pd_mask, _mm512_cmplt_pd_mask, _mm512_cmpneq_pd_mask, _mm512_cmpnle_pd_mask, _mm512_cmpnlt_pd_mask, _mm512_cmpord_pd_mask, _mm512_cmpunord_pd_mask, _mm512_mask_cmpeq_pd_mask, _mm512_mask_cmple_pd_mask, _mm512_mask_cmplt_pd_mask, _mm512_mask_cmpneq_pd_mask, _mm512_mask_cmpnle_pd_mask, _mm512_mask_cmpnlt_pd_mask, _mm512_mask_cmpord_pd_mask, _mm512_mask_cmpunord_pd_mask): Test new intrinsics. 
From-SVN: r254118 --- gcc/ChangeLog | 20 ++ gcc/config/i386/avx512fintrin.h | 320 +++++++++++++++++++++++ gcc/testsuite/ChangeLog | 39 +++ gcc/testsuite/gcc.target/i386/avx512f-vcmppd-1.c | 29 +- gcc/testsuite/gcc.target/i386/avx512f-vcmppd-2.c | 77 +++--- gcc/testsuite/gcc.target/i386/avx512f-vcmpps-1.c | 28 +- gcc/testsuite/gcc.target/i386/avx512f-vcmpps-2.c | 78 +++--- 7 files changed, 520 insertions(+), 71 deletions(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c6e0b85..d5d32b9 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,23 @@ +2017-10-26 Olga Makhotina + + * config/i386/avx512fintrin.h (_mm512_cmpeq_pd_mask, + _mm512_cmple_pd_mask, _mm512_cmplt_pd_mask, + _mm512_cmpneq_pd_mask, _mm512_cmpnle_pd_mask, + _mm512_cmpnlt_pd_mask, _mm512_cmpord_pd_mask, + _mm512_cmpunord_pd_mask, _mm512_mask_cmpeq_pd_mask, + _mm512_mask_cmple_pd_mask, _mm512_mask_cmplt_pd_mask, + _mm512_mask_cmpneq_pd_mask, _mm512_mask_cmpnle_pd_mask, + _mm512_mask_cmpnlt_pd_mask, _mm512_mask_cmpord_pd_mask, + _mm512_mask_cmpunord_pd_mask, _mm512_cmpeq_ps_mask, + _mm512_cmple_ps_mask, _mm512_cmplt_ps_mask, + _mm512_cmpneq_ps_mask, _mm512_cmpnle_ps_mask, + _mm512_cmpnlt_ps_mask, _mm512_cmpord_ps_mask, + _mm512_cmpunord_ps_mask, _mm512_mask_cmpeq_ps_mask, + _mm512_mask_cmple_ps_mask, _mm512_mask_cmplt_ps_mask, + _mm512_mask_cmpneq_ps_mask, _mm512_mask_cmpnle_ps_mask, + _mm512_mask_cmpnlt_ps_mask, _mm512_mask_cmpord_ps_mask, + _mm512_mask_cmpunord_ps_mask): New intrinsics. 
+ 2017-10-26 Michael Meissner * config/rs6000/aix.h (TARGET_IEEEQUAD_DEFAULT): Set long double diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index 72f57f7..5dc5fae 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -14005,6 +14005,326 @@ _mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P) extern __inline __mmask8 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmpeq_pd_mask (__m512d __X, __m512d __Y) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, _CMP_EQ_OQ, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmpeq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, _CMP_EQ_OQ, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmplt_pd_mask (__m512d __X, __m512d __Y) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, _CMP_LT_OS, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmplt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, _CMP_LT_OS, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmple_pd_mask (__m512d __X, __m512d __Y) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, _CMP_LE_OS, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmple_pd_mask 
(__mmask8 __U, __m512d __X, __m512d __Y) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, _CMP_LE_OS, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmpunord_pd_mask (__m512d __X, __m512d __Y) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, _CMP_UNORD_Q, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmpunord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, _CMP_UNORD_Q, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmpneq_pd_mask (__m512d __X, __m512d __Y) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, _CMP_NEQ_UQ, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmpneq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, _CMP_NEQ_UQ, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmpnlt_pd_mask (__m512d __X, __m512d __Y) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, _CMP_NLT_US, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmpnlt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, _CMP_NLT_US, + (__mmask8) __U, + 
_MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmpnle_pd_mask (__m512d __X, __m512d __Y) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, _CMP_NLE_US, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmpnle_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, _CMP_NLE_US, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmpord_pd_mask (__m512d __X, __m512d __Y) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, _CMP_ORD_Q, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmpord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, _CMP_ORD_Q, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmpeq_ps_mask (__m512 __X, __m512 __Y) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, _CMP_EQ_OQ, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmpeq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, _CMP_EQ_OQ, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmplt_ps_mask (__m512 __X, __m512 __Y) +{ + return 
(__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, _CMP_LT_OS, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmplt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, _CMP_LT_OS, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmple_ps_mask (__m512 __X, __m512 __Y) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, _CMP_LE_OS, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmple_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, _CMP_LE_OS, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmpunord_ps_mask (__m512 __X, __m512 __Y) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, _CMP_UNORD_Q, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmpunord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, _CMP_UNORD_Q, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmpneq_ps_mask (__m512 __X, __m512 __Y) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, _CMP_NEQ_UQ, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmpneq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, _CMP_NEQ_UQ, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmpnlt_ps_mask (__m512 __X, __m512 __Y) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, _CMP_NLT_US, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmpnlt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, _CMP_NLT_US, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmpnle_ps_mask (__m512 __X, __m512 __Y) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, _CMP_NLE_US, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmpnle_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, _CMP_NLE_US, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmpord_ps_mask (__m512 __X, __m512 __Y) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, _CMP_ORD_Q, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmpord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) +{ + return (__mmask16) 
__builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, _CMP_ORD_Q, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P) { return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X, diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 283c787..c035cd7 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,42 @@ +2017-10-26 Olga Makhotina + + * gcc.target/i386/avx512f-vcmpps-1.c (_mm512_cmpeq_ps_mask, + _mm512_cmple_ps_mask, _mm512_cmplt_ps_mask, + _mm512_cmpneq_ps_mask, _mm512_cmpnle_ps_mask, + _mm512_cmpnlt_ps_mask, _mm512_cmpord_ps_mask, + _mm512_cmpunord_ps_mask, _mm512_mask_cmpeq_ps_mask, + _mm512_mask_cmple_ps_mask, _mm512_mask_cmplt_ps_mask, + _mm512_mask_cmpneq_ps_mask, _mm512_mask_cmpnle_ps_mask, + _mm512_mask_cmpnlt_ps_mask, _mm512_mask_cmpord_ps_mask, + _mm512_mask_cmpunord_ps_mask): Test new intrinsics. + * gcc.target/i386/avx512f-vcmpps-2.c (_mm512_cmpeq_ps_mask, + _mm512_cmple_ps_mask, _mm512_cmplt_ps_mask, + _mm512_cmpneq_ps_mask, _mm512_cmpnle_ps_mask, + _mm512_cmpnlt_ps_mask, _mm512_cmpord_ps_mask, + _mm512_cmpunord_ps_mask, _mm512_mask_cmpeq_ps_mask, + _mm512_mask_cmple_ps_mask, _mm512_mask_cmplt_ps_mask, + _mm512_mask_cmpneq_ps_mask, _mm512_mask_cmpnle_ps_mask, + _mm512_mask_cmpnlt_ps_mask, _mm512_mask_cmpord_ps_mask, + _mm512_mask_cmpunord_ps_mask): Test new intrinsics. + * gcc.target/i386/avx512f-vcmppd-1.c (_mm512_cmpeq_pd_mask, + _mm512_cmple_pd_mask, _mm512_cmplt_pd_mask, + _mm512_cmpneq_pd_mask, _mm512_cmpnle_pd_mask, + _mm512_cmpnlt_pd_mask, _mm512_cmpord_pd_mask, + _mm512_cmpunord_pd_mask, _mm512_mask_cmpeq_pd_mask, + _mm512_mask_cmple_pd_mask, _mm512_mask_cmplt_pd_mask, + _mm512_mask_cmpneq_pd_mask, _mm512_mask_cmpnle_pd_mask, + _mm512_mask_cmpnlt_pd_mask, _mm512_mask_cmpord_pd_mask, + _mm512_mask_cmpunord_pd_mask): Test new intrinsics. 
+ * gcc.target/i386/avx512f-vcmppd-2.c (_mm512_cmpeq_pd_mask, + _mm512_cmple_pd_mask, _mm512_cmplt_pd_mask, + _mm512_cmpneq_pd_mask, _mm512_cmpnle_pd_mask, + _mm512_cmpnlt_pd_mask, _mm512_cmpord_pd_mask, + _mm512_cmpunord_pd_mask, _mm512_mask_cmpeq_pd_mask, + _mm512_mask_cmple_pd_mask, _mm512_mask_cmplt_pd_mask, + _mm512_mask_cmpneq_pd_mask, _mm512_mask_cmpnle_pd_mask, + _mm512_mask_cmpnlt_pd_mask, _mm512_mask_cmpord_pd_mask, + _mm512_mask_cmpunord_pd_mask): Test new intrinsics. + 2017-10-26 Wilco Dijkstra * gcc.target/aarch64/ldp_stp_unaligned_2.c: New file. diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-1.c index 4b53e37..d3c30fc 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-1.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-O2 -mavx512f" } */ -/* { dg-final { scan-assembler-times "vcmppd\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcmppd\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcmppd\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 9 } } */ +/* { dg-final { scan-assembler-times "vcmppd\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 9 } } */ /* { dg-final { scan-assembler-times "vcmppd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcmppd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ @@ -17,4 +17,29 @@ avx512f_test (void) m = _mm512_mask_cmp_pd_mask (m, x, x, _CMP_FALSE_OQ); m = _mm512_cmp_round_pd_mask (x, x, _CMP_FALSE_OQ, _MM_FROUND_NO_EXC); m = _mm512_mask_cmp_round_pd_mask (m, x, x, _CMP_FALSE_OQ, _MM_FROUND_NO_EXC); + + m = 
_mm512_cmpeq_pd_mask (x, x); + m = _mm512_mask_cmpeq_pd_mask (m, x, x); + + m = _mm512_cmplt_pd_mask (x, x); + m = _mm512_mask_cmplt_pd_mask (m, x, x); + + m = _mm512_cmple_pd_mask (x, x); + m = _mm512_mask_cmple_pd_mask (m, x, x); + + m = _mm512_cmpunord_pd_mask (x, x); + m = _mm512_mask_cmpunord_pd_mask (m, x, x); + + m = _mm512_cmpneq_pd_mask (x, x); + m = _mm512_mask_cmpneq_pd_mask (m, x, x); + + m = _mm512_cmpnlt_pd_mask (x, x); + m = _mm512_mask_cmpnlt_pd_mask (m, x, x); + + m = _mm512_cmpnle_pd_mask (x, x); + m = _mm512_mask_cmpnle_pd_mask (m, x, x); + + m = _mm512_cmpord_pd_mask (x, x); + m = _mm512_mask_cmpord_pd_mask (m, x, x); } + diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-2.c index 52e226d..cee1197 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-2.c @@ -11,58 +11,69 @@ #define SIZE (AVX512F_LEN / 64) #include "avx512f-mask-type.h" +#undef SUF +#undef SSIZE +#undef GEN_CMP +#undef CHECK_CMP + #if AVX512F_LEN == 512 -#define CMP(imm, rel) \ - dst_ref = 0; \ - for (i = 0; i < 8; i++) \ - { \ - dst_ref = (((int) rel) << i) | dst_ref; \ - } \ - source1.x = _mm512_loadu_pd(s1); \ - source2.x = _mm512_loadu_pd(s2); \ - dst1 = _mm512_cmp_pd_mask(source1.x, source2.x, imm);\ - dst2 = _mm512_mask_cmp_pd_mask(mask, source1.x, source2.x, imm);\ - if (dst_ref != dst1) abort(); \ - if ((dst_ref & mask) != dst2) abort(); +#define SUF(fun) _mm512##fun +#define SSIZE 8 + +#define GEN_CMP(type) \ + { \ + dst3 = _mm512_cmp##type##_pd_mask(source1.x, source2.x);\ + dst4 = _mm512_mask_cmp##type##_pd_mask(mask, source1.x, source2.x);\ + if (dst3 != dst1) abort(); \ + if (dst4 != dst2) abort(); \ + } + +#define CHECK_CMP(imm) \ + if (imm == _CMP_EQ_OQ) GEN_CMP(eq) \ + if (imm == _CMP_LT_OS) GEN_CMP(lt) \ + if (imm == _CMP_LE_OS) GEN_CMP(le) \ + if (imm == _CMP_UNORD_Q) GEN_CMP(unord) \ + if (imm == _CMP_NEQ_UQ) GEN_CMP(neq) \ + if (imm == 
_CMP_NLT_US) GEN_CMP(nlt) \ + if (imm == _CMP_NLE_US) GEN_CMP(nle) \ + if (imm == _CMP_ORD_Q) GEN_CMP(ord) + #endif #if AVX512F_LEN == 256 -#undef CMP -#define CMP(imm, rel) \ - dst_ref = 0; \ - for (i = 0; i < 4; i++) \ - { \ - dst_ref = (((int) rel) << i) | dst_ref; \ - } \ - source1.x = _mm256_loadu_pd(s1); \ - source2.x = _mm256_loadu_pd(s2); \ - dst1 = _mm256_cmp_pd_mask(source1.x, source2.x, imm);\ - dst2 = _mm256_mask_cmp_pd_mask(mask, source1.x, source2.x, imm);\ - if (dst_ref != dst1) abort(); \ - if ((dst_ref & mask) != dst2) abort(); +#define SUF(fun) _mm256##fun +#define SSIZE 4 +#define GEN_CMP(type) +#define CHECK_CMP(imm) #endif #if AVX512F_LEN == 128 +#define SUF(fun) _mm##fun +#define SSIZE 2 +#define GEN_CMP(type) +#define CHECK_CMP(imm) +#endif + #undef CMP #define CMP(imm, rel) \ dst_ref = 0; \ - for (i = 0; i < 2; i++) \ + for (i = 0; i < SSIZE; i++) \ { \ dst_ref = (((int) rel) << i) | dst_ref; \ } \ - source1.x = _mm_loadu_pd(s1); \ - source2.x = _mm_loadu_pd(s2); \ - dst1 = _mm_cmp_pd_mask(source1.x, source2.x, imm);\ - dst2 = _mm_mask_cmp_pd_mask(mask, source1.x, source2.x, imm);\ + source1.x = SUF(_loadu_pd)(s1); \ + source2.x = SUF(_loadu_pd)(s2); \ + dst1 = SUF(_cmp_pd_mask)(source1.x, source2.x, imm);\ + dst2 = SUF(_mask_cmp_pd_mask)(mask, source1.x, source2.x, imm);\ if (dst_ref != dst1) abort(); \ - if ((dst_ref & mask) != dst2) abort(); -#endif + if ((dst_ref & mask) != dst2) abort(); \ + CHECK_CMP(imm) void TEST () { UNION_TYPE (AVX512F_LEN, d) source1, source2; - MASK_TYPE dst1, dst2, dst_ref; + MASK_TYPE dst1, dst2, dst3, dst4, dst_ref; MASK_TYPE mask = MASK_VALUE; int i; double s1[8]={2134.3343, 6678.346, 453.345635, 54646.464, diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-1.c index 9812915..27be360 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-1.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { 
dg-options "-O2 -mavx512f" } */ -/* { dg-final { scan-assembler-times "vcmpps\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcmpps\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcmpps\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 9 } } */ +/* { dg-final { scan-assembler-times "vcmpps\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 9 } } */ /* { dg-final { scan-assembler-times "vcmpps\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcmpps\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ @@ -17,4 +17,28 @@ avx512f_test (void) m = _mm512_mask_cmp_ps_mask (m, x, x, _CMP_FALSE_OQ); m = _mm512_cmp_round_ps_mask (x, x, _CMP_FALSE_OQ, _MM_FROUND_NO_EXC); m = _mm512_mask_cmp_round_ps_mask (m, x, x, _CMP_FALSE_OQ, _MM_FROUND_NO_EXC); + + m = _mm512_cmpeq_ps_mask (x, x); + m = _mm512_mask_cmpeq_ps_mask (m, x, x); + + m = _mm512_cmplt_ps_mask (x, x); + m = _mm512_mask_cmplt_ps_mask (m, x, x); + + m = _mm512_cmple_ps_mask (x, x); + m = _mm512_mask_cmple_ps_mask (m, x, x); + + m = _mm512_cmpunord_ps_mask (x, x); + m = _mm512_mask_cmpunord_ps_mask (m, x, x); + + m = _mm512_cmpneq_ps_mask (x, x); + m = _mm512_mask_cmpneq_ps_mask (m, x, x); + + m = _mm512_cmpnlt_ps_mask (x, x); + m = _mm512_mask_cmpnlt_ps_mask (m, x, x); + + m = _mm512_cmpnle_ps_mask (x, x); + m = _mm512_mask_cmpnle_ps_mask (m, x, x); + + m = _mm512_cmpord_ps_mask (x, x); + m = _mm512_mask_cmpord_ps_mask (m, x, x); } diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-2.c index 2ffa2ed..22e368f 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-2.c +++ 
b/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-2.c @@ -11,59 +11,69 @@ #define SIZE (AVX512F_LEN / 32) #include "avx512f-mask-type.h" +#undef SUF +#undef SSIZE +#undef GEN_CMP +#undef CHECK_CMP + #if AVX512F_LEN == 512 -#undef CMP -#define CMP(imm, rel) \ - dst_ref = 0; \ - for (i = 0; i < 16; i++) \ - { \ - dst_ref = (((int) rel) << i) | dst_ref; \ - } \ - source1.x = _mm512_loadu_ps(s1); \ - source2.x = _mm512_loadu_ps(s2); \ - dst1 = _mm512_cmp_ps_mask(source1.x, source2.x, imm);\ - dst2 = _mm512_mask_cmp_ps_mask(mask, source1.x, source2.x, imm);\ - if (dst_ref != dst1) abort(); \ - if ((dst_ref & mask) != dst2) abort(); +#define SUF(fun) _mm512##fun +#define SSIZE 16 + +#define GEN_CMP(type) \ + { \ + dst3 = _mm512_cmp##type##_ps_mask(source1.x, source2.x);\ + dst4 = _mm512_mask_cmp##type##_ps_mask(mask, source1.x, source2.x);\ + if (dst3 != dst1) abort(); \ + if (dst4 != dst2) abort(); \ + } + +#define CHECK_CMP(imm) \ + if (imm == _CMP_EQ_OQ) GEN_CMP(eq) \ + if (imm == _CMP_LT_OS) GEN_CMP(lt) \ + if (imm == _CMP_LE_OS) GEN_CMP(le) \ + if (imm == _CMP_UNORD_Q) GEN_CMP(unord) \ + if (imm == _CMP_NEQ_UQ) GEN_CMP(neq) \ + if (imm == _CMP_NLT_US) GEN_CMP(nlt) \ + if (imm == _CMP_NLE_US) GEN_CMP(nle) \ + if (imm == _CMP_ORD_Q) GEN_CMP(ord) + #endif #if AVX512F_LEN == 256 -#undef CMP -#define CMP(imm, rel) \ - dst_ref = 0; \ - for (i = 0; i < 8; i++) \ - { \ - dst_ref = (((int) rel) << i) | dst_ref; \ - } \ - source1.x = _mm256_loadu_ps(s1); \ - source2.x = _mm256_loadu_ps(s2); \ - dst1 = _mm256_cmp_ps_mask(source1.x, source2.x, imm);\ - dst2 = _mm256_mask_cmp_ps_mask(mask, source1.x, source2.x, imm);\ - if (dst_ref != dst1) abort(); \ - if ((dst_ref & mask) != dst2) abort(); +#define SUF(fun) _mm256##fun +#define SSIZE 8 +#define GEN_CMP(type) +#define CHECK_CMP(imm) #endif #if AVX512F_LEN == 128 +#define SUF(fun) _mm##fun +#define SSIZE 4 +#define GEN_CMP(type) +#define CHECK_CMP(imm) +#endif + #undef CMP #define CMP(imm, rel) \ dst_ref = 0; \ - for (i = 0; i < 4; 
i++) \ + for (i = 0; i < SSIZE; i++) \ { \ dst_ref = (((int) rel) << i) | dst_ref; \ } \ - source1.x = _mm_loadu_ps(s1); \ - source2.x = _mm_loadu_ps(s2); \ - dst1 = _mm_cmp_ps_mask(source1.x, source2.x, imm);\ - dst2 = _mm_mask_cmp_ps_mask(mask, source1.x, source2.x, imm);\ + source1.x = SUF(_loadu_ps)(s1); \ + source2.x = SUF(_loadu_ps)(s2); \ + dst1 = SUF(_cmp_ps_mask)(source1.x, source2.x, imm);\ + dst2 = SUF(_mask_cmp_ps_mask)(mask, source1.x, source2.x, imm);\ if (dst_ref != dst1) abort(); \ - if ((dst_ref & mask) != dst2) abort(); -#endif + if ((dst_ref & mask) != dst2) abort(); \ + CHECK_CMP(imm) void TEST () { UNION_TYPE (AVX512F_LEN,) source1, source2; - MASK_TYPE dst1, dst2, dst_ref; + MASK_TYPE dst1, dst2, dst3, dst4, dst_ref; MASK_TYPE mask = MASK_VALUE; int i; float s1[16] = {2134.3343, 6678.346, 453.345635, 54646.464, -- cgit v1.1 From 8a866b8296874b5f6f3371bb368c8191c828d98f Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Thu, 26 Oct 2017 20:09:24 +0000 Subject: Fix unsharing of GIMPLE_OMP_{SINGLE,TARGET,TEAMS} in gimple_copy 2017-10-26 Tom de Vries PR tree-optimization/82707 * gimple.c (gimple_copy): Fix unsharing of GIMPLE_OMP_{SINGLE,TARGET,TEAMS}. From-SVN: r254120 --- gcc/ChangeLog | 6 ++++++ gcc/gimple.c | 26 +++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d5d32b9..791f3c1 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2017-10-26 Tom de Vries + + PR tree-optimization/82707 + * gimple.c (gimple_copy): Fix unsharing of + GIMPLE_OMP_{SINGLE,TARGET,TEAMS}. 
+ 2017-10-26 Olga Makhotina * config/i386/avx512fintrin.h (_mm512_cmpeq_pd_mask, diff --git a/gcc/gimple.c b/gcc/gimple.c index 1f291e1..37f2248 100644 --- a/gcc/gimple.c +++ b/gcc/gimple.c @@ -1840,11 +1840,35 @@ gimple_copy (gimple *stmt) gimple_omp_sections_set_clauses (copy, t); t = unshare_expr (gimple_omp_sections_control (stmt)); gimple_omp_sections_set_control (copy, t); - /* FALLTHRU */ + goto copy_omp_body; case GIMPLE_OMP_SINGLE: + { + gomp_single *omp_single_copy = as_a (copy); + t = unshare_expr (gimple_omp_single_clauses (stmt)); + gimple_omp_single_set_clauses (omp_single_copy, t); + } + goto copy_omp_body; + case GIMPLE_OMP_TARGET: + { + gomp_target *omp_target_stmt = as_a (stmt); + gomp_target *omp_target_copy = as_a (copy); + t = unshare_expr (gimple_omp_target_clauses (omp_target_stmt)); + gimple_omp_target_set_clauses (omp_target_copy, t); + t = unshare_expr (gimple_omp_target_data_arg (omp_target_stmt)); + gimple_omp_target_set_data_arg (omp_target_copy, t); + } + goto copy_omp_body; + case GIMPLE_OMP_TEAMS: + { + gomp_teams *omp_teams_copy = as_a (copy); + t = unshare_expr (gimple_omp_teams_clauses (stmt)); + gimple_omp_teams_set_clauses (omp_teams_copy, t); + } + /* FALLTHRU */ + case GIMPLE_OMP_SECTION: case GIMPLE_OMP_MASTER: case GIMPLE_OMP_TASKGROUP: -- cgit v1.1 From 1cf6c17b6dbd361ed7c74414cc753e4037d0d0e8 Mon Sep 17 00:00:00 2001 From: James E Wilson Date: Thu, 26 Oct 2017 13:41:20 -0700 Subject: Add some usage info -fdebug-prefix-map= docs. gcc/ * doc/invoke.texi (-fdebug-prefix-map): Expand documentation. From-SVN: r254121 --- gcc/ChangeLog | 4 ++++ gcc/doc/invoke.texi | 7 ++++++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 791f3c1..a2bc674 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,7 @@ +2017-10-26 Jim Wilson + + * doc/invoke.texi (-fdebug-prefix-map): Expand documentation. 
+ 2017-10-26 Tom de Vries PR tree-optimization/82707 diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index f950c3c..0d2121b 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -6982,7 +6982,12 @@ link processing time. Merging is enabled by default. @item -fdebug-prefix-map=@var{old}=@var{new} @opindex fdebug-prefix-map When compiling files in directory @file{@var{old}}, record debugging -information describing them as in @file{@var{new}} instead. +information describing them as in @file{@var{new}} instead. This can be +used to replace a build time path with an install time path in the debug info. +It can also be used to change an absolute path to a relative path by using +@file{.} for @var{new}. This can give more reproducible builds, which are +location independent, but may require an extra command to tell GDB where to +find the source files. @item -fvar-tracking @opindex fvar-tracking -- cgit v1.1 From 31498bee1acc00d89ea82c9c8614aa65f1eff2e8 Mon Sep 17 00:00:00 2001 From: Jim Wilson Date: Thu, 26 Oct 2017 13:44:58 -0700 Subject: Fix hyphenation build-time path and install-time path. gcc/ * doc/invoke.texi (-fdebug-prefix-map): Expand documentation. From-SVN: r254122 --- gcc/doc/invoke.texi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 0d2121b..efe8f86 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -6983,7 +6983,7 @@ link processing time. Merging is enabled by default. @opindex fdebug-prefix-map When compiling files in directory @file{@var{old}}, record debugging information describing them as in @file{@var{new}} instead. This can be -used to replace a build time path with an install time path in the debug info. +used to replace a build-time path with an install-time path in the debug info. It can also be used to change an absolute path to a relative path by using @file{.} for @var{new}. 
This can give more reproducible builds, which are location independent, but may require an extra command to tell GDB where to -- cgit v1.1 From de10fca02a806c8c323041c5e904abaaef510fc0 Mon Sep 17 00:00:00 2001 From: Sandra Loosemore Date: Thu, 26 Oct 2017 16:49:48 -0400 Subject: nios2.c: Include xregex.h. 2017-10-26 Sandra Loosemore gcc/ * config/nios2/nios2.c: Include xregex.h. (nios2_gprel_sec_regex): New. (nios2_option_overide): Initialize it. Don't allow GP-relative addressing with PIC. (nios2_small_section_name_p): Check for regex match. * config/nios2/nios2.opt (mgprel-sec=): New option. * doc/invoke.texi (Option Summary): Add -mgprel-sec. (Nios II Options): Document -mgprel-sec. gcc/testsuite/ * gcc.target/nios2/gpopt-gprel-sec.c: New. From-SVN: r254123 --- gcc/ChangeLog | 11 +++++++ gcc/config/nios2/nios2.c | 25 +++++++++++++++- gcc/config/nios2/nios2.opt | 4 +++ gcc/doc/invoke.texi | 13 ++++++++ gcc/testsuite/ChangeLog | 4 +++ gcc/testsuite/gcc.target/nios2/gpopt-gprel-sec.c | 38 ++++++++++++++++++++++++ 6 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/nios2/gpopt-gprel-sec.c (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a2bc674..7ee9da2 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2017-10-26 Sandra Loosemore + + * config/nios2/nios2.c: Include xregex.h. + (nios2_gprel_sec_regex): New. + (nios2_option_overide): Initialize it. Don't allow GP-relative + addressing with PIC. + (nios2_small_section_name_p): Check for regex match. + * config/nios2/nios2.opt (mgprel-sec=): New option. + * doc/invoke.texi (Option Summary): Add -mgprel-sec. + (Nios II Options): Document -mgprel-sec. + 2017-10-26 Jim Wilson * doc/invoke.texi (-fdebug-prefix-map): Expand documentation. 
diff --git a/gcc/config/nios2/nios2.c b/gcc/config/nios2/nios2.c index f5963d4..3aade7b 100644 --- a/gcc/config/nios2/nios2.c +++ b/gcc/config/nios2/nios2.c @@ -49,6 +49,7 @@ #include "stor-layout.h" #include "builtins.h" #include "tree-pass.h" +#include "xregex.h" /* This file should be included last. */ #include "target-def.h" @@ -103,6 +104,9 @@ static int custom_code_index[256]; /* Set to true if any conflicts (re-use of a code between 0-255) are found. */ static bool custom_code_conflict = false; +/* State for command-line options. */ +regex_t nios2_gprel_sec_regex; + /* Definition of builtin function types for nios2. */ @@ -1371,6 +1375,23 @@ nios2_option_override (void) nios2_gpopt_option = gpopt_local; } + /* GP-relative addressing doesn't make sense for PIC. */ + if (flag_pic) + { + if (nios2_gpopt_option != gpopt_none) + error ("-mgpopt not supported with PIC."); + if (nios2_gprel_sec) + error ("-mgprel-sec= not supported with PIC."); + } + + /* Process -mgprel-sec=. */ + if (nios2_gprel_sec) + { + if (regcomp (&nios2_gprel_sec_regex, nios2_gprel_sec, + REG_EXTENDED | REG_NOSUB)) + error ("-mgprel-sec= argument is not a valid regular expression."); + } + /* If we don't have mul, we don't have mulx either! */ if (!TARGET_HAS_MUL && TARGET_HAS_MULX) target_flags &= ~MASK_HAS_MULX; @@ -2268,7 +2289,9 @@ nios2_small_section_name_p (const char *section) return (strcmp (section, ".sbss") == 0 || strncmp (section, ".sbss.", 6) == 0 || strcmp (section, ".sdata") == 0 - || strncmp (section, ".sdata.", 7) == 0); + || strncmp (section, ".sdata.", 7) == 0 + || (nios2_gprel_sec + && regexec (&nios2_gprel_sec_regex, section, 0, NULL, 0) == 0)); } /* Return true if EXP should be placed in the small data section. */ diff --git a/gcc/config/nios2/nios2.opt b/gcc/config/nios2/nios2.opt index 08cb935..d08405e 100644 --- a/gcc/config/nios2/nios2.opt +++ b/gcc/config/nios2/nios2.opt @@ -586,3 +586,7 @@ Enable generation of R2 BMX instructions. 
mcdx Target Report Mask(HAS_CDX) Enable generation of R2 CDX instructions. + +mgprel-sec= +Target RejectNegative Joined Var(nios2_gprel_sec) Init(NULL) +Regular expression matching additional GP-addressible small-data section names. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index efe8f86..d2001ca 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -948,6 +948,7 @@ Objective-C and Objective-C++ Dialects}. @emph{Nios II Options} @gccoptlist{-G @var{num} -mgpopt=@var{option} -mgpopt -mno-gpopt @gol +-mgprel-sec=@var{regexp} @gol -mel -meb @gol -mno-bypass-cache -mbypass-cache @gol -mno-cache-volatile -mcache-volatile @gol @@ -21171,6 +21172,18 @@ GOT data sections. In this case, the 16-bit offset for GP-relative addressing may not be large enough to allow access to the entire small data section. +@item -mgprel-sec=@var{regexp} +@opindex mgprel-sec +This option specifies additional section names that can be accessed via +GP-relative addressing. It is most useful in conjunction with +@code{section} attributes on variable declarations +(@pxref{Common Variable Attributes}) and a custom linker script. +The @var{regexp} is a POSIX Extended Regular Expression. + +This option does not affect the behavior of the @option{-G} option, and +the specified sections are in addition to the standard @code{.sdata} +and @code{.sbss} small-data sections that are recognized by @option{-mgpopt}. + @item -mel @itemx -meb @opindex mel diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c035cd7..f8d8249 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2017-10-26 Sandra Loosemore + + * gcc.target/nios2/gpopt-gprel-sec.c: New. 
+ 2017-10-26 Olga Makhotina * gcc.target/i386/avx512f-vcmpps-1.c (_mm512_cmpeq_ps_mask, diff --git a/gcc/testsuite/gcc.target/nios2/gpopt-gprel-sec.c b/gcc/testsuite/gcc.target/nios2/gpopt-gprel-sec.c new file mode 100644 index 0000000..1083fe6 --- /dev/null +++ b/gcc/testsuite/gcc.target/nios2/gpopt-gprel-sec.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O -mgpopt=local -mgprel-sec=\\.frog.+" } */ + +extern int a __attribute__ ((section (".frog1"))); +static volatile int b __attribute__ ((section (".frog2"))) = 1; +extern int c __attribute__ ((section (".data"))); +static volatile int d __attribute__ ((section (".data"))) = 2; + +extern int e; +static volatile int f = 3; + +volatile int g __attribute__ ((weak)) = 4; + +extern int h[100]; +static int i[100]; +static int j[100] __attribute__ ((section (".sdata"))); + +typedef int (*ftype) (int); +extern int foo (int); + +extern int bar (int, int*, int*, int*, ftype); + +int baz (void) +{ + return bar (a + b + c + d + e + f + g, h, i, j, foo); +} + +/* { dg-final { scan-assembler "%gprel\\(a\\)" } } */ +/* { dg-final { scan-assembler "%gprel\\(b\\)" } } */ +/* { dg-final { scan-assembler-not "%gprel\\(c\\)" } } */ +/* { dg-final { scan-assembler-not "%gprel\\(d\\)" } } */ +/* { dg-final { scan-assembler-not "%gprel\\(e\\)" } } */ +/* { dg-final { scan-assembler "%gprel\\(f\\)" } } */ +/* { dg-final { scan-assembler-not "%gprel\\(g\\)" } } */ +/* { dg-final { scan-assembler-not "%gprel\\(h\\)" } } */ +/* { dg-final { scan-assembler-not "%gprel\\(i\\)" } } */ +/* { dg-final { scan-assembler "%gprel\\(j\\)" } } */ +/* { dg-final { scan-assembler-not "%gprel\\(foo\\)" } } */ -- cgit v1.1 From 1cef1159dbf7af419d9cd2c75fadcd1eb2fa0ab3 Mon Sep 17 00:00:00 2001 From: Sandra Loosemore Date: Thu, 26 Oct 2017 16:52:15 -0400 Subject: constraints.md ("S"): Match r0rel_constant_p too. 2017-10-26 Sandra Loosemore gcc/ * config/nios2/constraints.md ("S"): Match r0rel_constant_p too. 
* config/nios2/nios2-protos.h (r0rel_constant_p): Declare. * config/nios2/nios2.c: (nios2_r0rel_sec_regex): New. (nios2_option_overide): Initialize it. Don't allow R0-relative addressing with PIC. (nios2_rtx_costs): Handle r0rel_constant_p like gprel_constant_p. (nios2_symbolic_constant_p): Likewise. (nios2_legitimate_address_p): Likewise. (nios2_r0rel_section_name_p): New. (nios2_symbol_ref_in_r0rel_data_p): New. (nios2_emit_move_sequence): Handle r0rel_constant_p. (r0rel_constant_p): New. (nios2_print_operand_address): Handle r0rel_constant_p. (nios2_cdx_narrow_form_p): Likewise. * config/nios2/nios2.opt (mr0rel-sec=): New option. * doc/invoke.texi (Option Summary): Add -mr0rel-sec. (Nios II Options): Document -mr0rel-sec. gcc/testsuite/ * gcc.target/nios2/gpopt-r0rel-sec.c: New. From-SVN: r254124 --- gcc/ChangeLog | 20 +++++ gcc/config/nios2/constraints.md | 4 +- gcc/config/nios2/nios2-protos.h | 1 + gcc/config/nios2/nios2.c | 94 ++++++++++++++++++++---- gcc/config/nios2/nios2.opt | 6 +- gcc/doc/invoke.texi | 16 +++- gcc/testsuite/ChangeLog | 4 + gcc/testsuite/gcc.target/nios2/gpopt-r0rel-sec.c | 38 ++++++++++ 8 files changed, 165 insertions(+), 18 deletions(-) create mode 100644 gcc/testsuite/gcc.target/nios2/gpopt-r0rel-sec.c (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 7ee9da2..558ec9b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,25 @@ 2017-10-26 Sandra Loosemore + * config/nios2/constraints.md ("S"): Match r0rel_constant_p too. + * config/nios2/nios2-protos.h (r0rel_constant_p): Declare. + * config/nios2/nios2.c: (nios2_r0rel_sec_regex): New. + (nios2_option_overide): Initialize it. Don't allow R0-relative + addressing with PIC. + (nios2_rtx_costs): Handle r0rel_constant_p like gprel_constant_p. + (nios2_symbolic_constant_p): Likewise. + (nios2_legitimate_address_p): Likewise. + (nios2_r0rel_section_name_p): New. + (nios2_symbol_ref_in_r0rel_data_p): New. + (nios2_emit_move_sequence): Handle r0rel_constant_p. 
+ (r0rel_constant_p): New. + (nios2_print_operand_address): Handle r0rel_constant_p. + (nios2_cdx_narrow_form_p): Likewise. + * config/nios2/nios2.opt (mr0rel-sec=): New option. + * doc/invoke.texi (Option Summary): Add -mr0rel-sec. + (Nios II Options): Document -mr0rel-sec. + +2017-10-26 Sandra Loosemore + * config/nios2/nios2.c: Include xregex.h. (nios2_gprel_sec_regex): New. (nios2_option_overide): Initialize it. Don't allow GP-relative diff --git a/gcc/config/nios2/constraints.md b/gcc/config/nios2/constraints.md index c6c53926..51f71cf 100644 --- a/gcc/config/nios2/constraints.md +++ b/gcc/config/nios2/constraints.md @@ -95,8 +95,8 @@ (match_test "TARGET_ARCH_R2 && ANDCLEAR_INT (ival)"))) (define_constraint "S" - "An immediate stored in small data, accessible by GP." - (match_test "gprel_constant_p (op)")) + "An immediate stored in small data, accessible by GP, or by offset from r0." + (match_test "gprel_constant_p (op) || r0rel_constant_p (op)")) (define_constraint "T" "A constant unspec offset representing a relocation." diff --git a/gcc/config/nios2/nios2-protos.h b/gcc/config/nios2/nios2-protos.h index 6df65bb..84d450b 100644 --- a/gcc/config/nios2/nios2-protos.h +++ b/gcc/config/nios2/nios2-protos.h @@ -52,6 +52,7 @@ extern const char * nios2_add_insn_asm (rtx_insn *, rtx *); extern bool nios2_legitimate_pic_operand_p (rtx); extern bool gprel_constant_p (rtx); +extern bool r0rel_constant_p (rtx); extern bool nios2_regno_ok_for_base_p (int, bool); extern bool nios2_unspec_reloc_p (rtx); diff --git a/gcc/config/nios2/nios2.c b/gcc/config/nios2/nios2.c index 3aade7b..cdd5e9a 100644 --- a/gcc/config/nios2/nios2.c +++ b/gcc/config/nios2/nios2.c @@ -106,6 +106,7 @@ static bool custom_code_conflict = false; /* State for command-line options. */ regex_t nios2_gprel_sec_regex; +regex_t nios2_r0rel_sec_regex; /* Definition of builtin function types for nios2. 
*/ @@ -1375,22 +1376,30 @@ nios2_option_override (void) nios2_gpopt_option = gpopt_local; } - /* GP-relative addressing doesn't make sense for PIC. */ + /* GP-relative and r0-relative addressing don't make sense for PIC. */ if (flag_pic) - { + { if (nios2_gpopt_option != gpopt_none) - error ("-mgpopt not supported with PIC."); + error ("-mgpopt not supported with PIC."); if (nios2_gprel_sec) - error ("-mgprel-sec= not supported with PIC."); + error ("-mgprel-sec= not supported with PIC."); + if (nios2_r0rel_sec) + error ("-mr0rel-sec= not supported with PIC."); } - /* Process -mgprel-sec=. */ + /* Process -mgprel-sec= and -mr0rel-sec=. */ if (nios2_gprel_sec) { if (regcomp (&nios2_gprel_sec_regex, nios2_gprel_sec, REG_EXTENDED | REG_NOSUB)) error ("-mgprel-sec= argument is not a valid regular expression."); } + if (nios2_r0rel_sec) + { + if (regcomp (&nios2_r0rel_sec_regex, nios2_r0rel_sec, + REG_EXTENDED | REG_NOSUB)) + error ("-mr0rel-sec= argument is not a valid regular expression."); + } /* If we don't have mul, we don't have mulx either! */ if (!TARGET_HAS_MUL && TARGET_HAS_MULX) @@ -1478,7 +1487,7 @@ nios2_rtx_costs (rtx x, machine_mode mode, case SYMBOL_REF: case CONST: case CONST_DOUBLE: - if (gprel_constant_p (x)) + if (gprel_constant_p (x) || r0rel_constant_p (x)) { *total = COSTS_N_INSNS (1); return true; @@ -2028,6 +2037,7 @@ nios2_symbolic_constant_p (rtx x) return (SYMBOL_REF_P (base) && !SYMBOL_REF_TLS_MODEL (base) && !gprel_constant_p (base) + && !r0rel_constant_p (base) && SMALL_INT (INTVAL (offset))); } return false; @@ -2129,7 +2139,7 @@ nios2_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED, /* Else, fall through. */ case CONST: - if (gprel_constant_p (operand)) + if (gprel_constant_p (operand) || r0rel_constant_p (operand)) return true; /* Else, fall through. 
*/ @@ -2294,6 +2304,14 @@ nios2_small_section_name_p (const char *section) && regexec (&nios2_gprel_sec_regex, section, 0, NULL, 0) == 0)); } +/* Return true if SECTION is a r0-relative section name. */ +static bool +nios2_r0rel_section_name_p (const char *section) +{ + return (nios2_r0rel_sec + && regexec (&nios2_r0rel_sec_regex, section, 0, NULL, 0) == 0); +} + /* Return true if EXP should be placed in the small data section. */ static bool nios2_in_small_data_p (const_tree exp) @@ -2400,6 +2418,33 @@ nios2_symbol_ref_in_small_data_p (rtx sym) } } +/* Likewise for r0-relative addressing. */ +static bool +nios2_symbol_ref_in_r0rel_data_p (rtx sym) +{ + tree decl; + + gcc_assert (GET_CODE (sym) == SYMBOL_REF); + decl = SYMBOL_REF_DECL (sym); + + /* TLS variables are not accessed through r0. */ + if (SYMBOL_REF_TLS_MODEL (sym) != 0) + return false; + + /* On Nios II R2, there is no r0-relative relocation that can be + used with "io" instructions. So, if we are implicitly generating + those instructions, we cannot emit r0-relative accesses. */ + if (TARGET_ARCH_R2 + && (TARGET_BYPASS_CACHE || TARGET_BYPASS_CACHE_VOLATILE)) + return false; + + /* If the user has explicitly placed the symbol in a r0rel section + via an attribute, generate r0-relative addressing. */ + if (decl && DECL_SECTION_NAME (decl)) + return nios2_r0rel_section_name_p (DECL_SECTION_NAME (decl)); + return false; +} + /* Implement TARGET_SECTION_TYPE_FLAGS. */ static unsigned int @@ -2633,8 +2678,9 @@ nios2_emit_move_sequence (rtx *operands, machine_mode mode) return true; } } - else if (gprel_constant_p (from)) - /* Handled directly by movsi_internal as gp + offset. */ + else if (gprel_constant_p (from) || r0rel_constant_p (from)) + /* Handled directly by movsi_internal as gp + offset + or r0 + offset. 
*/ ; else if (nios2_large_constant_p (from)) /* This case covers either a regular symbol reference or an UNSPEC @@ -2984,6 +3030,20 @@ gprel_constant_p (rtx op) return false; } +/* Likewise if this is a zero-relative accessible reference. */ +bool +r0rel_constant_p (rtx op) +{ + if (GET_CODE (op) == SYMBOL_REF + && nios2_symbol_ref_in_r0rel_data_p (op)) + return true; + else if (GET_CODE (op) == CONST + && GET_CODE (XEXP (op, 0)) == PLUS) + return r0rel_constant_p (XEXP (XEXP (op, 0), 0)); + + return false; +} + /* Return the name string for a supported unspec reloc offset. */ static const char * nios2_unspec_reloc_name (int unspec) @@ -3048,7 +3108,13 @@ nios2_print_operand_address (FILE *file, machine_mode mode, rtx op) fprintf (file, ")(%s)", reg_names[GP_REGNO]); return; } - + else if (r0rel_constant_p (op)) + { + fprintf (file, "%%lo("); + output_addr_const (file, op); + fprintf (file, ")(r0)"); + return; + } break; case PLUS: @@ -4654,8 +4720,8 @@ nios2_cdx_narrow_form_p (rtx_insn *insn) || TARGET_BYPASS_CACHE) return false; addr = XEXP (mem, 0); - /* GP-based references are never narrow. */ - if (gprel_constant_p (addr)) + /* GP-based and R0-based references are never narrow. */ + if (gprel_constant_p (addr) || r0rel_constant_p (addr)) return false; /* %lo requires a 16-bit relocation and is never narrow. */ if (GET_CODE (addr) == LO_SUM) @@ -4701,8 +4767,8 @@ nios2_cdx_narrow_form_p (rtx_insn *insn) || TARGET_BYPASS_CACHE) return false; addr = XEXP (mem, 0); - /* GP-based references are never narrow. */ - if (gprel_constant_p (addr)) + /* GP-based and r0-based references are never narrow. */ + if (gprel_constant_p (addr) || r0rel_constant_p (addr)) return false; /* %lo requires a 16-bit relocation and is never narrow. 
*/ if (GET_CODE (addr) == LO_SUM) diff --git a/gcc/config/nios2/nios2.opt b/gcc/config/nios2/nios2.opt index d08405e..a50dbee 100644 --- a/gcc/config/nios2/nios2.opt +++ b/gcc/config/nios2/nios2.opt @@ -589,4 +589,8 @@ Enable generation of R2 CDX instructions. mgprel-sec= Target RejectNegative Joined Var(nios2_gprel_sec) Init(NULL) -Regular expression matching additional GP-addressible small-data section names. +Regular expression matching additional GP-addressible section names. + +mr0rel-sec= +Target RejectNegative Joined Var(nios2_r0rel_sec) Init(NULL) +Regular expression matching section names for r0-relative addressing. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index d2001ca..2fc087a 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -948,7 +948,7 @@ Objective-C and Objective-C++ Dialects}. @emph{Nios II Options} @gccoptlist{-G @var{num} -mgpopt=@var{option} -mgpopt -mno-gpopt @gol --mgprel-sec=@var{regexp} @gol +-mgprel-sec=@var{regexp} -mr0rel-sec=@var{regexp} @gol -mel -meb @gol -mno-bypass-cache -mbypass-cache @gol -mno-cache-volatile -mcache-volatile @gol @@ -21184,6 +21184,20 @@ This option does not affect the behavior of the @option{-G} option, and and the specified sections are in addition to the standard @code{.sdata} and @code{.sbss} small-data sections that are recognized by @option{-mgpopt}. +@item -mr0rel-sec=@var{regexp} +@opindex mr0rel-sec +This option specifies names of sections that can be accessed via a +16-bit offset from @code{r0}; that is, in the low 32K or high 32K +of the 32-bit address space. It is most useful in conjunction with +@code{section} attributes on variable declarations +(@pxref{Common Variable Attributes}) and a custom linker script. +The @var{regexp} is a POSIX Extended Regular Expression. 
+ +In contrast to the use of GP-relative addressing for small data, +zero-based addressing is never generated by default and there are no +conventional section names used in standard linker scripts for sections +in the low or high areas of memory. + @item -mel @itemx -meb @opindex mel diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index f8d8249..2f840a9 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,9 @@ 2017-10-26 Sandra Loosemore + * gcc.target/nios2/gpopt-r0rel-sec.c: New. + +2017-10-26 Sandra Loosemore + * gcc.target/nios2/gpopt-gprel-sec.c: New. 2017-10-26 Olga Makhotina diff --git a/gcc/testsuite/gcc.target/nios2/gpopt-r0rel-sec.c b/gcc/testsuite/gcc.target/nios2/gpopt-r0rel-sec.c new file mode 100644 index 0000000..5fda9e9 --- /dev/null +++ b/gcc/testsuite/gcc.target/nios2/gpopt-r0rel-sec.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O -mgpopt=local -mr0rel-sec=\\.frog.+" } */ + +extern int a __attribute__ ((section (".frog1"))); +static volatile int b __attribute__ ((section (".frog2"))) = 1; +extern int c __attribute__ ((section (".data"))); +static volatile int d __attribute__ ((section (".data"))) = 2; + +extern int e; +static volatile int f = 3; + +volatile int g __attribute__ ((weak)) = 4; + +extern int h[100]; +static int i[100]; +static int j[100] __attribute__ ((section (".sdata"))); + +typedef int (*ftype) (int); +extern int foo (int); + +extern int bar (int, int*, int*, int*, ftype); + +int baz (void) +{ + return bar (a + b + c + d + e + f + g, h, i, j, foo); +} + +/* { dg-final { scan-assembler "%lo\\(a\\)\\(r0\\)" } } */ +/* { dg-final { scan-assembler "%lo\\(b\\)\\(r0\\)" } } */ +/* { dg-final { scan-assembler-not "%gprel\\(c\\)" } } */ +/* { dg-final { scan-assembler-not "%gprel\\(d\\)" } } */ +/* { dg-final { scan-assembler-not "%gprel\\(e\\)" } } */ +/* { dg-final { scan-assembler "%gprel\\(f\\)" } } */ +/* { dg-final { scan-assembler-not "%gprel\\(g\\)" } } */ +/* { dg-final { 
scan-assembler-not "%gprel\\(h\\)" } } */ +/* { dg-final { scan-assembler-not "%gprel\\(i\\)" } } */ +/* { dg-final { scan-assembler "%gprel\\(j\\)" } } */ +/* { dg-final { scan-assembler-not "%gprel\\(foo\\)" } } */ -- cgit v1.1 From e1b76fde8ffaa74dffc895c2e2e625e30428b435 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Thu, 26 Oct 2017 23:04:12 +0000 Subject: compiler: explicitly convert between type aliases Otherwise we can get a crash in the backend. Test case is https://golang.org/cl/73790. Reviewed-on: https://go-review.googlesource.com/73810 From-SVN: r254126 --- gcc/go/gofrontend/MERGE | 2 +- gcc/go/gofrontend/expressions.cc | 16 +++++++++++----- 2 files changed, 12 insertions(+), 6 deletions(-) (limited to 'gcc') diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE index 8b1846d..0fa2ccc 100644 --- a/gcc/go/gofrontend/MERGE +++ b/gcc/go/gofrontend/MERGE @@ -1,4 +1,4 @@ -a409ac2c78899e638a014c97891925bec93cb3ad +64d570c590a76921cbdca4efb22e4675e19cc809 The first line of this file holds the git revision number of the last merge done from the gofrontend repository. 
diff --git a/gcc/go/gofrontend/expressions.cc b/gcc/go/gofrontend/expressions.cc index 8337cbe..dad22eb 100644 --- a/gcc/go/gofrontend/expressions.cc +++ b/gcc/go/gofrontend/expressions.cc @@ -144,8 +144,8 @@ Expression::convert_for_assignment(Gogo*, Type* lhs_type, || rhs->is_error_expression()) return Expression::make_error(location); - if (lhs_type->forwarded() != rhs_type->forwarded() - && lhs_type->interface_type() != NULL) + bool are_identical = Type::are_identical(lhs_type, rhs_type, false, NULL); + if (!are_identical && lhs_type->interface_type() != NULL) { if (rhs_type->interface_type() == NULL) return Expression::convert_type_to_interface(lhs_type, rhs, location); @@ -153,8 +153,7 @@ Expression::convert_for_assignment(Gogo*, Type* lhs_type, return Expression::convert_interface_to_interface(lhs_type, rhs, false, location); } - else if (lhs_type->forwarded() != rhs_type->forwarded() - && rhs_type->interface_type() != NULL) + else if (!are_identical && rhs_type->interface_type() != NULL) return Expression::convert_interface_to_type(lhs_type, rhs, location); else if (lhs_type->is_slice_type() && rhs_type->is_nil_type()) { @@ -165,8 +164,15 @@ Expression::convert_for_assignment(Gogo*, Type* lhs_type, } else if (rhs_type->is_nil_type()) return Expression::make_nil(location); - else if (Type::are_identical(lhs_type, rhs_type, false, NULL)) + else if (are_identical) { + if (lhs_type->forwarded() != rhs_type->forwarded()) + { + // Different but identical types require an explicit + // conversion. This happens with type aliases. + return Expression::make_cast(lhs_type, rhs, location); + } + // No conversion is needed. return rhs; } -- cgit v1.1