From 434d521d118fc7e7759b2b42bdddfa70caec637b Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Tue, 19 Jul 2022 09:53:39 -0400 Subject: analyzer: log out-edge description in exploded_graph::process_node I found this logging tweak very helpful when working on PR analyzer/106284. gcc/analyzer/ChangeLog: * engine.cc (exploded_graph::process_node): Show any description of the out-edge when logging it for consideration. Signed-off-by: David Malcolm --- gcc/analyzer/engine.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc index 9ffcc41..4f7f9d0 100644 --- a/gcc/analyzer/engine.cc +++ b/gcc/analyzer/engine.cc @@ -3974,8 +3974,12 @@ exploded_graph::process_node (exploded_node *node) { found_a_superedge = true; if (logger) - logger->log ("considering SN: %i -> SN: %i", - succ->m_src->m_index, succ->m_dest->m_index); + { + label_text succ_desc (succ->get_description (false)); + logger->log ("considering SN: %i -> SN: %i (%s)", + succ->m_src->m_index, succ->m_dest->m_index, + succ_desc.get ()); + } program_point next_point = program_point::before_supernode (succ->m_dest, succ, -- cgit v1.1 From 2c044ff123ee573b7cd63d88f544091b7aeeb8f6 Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Tue, 19 Jul 2022 09:53:39 -0400 Subject: analyzer: fix taint handling of switch statements [PR106321] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR analyzer/106321 reports false positives from -Wanalyzer-tainted-array-index on switch statements, seen e.g. in the Linux kernel in drivers/vfio/pci/vfio_pci_core.c, where vfio_pci_core_ioctl has: | 744 | switch (info.index) { | | ~~~~~~ ~~~~~~~~~~ | | | | | | | (8) ...to here | | (9) following ‘case 0 ... 5:’ branch... |...... | 751 | case VFIO_PCI_BAR0_REGION_INDEX ... 
VFIO_PCI_BAR5_REGION_INDEX: | | ~~~~ | | | | | (10) ...to here and then a false complaint about "use of attacker-controlled value ‘info.index’ in array lookup without upper-bounds checking", where info.index has clearly had its bounds checked by the switch/case. It turns out that when I rewrote switch handling for the analyzer in r12-3101-g8ca7fa84a3af35, I removed notifications to state machines about the constraints on cases. This patch fixes that oversight by adding a new on_bounded_ranges vfunc for region_model_context, called on switch statement edges, which calls a new state_machine vfunc. It implements it for the "taint" state machine, so that it updates the "has bounds" flags at out-edges for switch statements, based on whether the bounds from the edge appear to actually constrain the switch index. gcc/analyzer/ChangeLog: PR analyzer/106321 * constraint-manager.h (bounded_ranges::get_count): New. (bounded_ranges::get_range): New. * engine.cc (impl_region_model_context::on_bounded_ranges): New. * exploded-graph.h (impl_region_model_context::on_bounded_ranges): New decl. * region-model.cc (region_model::apply_constraints_for_gswitch): Potentially call ctxt->on_bounded_ranges. * region-model.h (region_model_context::on_bounded_ranges): New vfunc. (noop_region_model_context::on_bounded_ranges): New. (region_model_context_decorator::on_bounded_ranges): New. * sm-taint.cc: Include "analyzer/constraint-manager.h". (taint_state_machine::on_bounded_ranges): New. * sm.h (state_machine::on_bounded_ranges): New. gcc/testsuite/ChangeLog: PR analyzer/106321 * gcc.dg/analyzer/torture/taint-read-index-2.c: Add test coverage for switch statements. 
Signed-off-by: David Malcolm --- gcc/analyzer/constraint-manager.h | 3 + gcc/analyzer/engine.cc | 26 +++++++ gcc/analyzer/exploded-graph.h | 3 + gcc/analyzer/region-model.cc | 2 + gcc/analyzer/region-model.h | 17 +++++ gcc/analyzer/sm-taint.cc | 58 +++++++++++++++ gcc/analyzer/sm.h | 9 +++ .../gcc.dg/analyzer/torture/taint-read-index-2.c | 85 ++++++++++++++++++++++ 8 files changed, 203 insertions(+) (limited to 'gcc') diff --git a/gcc/analyzer/constraint-manager.h b/gcc/analyzer/constraint-manager.h index f67c764..1271f18 100644 --- a/gcc/analyzer/constraint-manager.h +++ b/gcc/analyzer/constraint-manager.h @@ -138,6 +138,9 @@ public: static int cmp (const bounded_ranges *a, const bounded_ranges *b); + unsigned get_count () const { return m_ranges.length (); } + const bounded_range &get_range (unsigned idx) const { return m_ranges[idx]; } + private: void canonicalize (); void validate () const; diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc index 4f7f9d0..85b7c5e 100644 --- a/gcc/analyzer/engine.cc +++ b/gcc/analyzer/engine.cc @@ -916,6 +916,32 @@ impl_region_model_context::on_condition (const svalue *lhs, } } +/* Implementation of region_model_context::on_bounded_ranges vfunc. + Notify all state machines about the ranges, which could lead to + state transitions. */ + +void +impl_region_model_context::on_bounded_ranges (const svalue &sval, + const bounded_ranges &ranges) +{ + int sm_idx; + sm_state_map *smap; + FOR_EACH_VEC_ELT (m_new_state->m_checker_states, sm_idx, smap) + { + const state_machine &sm = m_ext_state.get_sm (sm_idx); + impl_sm_context sm_ctxt (*m_eg, sm_idx, sm, m_enode_for_diag, + m_old_state, m_new_state, + m_old_state->m_checker_states[sm_idx], + m_new_state->m_checker_states[sm_idx], + m_path_ctxt); + sm.on_bounded_ranges (&sm_ctxt, + (m_enode_for_diag + ? m_enode_for_diag->get_supernode () + : NULL), + m_stmt, sval, ranges); + } +} + /* Implementation of region_model_context::on_phi vfunc. 
Notify all state machines about the phi, which could lead to state transitions. */ diff --git a/gcc/analyzer/exploded-graph.h b/gcc/analyzer/exploded-graph.h index 0613f55..f957568 100644 --- a/gcc/analyzer/exploded-graph.h +++ b/gcc/analyzer/exploded-graph.h @@ -65,6 +65,9 @@ class impl_region_model_context : public region_model_context enum tree_code op, const svalue *rhs) final override; + void on_bounded_ranges (const svalue &sval, + const bounded_ranges &ranges) final override; + void on_unknown_change (const svalue *sval, bool is_mutable) final override; void on_phi (const gphi *phi, tree rhs) final override; diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc index 8b7b4e1..5bb7112 100644 --- a/gcc/analyzer/region-model.cc +++ b/gcc/analyzer/region-model.cc @@ -4228,6 +4228,8 @@ region_model::apply_constraints_for_gswitch (const switch_cfg_superedge &edge, bool sat = m_constraints->add_bounded_ranges (index_sval, all_cases_ranges); if (!sat && out) *out = new rejected_ranges_constraint (*this, index, all_cases_ranges); + if (sat && ctxt && !all_cases_ranges->empty_p ()) + ctxt->on_bounded_ranges (*index_sval, *all_cases_ranges); return sat; } diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h index 6dda43f..42f8abe 100644 --- a/gcc/analyzer/region-model.h +++ b/gcc/analyzer/region-model.h @@ -931,6 +931,13 @@ class region_model_context enum tree_code op, const svalue *rhs) = 0; + /* Hook for clients to be notified when the condition that + SVAL is within RANGES is added to the region model. + Similar to on_condition, but for use when handling switch statements. + RANGES is non-empty. */ + virtual void on_bounded_ranges (const svalue &sval, + const bounded_ranges &ranges) = 0; + /* Hooks for clients to be notified when an unknown change happens to SVAL (in response to a call to an unknown function). 
*/ virtual void on_unknown_change (const svalue *sval, bool is_mutable) = 0; @@ -991,6 +998,10 @@ public: const svalue *rhs ATTRIBUTE_UNUSED) override { } + void on_bounded_ranges (const svalue &, + const bounded_ranges &) override + { + } void on_unknown_change (const svalue *sval ATTRIBUTE_UNUSED, bool is_mutable ATTRIBUTE_UNUSED) override { @@ -1087,6 +1098,12 @@ class region_model_context_decorator : public region_model_context m_inner->on_condition (lhs, op, rhs); } + void on_bounded_ranges (const svalue &sval, + const bounded_ranges &ranges) override + { + m_inner->on_bounded_ranges (sval, ranges); + } + void on_unknown_change (const svalue *sval, bool is_mutable) override { m_inner->on_unknown_change (sval, is_mutable); diff --git a/gcc/analyzer/sm-taint.cc b/gcc/analyzer/sm-taint.cc index 2de9284..9cb7888 100644 --- a/gcc/analyzer/sm-taint.cc +++ b/gcc/analyzer/sm-taint.cc @@ -51,6 +51,7 @@ along with GCC; see the file COPYING3. If not see #include "analyzer/sm.h" #include "analyzer/program-state.h" #include "analyzer/pending-diagnostic.h" +#include "analyzer/constraint-manager.h" #if ENABLE_ANALYZER @@ -97,6 +98,11 @@ public: const svalue *lhs, enum tree_code op, const svalue *rhs) const final override; + void on_bounded_ranges (sm_context *sm_ctxt, + const supernode *node, + const gimple *stmt, + const svalue &sval, + const bounded_ranges &ranges) const final override; bool can_purge_p (state_t s) const final override; @@ -901,6 +907,58 @@ taint_state_machine::on_condition (sm_context *sm_ctxt, } } +/* Implementation of state_machine::on_bounded_ranges vfunc for + taint_state_machine, for handling switch statement cases. + Potentially transition state 'tainted' to 'has_ub' or 'has_lb', + and states 'has_ub' and 'has_lb' to 'stop'. 
*/ + +void +taint_state_machine::on_bounded_ranges (sm_context *sm_ctxt, + const supernode *, + const gimple *stmt, + const svalue &sval, + const bounded_ranges &ranges) const +{ + gcc_assert (!ranges.empty_p ()); + gcc_assert (ranges.get_count () > 0); + + /* We have one or more ranges; this could be a "default:", or one or + more single or range cases. + + Look at the overall endpoints to see if the ranges impose any lower + bounds or upper bounds beyond those of the underlying numeric type. */ + + tree lowest_bound = ranges.get_range (0).m_lower; + tree highest_bound = ranges.get_range (ranges.get_count () - 1).m_upper; + gcc_assert (lowest_bound); + gcc_assert (highest_bound); + + bool ranges_have_lb + = (lowest_bound != TYPE_MIN_VALUE (TREE_TYPE (lowest_bound))); + bool ranges_have_ub + = (highest_bound != TYPE_MAX_VALUE (TREE_TYPE (highest_bound))); + + if (!ranges_have_lb && !ranges_have_ub) + return; + + /* We have new bounds from the ranges; combine them with any + existing bounds on SVAL. 
*/ + state_t old_state = sm_ctxt->get_state (stmt, &sval); + if (old_state == m_tainted) + { + if (ranges_have_lb && ranges_have_ub) + sm_ctxt->set_next_state (stmt, &sval, m_stop); + else if (ranges_have_lb) + sm_ctxt->set_next_state (stmt, &sval, m_has_lb); + else if (ranges_have_ub) + sm_ctxt->set_next_state (stmt, &sval, m_has_ub); + } + else if (old_state == m_has_ub && ranges_have_lb) + sm_ctxt->set_next_state (stmt, &sval, m_stop); + else if (old_state == m_has_lb && ranges_have_ub) + sm_ctxt->set_next_state (stmt, &sval, m_stop); +} + bool taint_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED) const { diff --git a/gcc/analyzer/sm.h b/gcc/analyzer/sm.h index 353a6db..87ab11c 100644 --- a/gcc/analyzer/sm.h +++ b/gcc/analyzer/sm.h @@ -108,6 +108,15 @@ public: { } + virtual void + on_bounded_ranges (sm_context *sm_ctxt ATTRIBUTE_UNUSED, + const supernode *node ATTRIBUTE_UNUSED, + const gimple *stmt ATTRIBUTE_UNUSED, + const svalue &sval ATTRIBUTE_UNUSED, + const bounded_ranges &ranges ATTRIBUTE_UNUSED) const + { + } + /* Return true if it safe to discard the given state (to help when simplifying state objects). States that need leak detection should return false. */ diff --git a/gcc/testsuite/gcc.dg/analyzer/torture/taint-read-index-2.c b/gcc/testsuite/gcc.dg/analyzer/torture/taint-read-index-2.c index 6a4ebdb..b3dc177 100644 --- a/gcc/testsuite/gcc.dg/analyzer/torture/taint-read-index-2.c +++ b/gcc/testsuite/gcc.dg/analyzer/torture/taint-read-index-2.c @@ -54,3 +54,88 @@ test_4 (unsigned uarg) { return called_by_test_4 (uarg); } + +int __attribute__((tainted_args)) +test_5 (int idx) +{ + switch (idx) + { + default: + return 0; + case 5 ... 20: + return arr[idx]; /* { dg-bogus "bounds checking" } */ + /* 20 is still an out-of-bounds error (off-by-one) + but we don't check for that, just that bounds have been imposed. */ + + /* Extra cases to avoid optimizing the switch away. 
*/ + case 22: + return 22; + case 23: + return -17; + } +} + +int __attribute__((tainted_args)) +test_6 (int idx) +{ + switch (idx) + { + default: + return arr[idx]; /* { dg-warning "without bounds checking" } */ + + case 2: + return arr[idx]; /* { dg-bogus "bounds checking" } */ + + case 6 ... 19: + return arr[idx]; /* { dg-bogus "bounds checking" } */ + + case 22: + return 22; + case 23: + return -17; + } +} + +int __attribute__((tainted_args)) +test_7 (int idx) +{ + switch (idx) + { + default: + return arr[idx]; /* { dg-warning "without bounds checking" } */ + + case 2 ... 4: + case 7 ... 9: + return arr[idx]; /* { dg-bogus "bounds checking" } */ + + case 12 ... 19: + return arr[idx]; /* { dg-bogus "bounds checking" } */ + + case 22: + return 22; + case 23: + return -17; + } +} + +int __attribute__((tainted_args)) +test_8 (unsigned idx) +{ + switch (idx) + { + default: + return arr[idx]; /* { dg-warning "without upper-bounds checking" } */ + + case 2 ... 4: + case 7 ... 9: + return arr[idx]; /* { dg-bogus "bounds checking" } */ + + case 12 ... 19: + return arr[idx]; /* { dg-bogus "bounds checking" } */ + + case 22: + return 22; + case 23: + return -17; + } +} -- cgit v1.1 From 465802c0d40adca5fd5b0a2af6ff8a323a55b589 Mon Sep 17 00:00:00 2001 From: Jonathan Wakely Date: Tue, 19 Jul 2022 10:55:52 +0100 Subject: c++: Enable __has_builtin for new reference binding built-ins The new built-ins need to be detectable using __has_builtin, and the library should use that to check for them. This fixes an error with Clang when C++23 is enabled. gcc/cp/ChangeLog: * cp-objcp-common.cc (names_builtin_p): Return true for RID_REF_CONSTRUCTS_FROM_TEMPORARY and RID_REF_CONVERTS_FROM_TEMPORARY. libstdc++-v3/ChangeLog: * include/std/type_traits (__cpp_lib_reference_from_temporary) (reference_constructs_from_temporary) (reference_converts_from_temporary): Only define when the built-ins are available. 
--- gcc/cp/cp-objcp-common.cc | 2 ++ 1 file changed, 2 insertions(+) (limited to 'gcc') diff --git a/gcc/cp/cp-objcp-common.cc b/gcc/cp/cp-objcp-common.cc index 0b70d55..4079a4b 100644 --- a/gcc/cp/cp-objcp-common.cc +++ b/gcc/cp/cp-objcp-common.cc @@ -461,6 +461,8 @@ names_builtin_p (const char *name) case RID_IS_ASSIGNABLE: case RID_IS_CONSTRUCTIBLE: case RID_UNDERLYING_TYPE: + case RID_REF_CONSTRUCTS_FROM_TEMPORARY: + case RID_REF_CONVERTS_FROM_TEMPORARY: return true; default: break; -- cgit v1.1 From 76c3f0dc2f815e0e450642efd5348c3ab852e4d0 Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Fri, 14 Jan 2022 20:23:41 +0300 Subject: tree-ssa-sink: do not sink to in front of setjmp gcc/ChangeLog: * tree-ssa-sink.cc (select_best_block): Punt if selected block has incoming abnormal edges. gcc/testsuite/ChangeLog: * gcc.dg/setjmp-7.c: New test. --- gcc/testsuite/gcc.dg/setjmp-7.c | 13 +++++++++++++ gcc/tree-ssa-sink.cc | 6 ++++++ 2 files changed, 19 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/setjmp-7.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/setjmp-7.c b/gcc/testsuite/gcc.dg/setjmp-7.c new file mode 100644 index 0000000..44b5bcb --- /dev/null +++ b/gcc/testsuite/gcc.dg/setjmp-7.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-guess-branch-probability -w" } */ +/* { dg-require-effective-target indirect_jumps } */ + +struct __jmp_buf_tag { }; +typedef struct __jmp_buf_tag jmp_buf[1]; +struct globals { jmp_buf listingbuf; }; +extern struct globals *const ptr_to_globals; +void foo() +{ + if ( _setjmp ( ((*ptr_to_globals).listingbuf ))) + ; +} diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc index 2e744d6..9213052 100644 --- a/gcc/tree-ssa-sink.cc +++ b/gcc/tree-ssa-sink.cc @@ -208,6 +208,12 @@ select_best_block (basic_block early_bb, temp_bb = get_immediate_dominator (CDI_DOMINATORS, temp_bb); } + /* Placing a statement before a setjmp-like function would be invalid + (it cannot be reevaluated when execution 
follows an abnormal edge). + If we selected a block with abnormal predecessors, just punt. */ + if (bb_has_abnormal_pred (best_bb)) + return early_bb; + /* If we found a shallower loop nest, then we always consider that a win. This will always give us the most control dependent block within that loop nest. */ -- cgit v1.1 From 26cea5f108e0facdb080e385000bb141b086845f Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Fri, 14 Jan 2022 20:51:12 +0300 Subject: tree-cfg: do not duplicate returns_twice calls A returns_twice call may have associated abnormal edges that correspond to the "second return" from the call. If the call is duplicated, the copies of those edges also need to be abnormal, but e.g. tracer does not enforce that. Just prohibit the (unlikely to be useful) duplication. gcc/ChangeLog: * cfghooks.cc (duplicate_block): Expand comment. * tree-cfg.cc (gimple_can_duplicate_bb_p): Reject blocks with calls that may return twice. --- gcc/cfghooks.cc | 13 ++++++++++--- gcc/tree-cfg.cc | 7 +++++-- 2 files changed, 15 insertions(+), 5 deletions(-) (limited to 'gcc') diff --git a/gcc/cfghooks.cc b/gcc/cfghooks.cc index e435891..c6ac953 100644 --- a/gcc/cfghooks.cc +++ b/gcc/cfghooks.cc @@ -1086,9 +1086,16 @@ can_duplicate_block_p (const_basic_block bb) return cfg_hooks->can_duplicate_block_p (bb); } -/* Duplicates basic block BB and redirects edge E to it. Returns the - new basic block. The new basic block is placed after the basic block - AFTER. */ +/* Duplicate basic block BB, place it after AFTER (if non-null) and redirect + edge E to it (if non-null). Return the new basic block. + + If BB contains a returns_twice call, the caller is responsible for recreating + incoming abnormal edges corresponding to the "second return" for the copy. + gimple_can_duplicate_bb_p rejects such blocks, while RTL likes to live + dangerously. 
+ + If BB has incoming abnormal edges for some other reason, their destinations + should be tied to label(s) of the original BB and not the copy. */ basic_block duplicate_block (basic_block bb, edge e, basic_block after, copy_bb_data *id) diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc index bfcb142..a365aad 100644 --- a/gcc/tree-cfg.cc +++ b/gcc/tree-cfg.cc @@ -6313,12 +6313,15 @@ gimple_can_duplicate_bb_p (const_basic_block bb) { gimple *g = gsi_stmt (gsi); - /* An IFN_GOMP_SIMT_ENTER_ALLOC/IFN_GOMP_SIMT_EXIT call must be + /* Prohibit duplication of returns_twice calls, otherwise associated + abnormal edges also need to be duplicated properly. + An IFN_GOMP_SIMT_ENTER_ALLOC/IFN_GOMP_SIMT_EXIT call must be duplicated as part of its group, or not at all. The IFN_GOMP_SIMT_VOTE_ANY and IFN_GOMP_SIMT_XCHG_* are part of such a group, so the same holds there. */ if (is_gimple_call (g) - && (gimple_call_internal_p (g, IFN_GOMP_SIMT_ENTER_ALLOC) + && (gimple_call_flags (g) & ECF_RETURNS_TWICE + || gimple_call_internal_p (g, IFN_GOMP_SIMT_ENTER_ALLOC) || gimple_call_internal_p (g, IFN_GOMP_SIMT_EXIT) || gimple_call_internal_p (g, IFN_GOMP_SIMT_VOTE_ANY) || gimple_call_internal_p (g, IFN_GOMP_SIMT_XCHG_BFLY) -- cgit v1.1 From 7a158a5776f5ca95a318fcae0ca2dfefb0789538 Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Fri, 14 Jan 2022 21:05:12 +0300 Subject: tree-cfg: check placement of returns_twice calls When a returns_twice call has an associated abnormal edge, the edge corresponds to the "second return" from the call. It wouldn't make sense if any executable statements appeared between the call and the destination of the edge (they wouldn't be re-executed upon the "second return"), so verify that. gcc/ChangeLog: * tree-cfg.cc (gimple_verify_flow_info): Check placement of returns_twice calls. 
--- gcc/tree-cfg.cc | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'gcc') diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc index a365aad..5bcf781 100644 --- a/gcc/tree-cfg.cc +++ b/gcc/tree-cfg.cc @@ -5653,6 +5653,7 @@ gimple_verify_flow_info (void) } /* Verify that body of basic block BB is free of control flow. */ + bool seen_nondebug_stmt = false; for (; !gsi_end_p (gsi); gsi_next (&gsi)) { gimple *stmt = gsi_stmt (gsi); @@ -5673,6 +5674,38 @@ gimple_verify_flow_info (void) gimple_label_label (label_stmt), bb->index); err = 1; } + + /* Check that no statements appear between a returns_twice call + and its associated abnormal edge. */ + if (gimple_code (stmt) == GIMPLE_CALL + && gimple_call_flags (stmt) & ECF_RETURNS_TWICE) + { + const char *misplaced = NULL; + /* TM is an exception: it points abnormal edges just after the + call that starts a transaction, i.e. it must end the BB. */ + if (gimple_call_builtin_p (stmt, BUILT_IN_TM_START)) + { + if (single_succ_p (bb) + && bb_has_abnormal_pred (single_succ (bb)) + && !gsi_one_nondebug_before_end_p (gsi)) + misplaced = "not last"; + } + else + { + if (seen_nondebug_stmt + && bb_has_abnormal_pred (bb)) + misplaced = "not first"; + } + if (misplaced) + { + error ("returns_twice call is %s in basic block %d", + misplaced, bb->index); + print_gimple_stmt (stderr, stmt, 0, TDF_SLIM); + err = 1; + } + } + if (!is_gimple_debug (stmt)) + seen_nondebug_stmt = true; } gsi = gsi_last_nondebug_bb (bb); -- cgit v1.1 From 68f37670eff0b872ce5dfd382c8d8f3206bdfc27 Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Tue, 19 Jul 2022 14:04:13 -0400 Subject: c++: shortcut bad reference binding [PR94894] In case of l/rvalue or cv-qual mismatch during reference binding, we try to give more helpful diagnostics by computing a bad conversion that allows the mismatch. 
But in doing so, we may end up considering and instantiating a conversion function that could induce a hard error and in turn cause us to reject otherwise valid code. We could just give up on producing a better diagnostic here, but ideally we'd preserve the better diagnostics for invalid code while avoiding unnecessary template instantiations for valid code. To that end, this patch adapts the bad conversion shortcutting mechanism from r12-3346-g47543e5f9d1fc5 to additionally handle this situation. The main observation from there is that during overload resolution, if we know we have a strictly viable candidate then we don't need to distinguish between an unviable and non-strictly viable candidate. Thus we don't need to distinguish between an invalid and bad conversion either, which is what this patch exploits. Of course, we don't know whether we have a strictly viable candidate until after the fact, so we still need to remember when we deferred distinguishing between an invalid and bad conversion. This patch adds a special conversion kind ck_deferred_bad for this purpose. PR c++/94894 PR c++/105766 PR c++/106201 gcc/cp/ChangeLog: * call.cc (enum conversion_kind): Add ck_deferred_bad enumerator. (has_next): Return false for it. (reference_binding): Return a ck_deferred_bad conversion instead of an actual bad conversion when LOOKUP_SHORTCUT_BAD_CONVS is set. Remove now obsolete early exit for the incomplete TO case. (implicit_conversion_1): Don't mask out LOOKUP_SHORTCUT_BAD_CONVS. (add_function_candidate): Set LOOKUP_SHORTCUT_BAD_CONVS iff shortcut_bad_convs. (missing_conversion_p): Also return true for a ck_deferred_bad conversion. * cp-tree.h (LOOKUP_SHORTCUT_BAD_CONVS): Define. gcc/testsuite/ChangeLog: * g++.dg/conversion/ref8.C: New test. * g++.dg/conversion/ref9.C: New test. 
--- gcc/cp/call.cc | 92 ++++++++++++++++++++++------------ gcc/cp/cp-tree.h | 5 ++ gcc/testsuite/g++.dg/conversion/ref8.C | 22 ++++++++ gcc/testsuite/g++.dg/conversion/ref9.C | 21 ++++++++ 4 files changed, 108 insertions(+), 32 deletions(-) create mode 100644 gcc/testsuite/g++.dg/conversion/ref8.C create mode 100644 gcc/testsuite/g++.dg/conversion/ref9.C (limited to 'gcc') diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc index 191c68c..01a7be1 100644 --- a/gcc/cp/call.cc +++ b/gcc/cp/call.cc @@ -59,7 +59,13 @@ enum conversion_kind { ck_ambig, ck_list, ck_aggr, - ck_rvalue + ck_rvalue, + /* When LOOKUP_SHORTCUT_BAD_CONVS is set, we may return a conversion of + this kind whenever we know the true conversion is either bad or outright + invalid, but we don't want to attempt to compute the bad conversion (for + sake of avoiding unnecessary instantiation). bad_p should always be set + for these. */ + ck_deferred_bad, }; /* The rank of the conversion. Order of the enumerals matters; better @@ -775,7 +781,8 @@ has_next (conversion_kind code) return !(code == ck_identity || code == ck_ambig || code == ck_list - || code == ck_aggr); + || code == ck_aggr + || code == ck_deferred_bad); } static conversion * @@ -1912,18 +1919,38 @@ reference_binding (tree rto, tree rfrom, tree expr, bool c_cast_p, int flags, difference in top-level cv-qualification is subsumed by the initialization itself and does not constitute a conversion. */ + bool maybe_valid_p = true; + /* [dcl.init.ref] Otherwise, the reference shall be an lvalue reference to a non-volatile const type, or the reference shall be an rvalue - reference. + reference. */ + if (!CP_TYPE_CONST_NON_VOLATILE_P (to) && !TYPE_REF_IS_RVALUE (rto)) + maybe_valid_p = false; - We try below to treat this as a bad conversion to improve diagnostics, - but if TO is an incomplete class, we need to reject this conversion - now to avoid unnecessary instantiation. 
*/ - if (!CP_TYPE_CONST_NON_VOLATILE_P (to) && !TYPE_REF_IS_RVALUE (rto) - && !COMPLETE_TYPE_P (to)) - return NULL; + /* [dcl.init.ref] + + Otherwise, a temporary of type "cv1 T1" is created and + initialized from the initializer expression using the rules for a + non-reference copy initialization. If T1 is reference-related to + T2, cv1 must be the same cv-qualification as, or greater + cv-qualification than, cv2; otherwise, the program is ill-formed. */ + if (related_p && !at_least_as_qualified_p (to, from)) + maybe_valid_p = false; + + /* We try below to treat an invalid reference binding as a bad conversion + to improve diagnostics, but doing so may cause otherwise unnecessary + instantiations that can lead to a hard error. So during the first pass + of overload resolution wherein we shortcut bad conversions, instead just + produce a special conversion indicating a second pass is necessary if + there's no strictly viable candidate. */ + if (!maybe_valid_p && (flags & LOOKUP_SHORTCUT_BAD_CONVS)) + { + conv = alloc_conversion (ck_deferred_bad); + conv->bad_p = true; + return conv; + } /* We're generating a temporary now, but don't bind any more in the conversion (specifically, don't slice the temporary returned by a @@ -1967,7 +1994,9 @@ reference_binding (tree rto, tree rfrom, tree expr, bool c_cast_p, int flags, sflags, complain); if (!new_second) return NULL; - return merge_conversion_sequences (t, new_second); + conv = merge_conversion_sequences (t, new_second); + gcc_assert (maybe_valid_p || conv->bad_p); + return conv; } } @@ -1976,24 +2005,7 @@ reference_binding (tree rto, tree rfrom, tree expr, bool c_cast_p, int flags, creation of a temporary. */ conv->need_temporary_p = true; conv->rvaluedness_matches_p = TYPE_REF_IS_RVALUE (rto); - - /* [dcl.init.ref] - - Otherwise, the reference shall be an lvalue reference to a - non-volatile const type, or the reference shall be an rvalue - reference. 
*/ - if (!CP_TYPE_CONST_NON_VOLATILE_P (to) && !TYPE_REF_IS_RVALUE (rto)) - conv->bad_p = true; - - /* [dcl.init.ref] - - Otherwise, a temporary of type "cv1 T1" is created and - initialized from the initializer expression using the rules for a - non-reference copy initialization. If T1 is reference-related to - T2, cv1 must be the same cv-qualification as, or greater - cv-qualification than, cv2; otherwise, the program is ill-formed. */ - if (related_p && !at_least_as_qualified_p (to, from)) - conv->bad_p = true; + conv->bad_p |= !maybe_valid_p; return conv; } @@ -2015,7 +2027,8 @@ implicit_conversion_1 (tree to, tree from, tree expr, bool c_cast_p, resolution, or after we've chosen one. */ flags &= (LOOKUP_ONLYCONVERTING|LOOKUP_NO_CONVERSION|LOOKUP_COPY_PARM |LOOKUP_NO_TEMP_BIND|LOOKUP_NO_RVAL_BIND|LOOKUP_PREFER_RVALUE - |LOOKUP_NO_NARROWING|LOOKUP_PROTECT|LOOKUP_NO_NON_INTEGRAL); + |LOOKUP_NO_NARROWING|LOOKUP_PROTECT|LOOKUP_NO_NON_INTEGRAL + |LOOKUP_SHORTCUT_BAD_CONVS); /* FIXME: actually we don't want warnings either, but we can't just have 'complain &= ~(tf_warning|tf_error)' because it would cause @@ -2433,6 +2446,11 @@ add_function_candidate (struct z_candidate **candidates, if (! viable) goto out; + if (shortcut_bad_convs) + flags |= LOOKUP_SHORTCUT_BAD_CONVS; + else + flags &= ~LOOKUP_SHORTCUT_BAD_CONVS; + /* Third, for F to be a viable function, there shall exist for each argument an implicit conversion sequence that converts that argument to the corresponding parameter of F. */ @@ -6038,14 +6056,24 @@ perfect_candidate_p (z_candidate *cand) return true; } -/* True iff one of CAND's argument conversions is NULL. */ +/* True iff one of CAND's argument conversions is missing. 
*/ static bool missing_conversion_p (const z_candidate *cand) { for (unsigned i = 0; i < cand->num_convs; ++i) - if (!cand->convs[i]) - return true; + { + conversion *conv = cand->convs[i]; + if (!conv) + return true; + if (conv->kind == ck_deferred_bad) + { + /* We don't know whether this conversion is outright invalid or + just bad, so conservatively assume it's missing. */ + gcc_checking_assert (conv->bad_p); + return true; + } + } return false; } diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index cf51c39..3278b41 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -5877,6 +5877,11 @@ enum overload_flags { NO_SPECIAL = 0, DTOR_FLAG, TYPENAME_FLAG }; #define LOOKUP_REVERSED (LOOKUP_REWRITTEN << 1) /* We're initializing an aggregate from a parenthesized list of values. */ #define LOOKUP_AGGREGATE_PAREN_INIT (LOOKUP_REVERSED << 1) +/* We're computing conversions as part of a first pass of overload resolution + wherein we don't try to distinguish an unviable candidate from a + non-strictly viable candidate and thus can avoid computing unnecessary + bad conversions. */ +#define LOOKUP_SHORTCUT_BAD_CONVS (LOOKUP_AGGREGATE_PAREN_INIT << 1) /* These flags are used by the conversion code. CONV_IMPLICIT : Perform implicit conversions (standard and user-defined). 
diff --git a/gcc/testsuite/g++.dg/conversion/ref8.C b/gcc/testsuite/g++.dg/conversion/ref8.C new file mode 100644 index 0000000..0dd29f7 --- /dev/null +++ b/gcc/testsuite/g++.dg/conversion/ref8.C @@ -0,0 +1,22 @@ +// PR c++/105766 +// { dg-do compile { target c++20 } } + +template +struct baz { + baz() = default; + baz(int) requires requires { T(0); }; +}; + +struct foo; + +struct bar { + bar() = default; + bar(foo&); + bar(int); +}; + +struct foo { + baz m_bars; +}; + +foo a; diff --git a/gcc/testsuite/g++.dg/conversion/ref9.C b/gcc/testsuite/g++.dg/conversion/ref9.C new file mode 100644 index 0000000..e6dfc03 --- /dev/null +++ b/gcc/testsuite/g++.dg/conversion/ref9.C @@ -0,0 +1,21 @@ +// PR c++/106201 +// { dg-do compile { target c++11 } } + +struct A { + template + A(const T&); +}; + +struct B { + template B(const T&); +}; + +void f(A&); +void f(B); + +struct C { }; + +int main() { + C c; + f(c); +} -- cgit v1.1 From f838d15641d256e21ffc126c3277b290ed743928 Mon Sep 17 00:00:00 2001 From: Harald Anlauf Date: Mon, 18 Jul 2022 22:34:53 +0200 Subject: Fortran: error recovery on invalid array reference of non-array [PR103590] gcc/fortran/ChangeLog: PR fortran/103590 * resolve.cc (find_array_spec): Change function result to bool to enable error recovery. Generate error message for invalid array reference of non-array entity instead of an internal error. (gfc_resolve_ref): Use function result from find_array_spec for error recovery. gcc/testsuite/ChangeLog: PR fortran/103590 * gfortran.dg/associate_54.f90: Adjust. * gfortran.dg/associate_59.f90: New test. 
--- gcc/fortran/resolve.cc | 13 ++++++++++--- gcc/testsuite/gfortran.dg/associate_54.f90 | 3 +-- gcc/testsuite/gfortran.dg/associate_59.f90 | 9 +++++++++ 3 files changed, 20 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gfortran.dg/associate_59.f90 (limited to 'gcc') diff --git a/gcc/fortran/resolve.cc b/gcc/fortran/resolve.cc index 2ebf076..ca11475 100644 --- a/gcc/fortran/resolve.cc +++ b/gcc/fortran/resolve.cc @@ -4976,7 +4976,7 @@ gfc_resolve_dim_arg (gfc_expr *dim) static void resolve_assoc_var (gfc_symbol* sym, bool resolve_target); -static void +static bool find_array_spec (gfc_expr *e) { gfc_array_spec *as; @@ -5004,7 +5004,11 @@ find_array_spec (gfc_expr *e) { case REF_ARRAY: if (as == NULL) - gfc_internal_error ("find_array_spec(): Missing spec"); + { + gfc_error ("Invalid array reference of a non-array entity at %L", + &ref->u.ar.where); + return false; + } ref->u.ar.as = as; as = NULL; @@ -5028,6 +5032,8 @@ find_array_spec (gfc_expr *e) if (as != NULL) gfc_internal_error ("find_array_spec(): unused as(2)"); + + return true; } @@ -5346,7 +5352,8 @@ gfc_resolve_ref (gfc_expr *expr) for (ref = expr->ref; ref; ref = ref->next) if (ref->type == REF_ARRAY && ref->u.ar.as == NULL) { - find_array_spec (expr); + if (!find_array_spec (expr)) + return false; break; } diff --git a/gcc/testsuite/gfortran.dg/associate_54.f90 b/gcc/testsuite/gfortran.dg/associate_54.f90 index 003175a..680ad5d 100644 --- a/gcc/testsuite/gfortran.dg/associate_54.f90 +++ b/gcc/testsuite/gfortran.dg/associate_54.f90 @@ -26,9 +26,8 @@ contains integer, intent(in) :: a associate (state => obj%state(TEST_STATES)) ! { dg-error "is used as array" } ! state = a - state(TEST_STATE) = a + state(TEST_STATE) = a ! 
{ dg-error "array reference of a non-array" } end associate end subroutine test_alter_state1 end module test - diff --git a/gcc/testsuite/gfortran.dg/associate_59.f90 b/gcc/testsuite/gfortran.dg/associate_59.f90 new file mode 100644 index 0000000..2da9773 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/associate_59.f90 @@ -0,0 +1,9 @@ +! { dg-do compile } +! PR fortran/103590 - ICE: find_array_spec(): Missing spec +! Contributed by G.Steinmetz + +program p + associate (a => 1) + print *, [character(a(1)) :: '1'] ! { dg-error "Scalar INTEGER expression" } + end associate +end -- cgit v1.1 From b0cc57cd76f511f29cab233654249817312ec2a6 Mon Sep 17 00:00:00 2001 From: Andrew MacLeod Date: Fri, 15 Jul 2022 09:35:29 -0400 Subject: Remove recursion from range_from_dom. Avoid calling range_of_dom recursively by putting all nodes to be calculated on the worklist, and figure out which kind they are when removed from the list. * gimple-range-cache.cc (ranger_cache::resolve_dom): New. (ranger_cache::range_from_dom): Put all nodes to be calculated in the worklist and resolve after the dom walk. * gimple-range-cache.h (resolve_dom): New prototype. --- gcc/gimple-range-cache.cc | 84 ++++++++++++++++++++++++++--------------------- gcc/gimple-range-cache.h | 1 + 2 files changed, 48 insertions(+), 37 deletions(-) (limited to 'gcc') diff --git a/gcc/gimple-range-cache.cc b/gcc/gimple-range-cache.cc index da7b805..20dd5ea 100644 --- a/gcc/gimple-range-cache.cc +++ b/gcc/gimple-range-cache.cc @@ -1312,6 +1312,38 @@ ranger_cache::fill_block_cache (tree name, basic_block bb, basic_block def_bb) fprintf (dump_file, " Propagation update done.\n"); } +// Resolve the range of BB if the dominators range is R by calculating incoming +// edges to this block. All lead back to the dominator so should be cheap. +// The range for BB is set and returned in R. 
+ +void +ranger_cache::resolve_dom (vrange &r, tree name, basic_block bb) +{ + basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (name)); + basic_block dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb); + + // if it doesn't already have a value, store the incoming range. + if (!m_on_entry.bb_range_p (name, dom_bb) && def_bb != dom_bb) + { + // If the range can't be store, don't try to accumulate + // the range in PREV_BB due to excessive recalculations. + if (!m_on_entry.set_bb_range (name, dom_bb, r)) + return; + } + // With the dominator set, we should be able to cheaply query + // each incoming edge now and accumulate the results. + r.set_undefined (); + edge e; + edge_iterator ei; + Value_Range er (TREE_TYPE (name)); + FOR_EACH_EDGE (e, ei, bb->preds) + { + edge_range (er, e, name, RFD_READ_ONLY); + r.union_ (er); + } + // Set the cache in PREV_BB so it is not calculated again. + m_on_entry.set_bb_range (name, bb, r); +} // Get the range of NAME from dominators of BB and return it in R. Search the // dominator tree based on MODE. @@ -1341,7 +1373,7 @@ ranger_cache::range_from_dom (vrange &r, tree name, basic_block start_bb, // Default value is global range. get_global_range (r, name); - // Search until a value is found, pushing outgoing edges encountered. + // Search until a value is found, pushing blocks which may need calculating. for (bb = get_immediate_dominator (CDI_DOMINATORS, start_bb); bb; prev_bb = bb, bb = get_immediate_dominator (CDI_DOMINATORS, bb)) @@ -1351,40 +1383,7 @@ ranger_cache::range_from_dom (vrange &r, tree name, basic_block start_bb, // This block has an outgoing range. if (m_gori.has_edge_range_p (name, bb)) - { - // Only outgoing ranges to single_pred blocks are dominated by - // outgoing edge ranges, so those can be simply adjusted on the fly. 
- edge e = find_edge (bb, prev_bb); - if (e && single_pred_p (prev_bb)) - m_workback.quick_push (prev_bb); - else if (mode == RFD_FILL) - { - // Multiple incoming edges, so recursively satisfy this block - // if it doesn't already have a value, and store the range. - if (!m_on_entry.bb_range_p (name, bb) && def_bb != bb) - { - // If the dominator has not been set, look it up. - range_from_dom (r, name, bb, RFD_FILL); - // If the range can't be store, don't try to accumulate - // the range in PREV_BB due to excessive recalculations. - if (!m_on_entry.set_bb_range (name, bb, r)) - break; - } - // With the dominator set, we should be able to cheaply query - // each incoming edge now and accumulate the results. - r.set_undefined (); - edge_iterator ei; - Value_Range er (TREE_TYPE (name)); - FOR_EACH_EDGE (e, ei, prev_bb->preds) - { - edge_range (er, e, name, RFD_READ_ONLY); - r.union_ (er); - } - // Set the cache in PREV_BB so it is not calculated again. - m_on_entry.set_bb_range (name, prev_bb, r); - break; - } - } + m_workback.quick_push (prev_bb); if (def_bb == bb) break; @@ -1403,14 +1402,25 @@ ranger_cache::range_from_dom (vrange &r, tree name, basic_block start_bb, fprintf (dump_file, " at function top\n"); } - // Now process any outgoing edges that we seen along the way. + // Now process any blocks wit incoming edges that nay have adjustemnts. while (m_workback.length () > start_limit) { int_range_max er; prev_bb = m_workback.pop (); + if (!single_pred_p (prev_bb)) + { + // Non single pred means we need to cache a vsalue in the dominator + // so we can cheaply calculate incoming edges to this block, and + // then store the resulting value. If processing mode is not + // RFD_FILL, then the cache cant be stored to, so don't try. + // Otherwise this becomes a quadratic timed calculation. 
+ if (mode == RFD_FILL) + resolve_dom (r, name, prev_bb); + continue; + } + edge e = single_pred_edge (prev_bb); bb = e->src; - if (m_gori.outgoing_edge_range_p (er, e, name, *this)) { r.intersect (er); diff --git a/gcc/gimple-range-cache.h b/gcc/gimple-range-cache.h index 0341192..45053b5 100644 --- a/gcc/gimple-range-cache.h +++ b/gcc/gimple-range-cache.h @@ -107,6 +107,7 @@ private: RFD_FILL // Scan DOM tree, updating important nodes. }; bool range_from_dom (vrange &r, tree name, basic_block bb, enum rfd_mode); + void resolve_dom (vrange &r, tree name, basic_block bb); void range_of_def (vrange &r, tree name, basic_block bb = NULL); void entry_range (vrange &r, tree expr, basic_block bb, enum rfd_mode); void exit_range (vrange &r, tree expr, basic_block bb, enum rfd_mode); -- cgit v1.1 From dbb093f4f15ea66f2ce5cd2dc1903a6894563356 Mon Sep 17 00:00:00 2001 From: Andrew MacLeod Date: Mon, 18 Jul 2022 15:04:23 -0400 Subject: Resolve complicated join nodes in range_from_dom. Join nodes which carry outgoing ranges on incoming edges are uncommon, but can still be resolved by setting the dominator range, and then calculating incoming edges. Avoid doing so if one of the incoing edges is not dominated by the same dominator. * gimple-range-cache.cc (ranger_cache::range_from_dom): Check for incoming ranges on join nodes and add to worklist. --- gcc/gimple-range-cache.cc | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'gcc') diff --git a/gcc/gimple-range-cache.cc b/gcc/gimple-range-cache.cc index 20dd5ea..f3292fc 100644 --- a/gcc/gimple-range-cache.cc +++ b/gcc/gimple-range-cache.cc @@ -1384,6 +1384,32 @@ ranger_cache::range_from_dom (vrange &r, tree name, basic_block start_bb, // This block has an outgoing range. if (m_gori.has_edge_range_p (name, bb)) m_workback.quick_push (prev_bb); + else + { + // Normally join blocks don't carry any new range information on + // incoming edges. 
If the first incoming edge to this block does + // generate a range, calculate the ranges if all incoming edges + // are also dominated by the dominator. (Avoids backedges which + // will break the rule of moving only upward in the domniator tree). + // If the first pred does not generate a range, then we will be + // using the dominator range anyway, so thats all the check needed. + if (EDGE_COUNT (prev_bb->preds) > 1 + && m_gori.has_edge_range_p (name, EDGE_PRED (prev_bb, 0)->src)) + { + edge e; + edge_iterator ei; + bool all_dom = true; + FOR_EACH_EDGE (e, ei, prev_bb->preds) + if (e->src != bb + && !dominated_by_p (CDI_DOMINATORS, e->src, bb)) + { + all_dom = false; + break; + } + if (all_dom) + m_workback.quick_push (prev_bb); + } + } if (def_bb == bb) break; -- cgit v1.1 From 7c0c10db24f5c7f8c30810699fb0aff3c900d41a Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Wed, 20 Jul 2022 00:16:34 +0000 Subject: Daily bump. --- gcc/ChangeLog | 66 +++++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/analyzer/ChangeLog | 23 +++++++++++++++++ gcc/cp/ChangeLog | 23 +++++++++++++++++ gcc/fortran/ChangeLog | 9 +++++++ gcc/testsuite/ChangeLog | 35 ++++++++++++++++++++++++++ 6 files changed, 157 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 43b70ba..f6b5650 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,69 @@ +2022-07-19 Andrew MacLeod + + * gimple-range-cache.cc (ranger_cache::range_from_dom): Check + for incoming ranges on join nodes and add to worklist. + +2022-07-19 Andrew MacLeod + + * gimple-range-cache.cc (ranger_cache::resolve_dom): New. + (ranger_cache::range_from_dom): Put all nodes to be calculated + in the worklist and resolve after the dom walk. + * gimple-range-cache.h (resolve_dom): New prototype. + +2022-07-19 Alexander Monakov + + * tree-cfg.cc (gimple_verify_flow_info): Check placement of + returns_twice calls. 
+ +2022-07-19 Alexander Monakov + + * cfghooks.cc (duplicate_block): Expand comment. + * tree-cfg.cc (gimple_can_duplicate_bb_p): Reject blocks with + calls that may return twice. + +2022-07-19 Alexander Monakov + + * tree-ssa-sink.cc (select_best_block): Punt if selected block + has incoming abnormal edges. + +2022-07-19 Martin Liska + + * doc/extend.texi: Remove trailing :. + +2022-07-19 Prathamesh Kulkarni + + * tree-ssa-forwprop.cc (simplify_permutation): Use lhs type + instead of TREE_TYPE (arg0) as result type in folding VEC_PERM_EXPR. + +2022-07-19 Richard Biener + + PR middle-end/106331 + * builtins.cc (get_memory_rtx): Compute alignment from + the original address and set MEM_OFFSET to unknown when + we create a MEM_EXPR from the base object of the address. + +2022-07-19 Richard Biener + + PR lto/106334 + * dwarf2out.cc (dwarf2out_register_external_die): Allow + map entry re-use during WPA. + +2022-07-19 Roger Sayle + Richard Biener + + PR c/106264 + * builtins.cc (fold_builtin_frexp): Call suppress_warning on + COMPOUND_EXPR to silence spurious warning if result isn't used. + (fold_builtin_modf): Likewise. + (do_mpfr_remquo): Likewise. + +2022-07-19 Takayuki 'January June' Suwa + + * config/xtensa/xtensa.cc (xtensa_rtx_costs): + Change the relative cost of '(set (reg) (const_int N))' where + N fits into signed 12-bit from 4 to 0 if optimizing for size. + And use the appropriate macro instead of the bare number 4. + 2022-07-18 Andrew MacLeod PR tree-optimization/106280 diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index a394c7a..964c55a 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20220719 +20220720 diff --git a/gcc/analyzer/ChangeLog b/gcc/analyzer/ChangeLog index 4f010eb..884be16 100644 --- a/gcc/analyzer/ChangeLog +++ b/gcc/analyzer/ChangeLog @@ -1,3 +1,26 @@ +2022-07-19 David Malcolm + + PR analyzer/106321 + * constraint-manager.h (bounded_ranges::get_count): New. + (bounded_ranges::get_range): New. 
+ * engine.cc (impl_region_model_context::on_bounded_ranges): New. + * exploded-graph.h (impl_region_model_context::on_bounded_ranges): + New decl. + * region-model.cc (region_model::apply_constraints_for_gswitch): + Potentially call ctxt->on_bounded_ranges. + * region-model.h (region_model_context::on_bounded_ranges): New + vfunc. + (noop_region_model_context::on_bounded_ranges): New. + (region_model_context_decorator::on_bounded_ranges): New. + * sm-taint.cc: Include "analyzer/constraint-manager.h". + (taint_state_machine::on_bounded_ranges): New. + * sm.h (state_machine::on_bounded_ranges): New. + +2022-07-19 David Malcolm + + * engine.cc (exploded_graph::process_node): Show any description + of the out-edge when logging it for consideration. + 2022-07-15 David Malcolm PR analyzer/106284 diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index ce6f8ea..2664940 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,26 @@ +2022-07-19 Patrick Palka + + PR c++/94894 + PR c++/105766 + PR c++/106201 + * call.cc (enum conversion_kind): Add ck_deferred_bad enumerator. + (has_next): Return false for it. + (reference_binding): Return a ck_deferred_bad conversion instead + of an actual bad conversion when LOOKUP_SHORTCUT_BAD_CONVS is set. + Remove now obsolete early exit for the incomplete TO case. + (implicit_conversion_1): Don't mask out LOOKUP_SHORTCUT_BAD_CONVS. + (add_function_candidate): Set LOOKUP_SHORTCUT_BAD_CONVS iff + shortcut_bad_convs. + (missing_conversion_p): Also return true for a ck_deferred_bad + conversion. + * cp-tree.h (LOOKUP_SHORTCUT_BAD_CONVS): Define. + +2022-07-19 Jonathan Wakely + + * cp-objcp-common.cc (names_builtin_p): Return true for + RID_REF_CONSTRUCTS_FROM_TEMPORARY and + RID_REF_CONVERTS_FROM_TEMPORARY. 
+ 2022-07-15 Marek Polacek PR c++/104477 diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index cfe1e6d..9296258 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,12 @@ +2022-07-19 Harald Anlauf + + PR fortran/103590 + * resolve.cc (find_array_spec): Change function result to bool to + enable error recovery. Generate error message for invalid array + reference of non-array entity instead of an internal error. + (gfc_resolve_ref): Use function result from find_array_spec for + error recovery. + 2022-07-15 Steve Kargl PR fortran/104313 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 36913da..d04b83c 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,38 @@ +2022-07-19 Harald Anlauf + + PR fortran/103590 + * gfortran.dg/associate_54.f90: Adjust. + * gfortran.dg/associate_59.f90: New test. + +2022-07-19 Patrick Palka + + PR c++/94894 + PR c++/105766 + PR c++/106201 + * g++.dg/conversion/ref8.C: New test. + * g++.dg/conversion/ref9.C: New test. + +2022-07-19 Alexander Monakov + + * gcc.dg/setjmp-7.c: New test. + +2022-07-19 David Malcolm + + PR analyzer/106321 + * gcc.dg/analyzer/torture/taint-read-index-2.c: Add test coverage + for switch statements. + +2022-07-19 Richard Biener + + PR middle-end/106331 + * gfortran.dg/pr106331.f90: New testcase. + +2022-07-19 Roger Sayle + Richard Biener + + PR c/106264 + * gcc.dg/pr106264.c: New test case. + 2022-07-18 Richard Biener * gcc.dg/tree-ssa/ldist-24.c: XFAIL. -- cgit v1.1 From 68871a008e686dbe56ff0b502f2864176a140716 Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Tue, 19 Jul 2022 20:22:18 -0400 Subject: analyzer: don't track string literals in the store [PR106359] Doing so speeds up -fanalyzer from taking over 4 hours to under a minute on the Linux kernel's sound/soc/codecs/cs47l90.c gcc/analyzer/ChangeLog: PR analyzer/106359 * region.h (string_region::tracked_p): New. * store.cc (binding_cluster::binding_cluster): Move here from store.h. 
Add assertion that base_region is tracked_p. * store.h (binding_cluster::binding_cluster): Move to store.cc. Signed-off-by: David Malcolm --- gcc/analyzer/region.h | 4 ++++ gcc/analyzer/store.cc | 7 +++++++ gcc/analyzer/store.h | 4 +--- 3 files changed, 12 insertions(+), 3 deletions(-) (limited to 'gcc') diff --git a/gcc/analyzer/region.h b/gcc/analyzer/region.h index 60d8149..fd0d4a0 100644 --- a/gcc/analyzer/region.h +++ b/gcc/analyzer/region.h @@ -1151,6 +1151,10 @@ public: void dump_to_pp (pretty_printer *pp, bool simple) const final override; + /* We assume string literals are immutable, so we don't track them in + the store. */ + bool tracked_p () const final override { return false; } + tree get_string_cst () const { return m_string_cst; } private: diff --git a/gcc/analyzer/store.cc b/gcc/analyzer/store.cc index 06151d8..e3dabf3 100644 --- a/gcc/analyzer/store.cc +++ b/gcc/analyzer/store.cc @@ -1103,6 +1103,13 @@ binding_map::remove_overlapping_bindings (store_manager *mgr, /* class binding_cluster. */ +binding_cluster::binding_cluster (const region *base_region) +: m_base_region (base_region), m_map (), + m_escaped (false), m_touched (false) +{ + gcc_assert (base_region->tracked_p ()); +} + /* binding_cluster's copy ctor. 
*/ binding_cluster::binding_cluster (const binding_cluster &other) diff --git a/gcc/analyzer/store.h b/gcc/analyzer/store.h index 368b299..9b54c7b 100644 --- a/gcc/analyzer/store.h +++ b/gcc/analyzer/store.h @@ -544,9 +544,7 @@ public: typedef hash_map map_t; typedef map_t::iterator iterator_t; - binding_cluster (const region *base_region) - : m_base_region (base_region), m_map (), - m_escaped (false), m_touched (false) {} + binding_cluster (const region *base_region); binding_cluster (const binding_cluster &other); binding_cluster& operator=(const binding_cluster &other); -- cgit v1.1 From f9d4c3b45c5ed5f45c8089c990dbd4e181929c3d Mon Sep 17 00:00:00 2001 From: liuhongt Date: Tue, 19 Jul 2022 17:24:52 +0800 Subject: Lower complex type move to enable vectorization for complex type load&store. 2022-07-20 Richard Biener Hongtao Liu gcc/ChangeLog: PR tree-optimization/106010 * tree-complex.cc (init_dont_simulate_again): Lower complex type move. (expand_complex_move): Also expand COMPLEX_CST for rhs. gcc/testsuite/ChangeLog: * gcc.target/i386/pr106010-1a.c: New test. * gcc.target/i386/pr106010-1b.c: New test. * gcc.target/i386/pr106010-1c.c: New test. * gcc.target/i386/pr106010-2a.c: New test. * gcc.target/i386/pr106010-2b.c: New test. * gcc.target/i386/pr106010-2c.c: New test. * gcc.target/i386/pr106010-3a.c: New test. * gcc.target/i386/pr106010-3b.c: New test. * gcc.target/i386/pr106010-3c.c: New test. * gcc.target/i386/pr106010-4a.c: New test. * gcc.target/i386/pr106010-4b.c: New test. * gcc.target/i386/pr106010-4c.c: New test. * gcc.target/i386/pr106010-5a.c: New test. * gcc.target/i386/pr106010-5b.c: New test. * gcc.target/i386/pr106010-5c.c: New test. * gcc.target/i386/pr106010-6a.c: New test. * gcc.target/i386/pr106010-6b.c: New test. * gcc.target/i386/pr106010-6c.c: New test. * gcc.target/i386/pr106010-7a.c: New test. * gcc.target/i386/pr106010-7b.c: New test. * gcc.target/i386/pr106010-7c.c: New test. * gcc.target/i386/pr106010-8a.c: New test. 
* gcc.target/i386/pr106010-8b.c: New test. * gcc.target/i386/pr106010-8c.c: New test. * gcc.target/i386/pr106010-9a.c: New test. * gcc.target/i386/pr106010-9b.c: New test. * gcc.target/i386/pr106010-9c.c: New test. * gcc.target/i386/pr106010-9d.c: New test. --- gcc/testsuite/gcc.target/i386/pr106010-1a.c | 58 ++++++++++ gcc/testsuite/gcc.target/i386/pr106010-1b.c | 63 +++++++++++ gcc/testsuite/gcc.target/i386/pr106010-1c.c | 41 ++++++++ gcc/testsuite/gcc.target/i386/pr106010-2a.c | 82 +++++++++++++++ gcc/testsuite/gcc.target/i386/pr106010-2b.c | 62 +++++++++++ gcc/testsuite/gcc.target/i386/pr106010-2c.c | 47 +++++++++ gcc/testsuite/gcc.target/i386/pr106010-3a.c | 80 ++++++++++++++ gcc/testsuite/gcc.target/i386/pr106010-3b.c | 126 ++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr106010-3c.c | 69 ++++++++++++ gcc/testsuite/gcc.target/i386/pr106010-4a.c | 101 ++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr106010-4b.c | 67 ++++++++++++ gcc/testsuite/gcc.target/i386/pr106010-4c.c | 54 ++++++++++ gcc/testsuite/gcc.target/i386/pr106010-5a.c | 117 +++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr106010-5b.c | 80 ++++++++++++++ gcc/testsuite/gcc.target/i386/pr106010-5c.c | 62 +++++++++++ gcc/testsuite/gcc.target/i386/pr106010-6a.c | 115 ++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr106010-6b.c | 157 ++++++++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr106010-6c.c | 80 ++++++++++++++ gcc/testsuite/gcc.target/i386/pr106010-7a.c | 58 ++++++++++ gcc/testsuite/gcc.target/i386/pr106010-7b.c | 63 +++++++++++ gcc/testsuite/gcc.target/i386/pr106010-7c.c | 41 ++++++++ gcc/testsuite/gcc.target/i386/pr106010-8a.c | 58 ++++++++++ gcc/testsuite/gcc.target/i386/pr106010-8b.c | 53 ++++++++++ gcc/testsuite/gcc.target/i386/pr106010-8c.c | 38 +++++++ gcc/testsuite/gcc.target/i386/pr106010-9a.c | 89 ++++++++++++++++ gcc/testsuite/gcc.target/i386/pr106010-9b.c | 90 ++++++++++++++++ gcc/testsuite/gcc.target/i386/pr106010-9c.c | 90 ++++++++++++++++ 
gcc/testsuite/gcc.target/i386/pr106010-9d.c | 92 ++++++++++++++++ gcc/tree-complex.cc | 9 +- 29 files changed, 2141 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr106010-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr106010-1b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr106010-1c.c create mode 100644 gcc/testsuite/gcc.target/i386/pr106010-2a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr106010-2b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr106010-2c.c create mode 100644 gcc/testsuite/gcc.target/i386/pr106010-3a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr106010-3b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr106010-3c.c create mode 100644 gcc/testsuite/gcc.target/i386/pr106010-4a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr106010-4b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr106010-4c.c create mode 100644 gcc/testsuite/gcc.target/i386/pr106010-5a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr106010-5b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr106010-5c.c create mode 100644 gcc/testsuite/gcc.target/i386/pr106010-6a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr106010-6b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr106010-6c.c create mode 100644 gcc/testsuite/gcc.target/i386/pr106010-7a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr106010-7b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr106010-7c.c create mode 100644 gcc/testsuite/gcc.target/i386/pr106010-8a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr106010-8b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr106010-8c.c create mode 100644 gcc/testsuite/gcc.target/i386/pr106010-9a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr106010-9b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr106010-9c.c create mode 100644 gcc/testsuite/gcc.target/i386/pr106010-9d.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/i386/pr106010-1a.c 
b/gcc/testsuite/gcc.target/i386/pr106010-1a.c new file mode 100644 index 0000000..b608f48 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106010-1a.c @@ -0,0 +1,58 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx -ftree-vectorize -fvect-cost-model=unlimited -fdump-tree-vect-details -mprefer-vector-width=256" } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 6 "vect" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 2 "vect" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 2 "vect" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 2 "vect" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 2 "vect" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 2 "vect" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 2 "vect" } } */ + +#define N 10000 +void +__attribute__((noipa)) +foo_pd (_Complex double* a, _Complex double* b) +{ + for (int i = 0; i != N; i++) + a[i] = b[i]; +} + +void +__attribute__((noipa)) +foo_ps (_Complex float* a, _Complex float* b) +{ + for (int i = 0; i != N; i++) + a[i] = b[i]; +} + +void +__attribute__((noipa)) +foo_epi64 (_Complex long long* a, _Complex long long* b) +{ + for (int i = 0; i != N; i++) + a[i] = b[i]; +} + +void +__attribute__((noipa)) +foo_epi32 (_Complex int* a, _Complex int* b) +{ + for (int i = 0; i != N; i++) + a[i] = b[i]; +} + +void +__attribute__((noipa)) +foo_epi16 (_Complex short* a, _Complex short* b) +{ + for (int i = 0; i != N; i++) + a[i] = b[i]; +} + +void +__attribute__((noipa)) +foo_epi8 (_Complex char* a, _Complex char* b) +{ + for (int i = 0; i != N; i++) + a[i] = b[i]; +} diff --git a/gcc/testsuite/gcc.target/i386/pr106010-1b.c b/gcc/testsuite/gcc.target/i386/pr106010-1b.c new file mode 100644 index 0000000..0f377c3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106010-1b.c @@ -0,0 +1,63 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx -ftree-vectorize 
-fvect-cost-model=unlimited -mprefer-vector-width=256" } */ +/* { dg-require-effective-target avx } */ + +#include "avx-check.h" +#include +#include "pr106010-1a.c" + +void +avx_test (void) +{ + _Complex double* pd_src = (_Complex double*) malloc (2 * N * sizeof (double)); + _Complex double* pd_dst = (_Complex double*) malloc (2 * N * sizeof (double)); + _Complex float* ps_src = (_Complex float*) malloc (2 * N * sizeof (float)); + _Complex float* ps_dst = (_Complex float*) malloc (2 * N * sizeof (float)); + _Complex long long* epi64_src = (_Complex long long*) malloc (2 * N * sizeof (long long)); + _Complex long long* epi64_dst = (_Complex long long*) malloc (2 * N * sizeof (long long)); + _Complex int* epi32_src = (_Complex int*) malloc (2 * N * sizeof (int)); + _Complex int* epi32_dst = (_Complex int*) malloc (2 * N * sizeof (int)); + _Complex short* epi16_src = (_Complex short*) malloc (2 * N * sizeof (short)); + _Complex short* epi16_dst = (_Complex short*) malloc (2 * N * sizeof (short)); + _Complex char* epi8_src = (_Complex char*) malloc (2 * N * sizeof (char)); + _Complex char* epi8_dst = (_Complex char*) malloc (2 * N * sizeof (char)); + char* p_init = (char*) malloc (2 * N * sizeof (double)); + + __builtin_memset (pd_dst, 0, 2 * N * sizeof (double)); + __builtin_memset (ps_dst, 0, 2 * N * sizeof (float)); + __builtin_memset (epi64_dst, 0, 2 * N * sizeof (long long)); + __builtin_memset (epi32_dst, 0, 2 * N * sizeof (int)); + __builtin_memset (epi16_dst, 0, 2 * N * sizeof (short)); + __builtin_memset (epi8_dst, 0, 2 * N * sizeof (char)); + + for (int i = 0; i != 2 * N * sizeof (double); i++) + p_init[i] = i; + + memcpy (pd_src, p_init, 2 * N * sizeof (double)); + memcpy (ps_src, p_init, 2 * N * sizeof (float)); + memcpy (epi64_src, p_init, 2 * N * sizeof (long long)); + memcpy (epi32_src, p_init, 2 * N * sizeof (int)); + memcpy (epi16_src, p_init, 2 * N * sizeof (short)); + memcpy (epi8_src, p_init, 2 * N * sizeof (char)); + + foo_pd (pd_dst, pd_src); + 
foo_ps (ps_dst, ps_src); + foo_epi64 (epi64_dst, epi64_src); + foo_epi32 (epi32_dst, epi32_src); + foo_epi16 (epi16_dst, epi16_src); + foo_epi8 (epi8_dst, epi8_src); + if (__builtin_memcmp (pd_dst, pd_src, N * 2 * sizeof (double)) != 0) + __builtin_abort (); + if (__builtin_memcmp (ps_dst, ps_src, N * 2 * sizeof (float)) != 0) + __builtin_abort (); + if (__builtin_memcmp (epi64_dst, epi64_src, N * 2 * sizeof (long long)) != 0) + __builtin_abort (); + if (__builtin_memcmp (epi32_dst, epi32_src, N * 2 * sizeof (int)) != 0) + __builtin_abort (); + if (__builtin_memcmp (epi16_dst, epi16_src, N * 2 * sizeof (short)) != 0) + __builtin_abort (); + if (__builtin_memcmp (epi8_dst, epi8_src, N * 2 * sizeof (char)) != 0) + __builtin_abort (); + + return; +} diff --git a/gcc/testsuite/gcc.target/i386/pr106010-1c.c b/gcc/testsuite/gcc.target/i386/pr106010-1c.c new file mode 100644 index 0000000..f07e9fb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106010-1c.c @@ -0,0 +1,41 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -ftree-vectorize -fvect-cost-model=unlimited -mprefer-vector-width=256 -fdump-tree-vect-details" } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 2 "vect" } } */ +/* { dg-require-effective-target avx512fp16 } */ + +#include + +static void do_test (void); + +#define DO_TEST do_test +#define AVX512FP16 +#include "avx512-check.h" + +#define N 10000 + +void +__attribute__((noipa)) +foo_ph (_Complex _Float16* a, _Complex _Float16* b) +{ + for (int i = 0; i != N; i++) + a[i] = b[i]; +} + +static void +do_test (void) +{ + _Complex _Float16* ph_src = (_Complex _Float16*) malloc (2 * N * sizeof (_Float16)); + _Complex _Float16* ph_dst = (_Complex _Float16*) malloc (2 * N * sizeof (_Float16)); + char* p_init = (char*) malloc (2 * N * sizeof (_Float16)); + + __builtin_memset (ph_dst, 0, 2 * N * sizeof (_Float16)); + + for (int i = 0; i != 2 * N * sizeof (_Float16); i++) + p_init[i] = i; + + memcpy (ph_src, p_init, 2 * N * 
sizeof (_Float16)); + + foo_ph (ph_dst, ph_src); + if (__builtin_memcmp (ph_dst, ph_src, N * 2 * sizeof (_Float16)) != 0) + __builtin_abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/pr106010-2a.c b/gcc/testsuite/gcc.target/i386/pr106010-2a.c new file mode 100644 index 0000000..d2e2f8d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106010-2a.c @@ -0,0 +1,82 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx -ftree-vectorize -fvect-cost-model=unlimited -fdump-tree-slp-details -mprefer-vector-width=256" } */ +/* { dg-final { scan-tree-dump-times "basic block part vectorized using (?:32|64) byte vectors" 6 "slp2" } }*/ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 2 "slp2" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 2 "slp2" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 2 "slp2" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 2 "slp2" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 2 "slp2" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 2 "slp2" } } */ + +void +__attribute__((noipa)) +foo_pd (_Complex double* a, _Complex double* __restrict b) +{ + a[0] = b[0]; + a[1] = b[1]; +} + +void +__attribute__((noipa)) +foo_ps (_Complex float* a, _Complex float* __restrict b) +{ + a[0] = b[0]; + a[1] = b[1]; + a[2] = b[2]; + a[3] = b[3]; + +} + +void +__attribute__((noipa)) +foo_epi64 (_Complex long long* a, _Complex long long* __restrict b) +{ + a[0] = b[0]; + a[1] = b[1]; +} + +void +__attribute__((noipa)) +foo_epi32 (_Complex int* a, _Complex int* __restrict b) +{ + a[0] = b[0]; + a[1] = b[1]; + a[2] = b[2]; + a[3] = b[3]; +} + +void +__attribute__((noipa)) +foo_epi16 (_Complex short* a, _Complex short* __restrict b) +{ + a[0] = b[0]; + a[1] = b[1]; + a[2] = b[2]; + a[3] = b[3]; + a[4] = b[4]; + a[5] = b[5]; + a[6] = b[6]; + a[7] = b[7]; +} + +void +__attribute__((noipa)) +foo_epi8 (_Complex char* a, _Complex char* 
__restrict b) +{ + a[0] = b[0]; + a[1] = b[1]; + a[2] = b[2]; + a[3] = b[3]; + a[4] = b[4]; + a[5] = b[5]; + a[6] = b[6]; + a[7] = b[7]; + a[8] = b[8]; + a[9] = b[9]; + a[10] = b[10]; + a[11] = b[11]; + a[12] = b[12]; + a[13] = b[13]; + a[14] = b[14]; + a[15] = b[15]; +} diff --git a/gcc/testsuite/gcc.target/i386/pr106010-2b.c b/gcc/testsuite/gcc.target/i386/pr106010-2b.c new file mode 100644 index 0000000..ac36075 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106010-2b.c @@ -0,0 +1,62 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx -ftree-vectorize -fvect-cost-model=unlimited -mprefer-vector-width=256" } */ +/* { dg-require-effective-target avx } */ + +#include "avx-check.h" +#include +#include "pr106010-2a.c" + +void +avx_test (void) +{ + _Complex double* pd_src = (_Complex double*) malloc (32); + _Complex double* pd_dst = (_Complex double*) malloc (32); + _Complex float* ps_src = (_Complex float*) malloc (32); + _Complex float* ps_dst = (_Complex float*) malloc (32); + _Complex long long* epi64_src = (_Complex long long*) malloc (32); + _Complex long long* epi64_dst = (_Complex long long*) malloc (32); + _Complex int* epi32_src = (_Complex int*) malloc (32); + _Complex int* epi32_dst = (_Complex int*) malloc (32); + _Complex short* epi16_src = (_Complex short*) malloc (32); + _Complex short* epi16_dst = (_Complex short*) malloc (32); + _Complex char* epi8_src = (_Complex char*) malloc (32); + _Complex char* epi8_dst = (_Complex char*) malloc (32); + char* p = (char* ) malloc (32); + + __builtin_memset (pd_dst, 0, 32); + __builtin_memset (ps_dst, 0, 32); + __builtin_memset (epi64_dst, 0, 32); + __builtin_memset (epi32_dst, 0, 32); + __builtin_memset (epi16_dst, 0, 32); + __builtin_memset (epi8_dst, 0, 32); + + for (int i = 0; i != 32; i++) + p[i] = i; + __builtin_memcpy (pd_src, p, 32); + __builtin_memcpy (ps_src, p, 32); + __builtin_memcpy (epi64_src, p, 32); + __builtin_memcpy (epi32_src, p, 32); + __builtin_memcpy (epi16_src, p, 32); + 
__builtin_memcpy (epi8_src, p, 32); + + foo_pd (pd_dst, pd_src); + foo_ps (ps_dst, ps_src); + foo_epi64 (epi64_dst, epi64_src); + foo_epi32 (epi32_dst, epi32_src); + foo_epi16 (epi16_dst, epi16_src); + foo_epi8 (epi8_dst, epi8_src); + if (__builtin_memcmp (pd_dst, pd_src, 32) != 0) + __builtin_abort (); + if (__builtin_memcmp (ps_dst, ps_src, 32) != 0) + __builtin_abort (); + if (__builtin_memcmp (epi64_dst, epi64_src, 32) != 0) + __builtin_abort (); + if (__builtin_memcmp (epi32_dst, epi32_src, 32) != 0) + __builtin_abort (); + if (__builtin_memcmp (epi16_dst, epi16_src, 32) != 0) + __builtin_abort (); + if (__builtin_memcmp (epi16_dst, epi16_src, 32) != 0) + __builtin_abort (); + + return; +} diff --git a/gcc/testsuite/gcc.target/i386/pr106010-2c.c b/gcc/testsuite/gcc.target/i386/pr106010-2c.c new file mode 100644 index 0000000..a002f20 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106010-2c.c @@ -0,0 +1,47 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -ftree-vectorize -fvect-cost-model=unlimited -mprefer-vector-width=256 -fdump-tree-slp-details" } */ +/* { dg-require-effective-target avx512fp16 } */ + +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 2 "slp2" } } */ +/* { dg-final { scan-tree-dump-times "basic block part vectorized using (?:32|64) byte vectors" 1 "slp2" } }*/ + +#include + +static void do_test (void); +#define DO_TEST do_test +#define AVX512FP16 +#include "avx512-check.h" + +void +__attribute__((noipa)) +foo_ph (_Complex _Float16* a, _Complex _Float16* __restrict b) +{ + a[0] = b[0]; + a[1] = b[1]; + a[2] = b[2]; + a[3] = b[3]; + a[4] = b[4]; + a[5] = b[5]; + a[6] = b[6]; + a[7] = b[7]; +} + +void +do_test (void) +{ + _Complex _Float16* ph_src = (_Complex _Float16*) malloc (32); + _Complex _Float16* ph_dst = (_Complex _Float16*) malloc (32); + char* p = (char* ) malloc (32); + + __builtin_memset (ph_dst, 0, 32); + + for (int i = 0; i != 32; i++) + p[i] = i; + __builtin_memcpy (ph_src, p, 32); + + 
foo_ph (ph_dst, ph_src); + if (__builtin_memcmp (ph_dst, ph_src, 32) != 0) + __builtin_abort (); + + return; +} diff --git a/gcc/testsuite/gcc.target/i386/pr106010-3a.c b/gcc/testsuite/gcc.target/i386/pr106010-3a.c new file mode 100644 index 0000000..c1b64b5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106010-3a.c @@ -0,0 +1,80 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx2 -ftree-vectorize -fvect-cost-model=unlimited -fdump-tree-slp-details" } */ +/* { dg-final { scan-tree-dump-times "basic block part vectorized using (?:32|64) byte vectors" 6 "slp2" } }*/ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*VEC_PERM_EXPR.*\{ 2, 3, 0, 1 \}} 2 "slp2" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*VEC_PERM_EXPR.*\{ 6, 7, 4, 5, 2, 3, 0, 1 \}} 1 "slp2" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*VEC_PERM_EXPR.*\{ 2, 3, 0, 1, 6, 7, 4, 5 \}} 1 "slp2" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*VEC_PERM_EXPR.*\{ 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1 \}} 1 "slp2" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*VEC_PERM_EXPR.*\{ 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 30, 31, 28, 29, 26, 27, 24, 25, 22, 23, 20, 21, 18, 19, 16, 17 \}} 1 "slp2" } } */ + +void +__attribute__((noipa)) +foo_pd (_Complex double* a, _Complex double* __restrict b) +{ + a[0] = b[1]; + a[1] = b[0]; +} + +void +__attribute__((noipa)) +foo_ps (_Complex float* a, _Complex float* __restrict b) +{ + a[0] = b[1]; + a[1] = b[0]; + a[2] = b[3]; + a[3] = b[2]; +} + +void +__attribute__((noipa)) +foo_epi64 (_Complex long long* a, _Complex long long* __restrict b) +{ + a[0] = b[1]; + a[1] = b[0]; +} + +void +__attribute__((noipa)) +foo_epi32 (_Complex int* a, _Complex int* __restrict b) +{ + a[0] = b[3]; + a[1] = b[2]; + a[2] = b[1]; + a[3] = b[0]; +} + +void +__attribute__((noipa)) +foo_epi16 (_Complex short* a, _Complex short* __restrict b) +{ + a[0] = b[7]; + a[1] = b[6]; + a[2] = 
b[5]; + a[3] = b[4]; + a[4] = b[3]; + a[5] = b[2]; + a[6] = b[1]; + a[7] = b[0]; +} + +void +__attribute__((noipa)) +foo_epi8 (_Complex char* a, _Complex char* __restrict b) +{ + a[0] = b[7]; + a[1] = b[6]; + a[2] = b[5]; + a[3] = b[4]; + a[4] = b[3]; + a[5] = b[2]; + a[6] = b[1]; + a[7] = b[0]; + a[8] = b[15]; + a[9] = b[14]; + a[10] = b[13]; + a[11] = b[12]; + a[12] = b[11]; + a[13] = b[10]; + a[14] = b[9]; + a[15] = b[8]; +} diff --git a/gcc/testsuite/gcc.target/i386/pr106010-3b.c b/gcc/testsuite/gcc.target/i386/pr106010-3b.c new file mode 100644 index 0000000..e4fa3f3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106010-3b.c @@ -0,0 +1,126 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx2 -ftree-vectorize -fvect-cost-model=unlimited -mprefer-vector-width=256" } */ +/* { dg-require-effective-target avx2 } */ + +#include "avx2-check.h" +#include +#include "pr106010-3a.c" + +void +avx2_test (void) +{ + _Complex double* pd_src = (_Complex double*) malloc (32); + _Complex double* pd_dst = (_Complex double*) malloc (32); + _Complex double* pd_exp = (_Complex double*) malloc (32); + _Complex float* ps_src = (_Complex float*) malloc (32); + _Complex float* ps_dst = (_Complex float*) malloc (32); + _Complex float* ps_exp = (_Complex float*) malloc (32); + _Complex long long* epi64_src = (_Complex long long*) malloc (32); + _Complex long long* epi64_dst = (_Complex long long*) malloc (32); + _Complex long long* epi64_exp = (_Complex long long*) malloc (32); + _Complex int* epi32_src = (_Complex int*) malloc (32); + _Complex int* epi32_dst = (_Complex int*) malloc (32); + _Complex int* epi32_exp = (_Complex int*) malloc (32); + _Complex short* epi16_src = (_Complex short*) malloc (32); + _Complex short* epi16_dst = (_Complex short*) malloc (32); + _Complex short* epi16_exp = (_Complex short*) malloc (32); + _Complex char* epi8_src = (_Complex char*) malloc (32); + _Complex char* epi8_dst = (_Complex char*) malloc (32); + _Complex char* epi8_exp = (_Complex char*) 
malloc (32); + char* p = (char* ) malloc (32); + char* q = (char* ) malloc (32); + + __builtin_memset (pd_dst, 0, 32); + __builtin_memset (ps_dst, 0, 32); + __builtin_memset (epi64_dst, 0, 32); + __builtin_memset (epi32_dst, 0, 32); + __builtin_memset (epi16_dst, 0, 32); + __builtin_memset (epi8_dst, 0, 32); + + for (int i = 0; i != 32; i++) + p[i] = i; + __builtin_memcpy (pd_src, p, 32); + __builtin_memcpy (ps_src, p, 32); + __builtin_memcpy (epi64_src, p, 32); + __builtin_memcpy (epi32_src, p, 32); + __builtin_memcpy (epi16_src, p, 32); + __builtin_memcpy (epi8_src, p, 32); + + for (int i = 0; i != 16; i++) + { + p[i] = i + 16; + p[i + 16] = i; + } + __builtin_memcpy (pd_exp, p, 32); + __builtin_memcpy (epi64_exp, p, 32); + + for (int i = 0; i != 8; i++) + { + p[i] = i + 8; + p[i + 8] = i; + p[i + 16] = i + 24; + p[i + 24] = i + 16; + q[i] = i + 24; + q[i + 8] = i + 16; + q[i + 16] = i + 8; + q[i + 24] = i; + } + __builtin_memcpy (ps_exp, p, 32); + __builtin_memcpy (epi32_exp, q, 32); + + + for (int i = 0; i != 4; i++) + { + q[i] = i + 28; + q[i + 4] = i + 24; + q[i + 8] = i + 20; + q[i + 12] = i + 16; + q[i + 16] = i + 12; + q[i + 20] = i + 8; + q[i + 24] = i + 4; + q[i + 28] = i; + } + __builtin_memcpy (epi16_exp, q, 32); + + for (int i = 0; i != 2; i++) + { + q[i] = i + 14; + q[i + 2] = i + 12; + q[i + 4] = i + 10; + q[i + 6] = i + 8; + q[i + 8] = i + 6; + q[i + 10] = i + 4; + q[i + 12] = i + 2; + q[i + 14] = i; + q[i + 16] = i + 30; + q[i + 18] = i + 28; + q[i + 20] = i + 26; + q[i + 22] = i + 24; + q[i + 24] = i + 22; + q[i + 26] = i + 20; + q[i + 28] = i + 18; + q[i + 30] = i + 16; + } + __builtin_memcpy (epi8_exp, q, 32); + + foo_pd (pd_dst, pd_src); + foo_ps (ps_dst, ps_src); + foo_epi64 (epi64_dst, epi64_src); + foo_epi32 (epi32_dst, epi32_src); + foo_epi16 (epi16_dst, epi16_src); + foo_epi8 (epi8_dst, epi8_src); + if (__builtin_memcmp (pd_dst, pd_exp, 32) != 0) + __builtin_abort (); + if (__builtin_memcmp (ps_dst, ps_exp, 32) != 0) + __builtin_abort (); 
+ if (__builtin_memcmp (epi64_dst, epi64_exp, 32) != 0) + __builtin_abort (); + if (__builtin_memcmp (epi32_dst, epi32_exp, 32) != 0) + __builtin_abort (); + if (__builtin_memcmp (epi16_dst, epi16_exp, 32) != 0) + __builtin_abort (); + if (__builtin_memcmp (epi8_dst, epi8_exp, 32) != 0) + __builtin_abort (); + + return; +} diff --git a/gcc/testsuite/gcc.target/i386/pr106010-3c.c b/gcc/testsuite/gcc.target/i386/pr106010-3c.c new file mode 100644 index 0000000..5a5a3d4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106010-3c.c @@ -0,0 +1,69 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -ftree-vectorize -fvect-cost-model=unlimited -mprefer-vector-width=256 -fdump-tree-slp-details" } */ +/* { dg-require-effective-target avx512fp16 } */ +/* { dg-final { scan-tree-dump-times "basic block part vectorized using (?:32|64) byte vectors" 1 "slp2" } }*/ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*VEC_PERM_EXPR.*\{ 2, 3, 0, 1, 8, 9, 6, 7, 14, 15, 12, 13, 4, 5, 10, 11 \}} 1 "slp2" } } */ + +#include + +static void do_test (void); +#define DO_TEST do_test +#define AVX512FP16 +#include "avx512-check.h" + +void +__attribute__((noipa)) +foo_ph (_Complex _Float16* a, _Complex _Float16* __restrict b) +{ + a[0] = b[1]; + a[1] = b[0]; + a[2] = b[4]; + a[3] = b[3]; + a[4] = b[7]; + a[5] = b[6]; + a[6] = b[2]; + a[7] = b[5]; +} + +void +do_test (void) +{ + _Complex _Float16* ph_src = (_Complex _Float16*) malloc (32); + _Complex _Float16* ph_dst = (_Complex _Float16*) malloc (32); + _Complex _Float16* ph_exp = (_Complex _Float16*) malloc (32); + char* p = (char* ) malloc (32); + char* q = (char* ) malloc (32); + + __builtin_memset (ph_dst, 0, 32); + + for (int i = 0; i != 32; i++) + p[i] = i; + __builtin_memcpy (ph_src, p, 32); + + for (int i = 0; i != 4; i++) + { + p[i] = i + 4; + p[i + 4] = i; + p[i + 8] = i + 16; + p[i + 12] = i + 12; + p[i + 16] = i + 28; + p[i + 20] = i + 24; + p[i + 24] = i + 8; + p[i + 28] = i + 20; + q[i] = i + 28; + q[i + 
4] = i + 24; + q[i + 8] = i + 20; + q[i + 12] = i + 16; + q[i + 16] = i + 12; + q[i + 20] = i + 8; + q[i + 24] = i + 4; + q[i + 28] = i; + } + __builtin_memcpy (ph_exp, p, 32); + + foo_ph (ph_dst, ph_src); + if (__builtin_memcmp (ph_dst, ph_exp, 32) != 0) + __builtin_abort (); + + return; +} diff --git a/gcc/testsuite/gcc.target/i386/pr106010-4a.c b/gcc/testsuite/gcc.target/i386/pr106010-4a.c new file mode 100644 index 0000000..b7b0b53 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106010-4a.c @@ -0,0 +1,101 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx -ftree-vectorize -fvect-cost-model=unlimited -fdump-tree-slp-details" } */ +/* { dg-final { scan-tree-dump-times "basic block part vectorized using (?:32|64) byte vectors" 6 "slp2" } }*/ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 1 "slp2" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 1 "slp2" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 1 "slp2" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 1 "slp2" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 1 "slp2" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 1 "slp2" } } */ + +void +__attribute__((noipa)) +foo_pd (_Complex double* a, + _Complex double b1, + _Complex double b2) +{ + a[0] = b1; + a[1] = b2; +} + +void +__attribute__((noipa)) +foo_ps (_Complex float* a, + _Complex float b1, _Complex float b2, + _Complex float b3, _Complex float b4) +{ + a[0] = b1; + a[1] = b2; + a[2] = b3; + a[3] = b4; +} + +void +__attribute__((noipa)) +foo_epi64 (_Complex long long* a, + _Complex long long b1, + _Complex long long b2) +{ + a[0] = b1; + a[1] = b2; +} + +void +__attribute__((noipa)) +foo_epi32 (_Complex int* a, + _Complex int b1, _Complex int b2, + _Complex int b3, _Complex int b4) +{ + a[0] = b1; + a[1] = b2; + a[2] = b3; + a[3] = b4; +} + +void +__attribute__((noipa)) +foo_epi16 (_Complex short* a, + _Complex short 
b1, _Complex short b2, + _Complex short b3, _Complex short b4, + _Complex short b5, _Complex short b6, + _Complex short b7,_Complex short b8) +{ + a[0] = b1; + a[1] = b2; + a[2] = b3; + a[3] = b4; + a[4] = b5; + a[5] = b6; + a[6] = b7; + a[7] = b8; +} + +void +__attribute__((noipa)) +foo_epi8 (_Complex char* a, + _Complex char b1, _Complex char b2, + _Complex char b3, _Complex char b4, + _Complex char b5, _Complex char b6, + _Complex char b7,_Complex char b8, + _Complex char b9, _Complex char b10, + _Complex char b11, _Complex char b12, + _Complex char b13, _Complex char b14, + _Complex char b15,_Complex char b16) +{ + a[0] = b1; + a[1] = b2; + a[2] = b3; + a[3] = b4; + a[4] = b5; + a[5] = b6; + a[6] = b7; + a[7] = b8; + a[8] = b9; + a[9] = b10; + a[10] = b11; + a[11] = b12; + a[12] = b13; + a[13] = b14; + a[14] = b15; + a[15] = b16; +} diff --git a/gcc/testsuite/gcc.target/i386/pr106010-4b.c b/gcc/testsuite/gcc.target/i386/pr106010-4b.c new file mode 100644 index 0000000..e2e7950 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106010-4b.c @@ -0,0 +1,67 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx -ftree-vectorize -fvect-cost-model=unlimited -mprefer-vector-width=256" } */ +/* { dg-require-effective-target avx } */ + +#include "avx-check.h" +#include +#include "pr106010-4a.c" + +void +avx_test (void) +{ + _Complex double* pd_src = (_Complex double*) malloc (32); + _Complex double* pd_dst = (_Complex double*) malloc (32); + _Complex float* ps_src = (_Complex float*) malloc (32); + _Complex float* ps_dst = (_Complex float*) malloc (32); + _Complex long long* epi64_src = (_Complex long long*) malloc (32); + _Complex long long* epi64_dst = (_Complex long long*) malloc (32); + _Complex int* epi32_src = (_Complex int*) malloc (32); + _Complex int* epi32_dst = (_Complex int*) malloc (32); + _Complex short* epi16_src = (_Complex short*) malloc (32); + _Complex short* epi16_dst = (_Complex short*) malloc (32); + _Complex char* epi8_src = (_Complex char*) malloc 
(32); + _Complex char* epi8_dst = (_Complex char*) malloc (32); + char* p = (char* ) malloc (32); + + __builtin_memset (pd_dst, 0, 32); + __builtin_memset (ps_dst, 0, 32); + __builtin_memset (epi64_dst, 0, 32); + __builtin_memset (epi32_dst, 0, 32); + __builtin_memset (epi16_dst, 0, 32); + __builtin_memset (epi8_dst, 0, 32); + + for (int i = 0; i != 32; i++) + p[i] = i; + __builtin_memcpy (pd_src, p, 32); + __builtin_memcpy (ps_src, p, 32); + __builtin_memcpy (epi64_src, p, 32); + __builtin_memcpy (epi32_src, p, 32); + __builtin_memcpy (epi16_src, p, 32); + __builtin_memcpy (epi8_src, p, 32); + + foo_pd (pd_dst, pd_src[0], pd_src[1]); + foo_ps (ps_dst, ps_src[0], ps_src[1], ps_src[2], ps_src[3]); + foo_epi64 (epi64_dst, epi64_src[0], epi64_src[1]); + foo_epi32 (epi32_dst, epi32_src[0], epi32_src[1], epi32_src[2], epi32_src[3]); + foo_epi16 (epi16_dst, epi16_src[0], epi16_src[1], epi16_src[2], epi16_src[3], + epi16_src[4], epi16_src[5], epi16_src[6], epi16_src[7]); + foo_epi8 (epi8_dst, epi8_src[0], epi8_src[1], epi8_src[2], epi8_src[3], + epi8_src[4], epi8_src[5], epi8_src[6], epi8_src[7], + epi8_src[8], epi8_src[9], epi8_src[10], epi8_src[11], + epi8_src[12], epi8_src[13], epi8_src[14], epi8_src[15]); + + if (__builtin_memcmp (pd_dst, pd_src, 32) != 0) + __builtin_abort (); + if (__builtin_memcmp (ps_dst, ps_src, 32) != 0) + __builtin_abort (); + if (__builtin_memcmp (epi64_dst, epi64_src, 32) != 0) + __builtin_abort (); + if (__builtin_memcmp (epi32_dst, epi32_src, 32) != 0) + __builtin_abort (); + if (__builtin_memcmp (epi16_dst, epi16_src, 32) != 0) + __builtin_abort (); + if (__builtin_memcmp (epi8_dst, epi8_src, 32) != 0) + __builtin_abort (); + + return; +} diff --git a/gcc/testsuite/gcc.target/i386/pr106010-4c.c b/gcc/testsuite/gcc.target/i386/pr106010-4c.c new file mode 100644 index 0000000..8e02aef --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106010-4c.c @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl 
-fdump-tree-slp-details -ftree-vectorize -fvect-cost-model=unlimited -mprefer-vector-width=256" } */ +/* { dg-require-effective-target avx512fp16 } */ +/* { dg-final { scan-tree-dump-times "basic block part vectorized using (?:32|64) byte vectors" 1 "slp2" } }*/ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 1 "slp2" } } */ + +#include + +static void do_test (void); +#define DO_TEST do_test +#define AVX512FP16 +#include "avx512-check.h" + +void +__attribute__((noipa)) +foo_ph (_Complex _Float16* a, + _Complex _Float16 b1, _Complex _Float16 b2, + _Complex _Float16 b3, _Complex _Float16 b4, + _Complex _Float16 b5, _Complex _Float16 b6, + _Complex _Float16 b7,_Complex _Float16 b8) +{ + a[0] = b1; + a[1] = b2; + a[2] = b3; + a[3] = b4; + a[4] = b5; + a[5] = b6; + a[6] = b7; + a[7] = b8; +} + +void +do_test (void) +{ + + _Complex _Float16* ph_src = (_Complex _Float16*) malloc (32); + _Complex _Float16* ph_dst = (_Complex _Float16*) malloc (32); + + char* p = (char* ) malloc (32); + + __builtin_memset (ph_dst, 0, 32); + + for (int i = 0; i != 32; i++) + p[i] = i; + + __builtin_memcpy (ph_src, p, 32); + + foo_ph (ph_dst, ph_src[0], ph_src[1], ph_src[2], ph_src[3], + ph_src[4], ph_src[5], ph_src[6], ph_src[7]); + + if (__builtin_memcmp (ph_dst, ph_src, 32) != 0) + __builtin_abort (); + return; +} diff --git a/gcc/testsuite/gcc.target/i386/pr106010-5a.c b/gcc/testsuite/gcc.target/i386/pr106010-5a.c new file mode 100644 index 0000000..9d4a6f9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106010-5a.c @@ -0,0 +1,117 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx -ftree-vectorize -fvect-cost-model=unlimited -fdump-tree-slp-details -mprefer-vector-width=256" } */ +/* { dg-final { scan-tree-dump-times "basic block part vectorized using (?:32|64) byte vectors" 6 "slp2" } }*/ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 4 "slp2" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 4 "slp2" } } */ +/* { dg-final { 
scan-tree-dump-times {(?n)add new stmt:.*MEM } 4 "slp2" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 4 "slp2" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 4 "slp2" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 4 "slp2" } } */ + +void +__attribute__((noipa)) +foo_pd (_Complex double* a, _Complex double* __restrict b) +{ + a[0] = b[2]; + a[1] = b[3]; + a[2] = b[0]; + a[3] = b[1]; +} + +void +__attribute__((noipa)) +foo_ps (_Complex float* a, _Complex float* __restrict b) +{ + a[0] = b[4]; + a[1] = b[5]; + a[2] = b[6]; + a[3] = b[7]; + a[4] = b[0]; + a[5] = b[1]; + a[6] = b[2]; + a[7] = b[3]; +} + +void +__attribute__((noipa)) +foo_epi64 (_Complex long long* a, _Complex long long* __restrict b) +{ + a[0] = b[2]; + a[1] = b[3]; + a[2] = b[0]; + a[3] = b[1]; +} + +void +__attribute__((noipa)) +foo_epi32 (_Complex int* a, _Complex int* __restrict b) +{ + a[0] = b[4]; + a[1] = b[5]; + a[2] = b[6]; + a[3] = b[7]; + a[4] = b[0]; + a[5] = b[1]; + a[6] = b[2]; + a[7] = b[3]; +} + +void +__attribute__((noipa)) +foo_epi16 (_Complex short* a, _Complex short* __restrict b) +{ + a[0] = b[8]; + a[1] = b[9]; + a[2] = b[10]; + a[3] = b[11]; + a[4] = b[12]; + a[5] = b[13]; + a[6] = b[14]; + a[7] = b[15]; + a[8] = b[0]; + a[9] = b[1]; + a[10] = b[2]; + a[11] = b[3]; + a[12] = b[4]; + a[13] = b[5]; + a[14] = b[6]; + a[15] = b[7]; +} + +void +__attribute__((noipa)) +foo_epi8 (_Complex char* a, _Complex char* __restrict b) +{ + a[0] = b[16]; + a[1] = b[17]; + a[2] = b[18]; + a[3] = b[19]; + a[4] = b[20]; + a[5] = b[21]; + a[6] = b[22]; + a[7] = b[23]; + a[8] = b[24]; + a[9] = b[25]; + a[10] = b[26]; + a[11] = b[27]; + a[12] = b[28]; + a[13] = b[29]; + a[14] = b[30]; + a[15] = b[31]; + a[16] = b[0]; + a[17] = b[1]; + a[18] = b[2]; + a[19] = b[3]; + a[20] = b[4]; + a[21] = b[5]; + a[22] = b[6]; + a[23] = b[7]; + a[24] = b[8]; + a[25] = b[9]; + a[26] = b[10]; + a[27] = b[11]; + a[28] = b[12]; + a[29] = b[13]; + 
a[30] = b[14]; + a[31] = b[15]; +} diff --git a/gcc/testsuite/gcc.target/i386/pr106010-5b.c b/gcc/testsuite/gcc.target/i386/pr106010-5b.c new file mode 100644 index 0000000..d5c6ebe --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106010-5b.c @@ -0,0 +1,80 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx -ftree-vectorize -fvect-cost-model=unlimited -mprefer-vector-width=256" } */ +/* { dg-require-effective-target avx } */ + +#include "avx-check.h" +#include +#include "pr106010-5a.c" + +void +avx_test (void) +{ + _Complex double* pd_src = (_Complex double*) malloc (64); + _Complex double* pd_dst = (_Complex double*) malloc (64); + _Complex double* pd_exp = (_Complex double*) malloc (64); + _Complex float* ps_src = (_Complex float*) malloc (64); + _Complex float* ps_dst = (_Complex float*) malloc (64); + _Complex float* ps_exp = (_Complex float*) malloc (64); + _Complex long long* epi64_src = (_Complex long long*) malloc (64); + _Complex long long* epi64_dst = (_Complex long long*) malloc (64); + _Complex long long* epi64_exp = (_Complex long long*) malloc (64); + _Complex int* epi32_src = (_Complex int*) malloc (64); + _Complex int* epi32_dst = (_Complex int*) malloc (64); + _Complex int* epi32_exp = (_Complex int*) malloc (64); + _Complex short* epi16_src = (_Complex short*) malloc (64); + _Complex short* epi16_dst = (_Complex short*) malloc (64); + _Complex short* epi16_exp = (_Complex short*) malloc (64); + _Complex char* epi8_src = (_Complex char*) malloc (64); + _Complex char* epi8_dst = (_Complex char*) malloc (64); + _Complex char* epi8_exp = (_Complex char*) malloc (64); + char* p = (char* ) malloc (64); + char* q = (char* ) malloc (64); + + __builtin_memset (pd_dst, 0, 64); + __builtin_memset (ps_dst, 0, 64); + __builtin_memset (epi64_dst, 0, 64); + __builtin_memset (epi32_dst, 0, 64); + __builtin_memset (epi16_dst, 0, 64); + __builtin_memset (epi8_dst, 0, 64); + + for (int i = 0; i != 64; i++) + { + p[i] = i; + q[i] = (i + 32) % 64; + } + 
__builtin_memcpy (pd_src, p, 64); + __builtin_memcpy (ps_src, p, 64); + __builtin_memcpy (epi64_src, p, 64); + __builtin_memcpy (epi32_src, p, 64); + __builtin_memcpy (epi16_src, p, 64); + __builtin_memcpy (epi8_src, p, 64); + + __builtin_memcpy (pd_exp, q, 64); + __builtin_memcpy (ps_exp, q, 64); + __builtin_memcpy (epi64_exp, q, 64); + __builtin_memcpy (epi32_exp, q, 64); + __builtin_memcpy (epi16_exp, q, 64); + __builtin_memcpy (epi8_exp, q, 64); + + foo_pd (pd_dst, pd_src); + foo_ps (ps_dst, ps_src); + foo_epi64 (epi64_dst, epi64_src); + foo_epi32 (epi32_dst, epi32_src); + foo_epi16 (epi16_dst, epi16_src); + foo_epi8 (epi8_dst, epi8_src); + + if (__builtin_memcmp (pd_dst, pd_exp, 64) != 0) + __builtin_abort (); + if (__builtin_memcmp (ps_dst, ps_exp, 64) != 0) + __builtin_abort (); + if (__builtin_memcmp (epi64_dst, epi64_exp, 64) != 0) + __builtin_abort (); + if (__builtin_memcmp (epi32_dst, epi32_exp, 64) != 0) + __builtin_abort (); + if (__builtin_memcmp (epi16_dst, epi16_exp, 64) != 0) + __builtin_abort (); + if (__builtin_memcmp (epi8_dst, epi8_exp, 64) != 0) + __builtin_abort (); + + return; +} diff --git a/gcc/testsuite/gcc.target/i386/pr106010-5c.c b/gcc/testsuite/gcc.target/i386/pr106010-5c.c new file mode 100644 index 0000000..9ce4e6d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106010-5c.c @@ -0,0 +1,62 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -ftree-vectorize -fvect-cost-model=unlimited -fdump-tree-slp-details -mprefer-vector-width=256" } */ +/* { dg-require-effective-target avx512fp16 } */ +/* { dg-final { scan-tree-dump-times "basic block part vectorized using (?:32|64) byte vectors" 1 "slp2" } }*/ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 4 "slp2" } } */ + +#include + +static void do_test (void); +#define DO_TEST do_test +#define AVX512FP16 +#include "avx512-check.h" + +void +__attribute__((noipa)) +foo_ph (_Complex _Float16* a, _Complex _Float16* __restrict b) +{ + a[0] = b[8]; + a[1] = 
b[9]; + a[2] = b[10]; + a[3] = b[11]; + a[4] = b[12]; + a[5] = b[13]; + a[6] = b[14]; + a[7] = b[15]; + a[8] = b[0]; + a[9] = b[1]; + a[10] = b[2]; + a[11] = b[3]; + a[12] = b[4]; + a[13] = b[5]; + a[14] = b[6]; + a[15] = b[7]; +} + +void +do_test (void) +{ + _Complex _Float16* ph_src = (_Complex _Float16*) malloc (64); + _Complex _Float16* ph_dst = (_Complex _Float16*) malloc (64); + _Complex _Float16* ph_exp = (_Complex _Float16*) malloc (64); + char* p = (char* ) malloc (64); + char* q = (char* ) malloc (64); + + __builtin_memset (ph_dst, 0, 64); + + for (int i = 0; i != 64; i++) + { + p[i] = i; + q[i] = (i + 32) % 64; + } + __builtin_memcpy (ph_src, p, 64); + + __builtin_memcpy (ph_exp, q, 64); + + foo_ph (ph_dst, ph_src); + + if (__builtin_memcmp (ph_dst, ph_exp, 64) != 0) + __builtin_abort (); + + return; +} diff --git a/gcc/testsuite/gcc.target/i386/pr106010-6a.c b/gcc/testsuite/gcc.target/i386/pr106010-6a.c new file mode 100644 index 0000000..65a90d0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106010-6a.c @@ -0,0 +1,115 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx2 -ftree-vectorize -fvect-cost-model=unlimited -fdump-tree-slp-details -mprefer-vector-width=256" } */ +/* { dg-final { scan-tree-dump-times "basic block part vectorized using (?:32|64) byte vectors" 6 "slp2" } }*/ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*VEC_PERM_EXPR.*\{ 2, 3, 0, 1 \}} 4 "slp2" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*VEC_PERM_EXPR.*\{ 6, 7, 4, 5, 2, 3, 0, 1 \}} 4 "slp2" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*VEC_PERM_EXPR.*\{ 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1 \}} 2 "slp2" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*VEC_PERM_EXPR.*\{ 30, 31, 28, 29, 26, 27, 24, 25, 22, 23, 20, 21, 18, 19, 16, 17, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1 \}} 2 "slp2" } } */ + +void +__attribute__((noipa)) +foo_pd (_Complex double* a, _Complex double* __restrict 
b) +{ + a[0] = b[3]; + a[1] = b[2]; + a[2] = b[1]; + a[3] = b[0]; +} + +void +__attribute__((noipa)) +foo_ps (_Complex float* a, _Complex float* __restrict b) +{ + a[0] = b[7]; + a[1] = b[6]; + a[2] = b[5]; + a[3] = b[4]; + a[4] = b[3]; + a[5] = b[2]; + a[6] = b[1]; + a[7] = b[0]; +} + +void +__attribute__((noipa)) +foo_epi64 (_Complex long long* a, _Complex long long* __restrict b) +{ + a[0] = b[3]; + a[1] = b[2]; + a[2] = b[1]; + a[3] = b[0]; +} + +void +__attribute__((noipa)) +foo_epi32 (_Complex int* a, _Complex int* __restrict b) +{ + a[0] = b[7]; + a[1] = b[6]; + a[2] = b[5]; + a[3] = b[4]; + a[4] = b[3]; + a[5] = b[2]; + a[6] = b[1]; + a[7] = b[0]; +} + +void +__attribute__((noipa)) +foo_epi16 (_Complex short* a, _Complex short* __restrict b) +{ + a[0] = b[15]; + a[1] = b[14]; + a[2] = b[13]; + a[3] = b[12]; + a[4] = b[11]; + a[5] = b[10]; + a[6] = b[9]; + a[7] = b[8]; + a[8] = b[7]; + a[9] = b[6]; + a[10] = b[5]; + a[11] = b[4]; + a[12] = b[3]; + a[13] = b[2]; + a[14] = b[1]; + a[15] = b[0]; +} + +void +__attribute__((noipa)) +foo_epi8 (_Complex char* a, _Complex char* __restrict b) +{ + a[0] = b[31]; + a[1] = b[30]; + a[2] = b[29]; + a[3] = b[28]; + a[4] = b[27]; + a[5] = b[26]; + a[6] = b[25]; + a[7] = b[24]; + a[8] = b[23]; + a[9] = b[22]; + a[10] = b[21]; + a[11] = b[20]; + a[12] = b[19]; + a[13] = b[18]; + a[14] = b[17]; + a[15] = b[16]; + a[16] = b[15]; + a[17] = b[14]; + a[18] = b[13]; + a[19] = b[12]; + a[20] = b[11]; + a[21] = b[10]; + a[22] = b[9]; + a[23] = b[8]; + a[24] = b[7]; + a[25] = b[6]; + a[26] = b[5]; + a[27] = b[4]; + a[28] = b[3]; + a[29] = b[2]; + a[30] = b[1]; + a[31] = b[0]; +} diff --git a/gcc/testsuite/gcc.target/i386/pr106010-6b.c b/gcc/testsuite/gcc.target/i386/pr106010-6b.c new file mode 100644 index 0000000..1c5bb02 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106010-6b.c @@ -0,0 +1,157 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx2 -ftree-vectorize -fvect-cost-model=unlimited -mprefer-vector-width=256" } */ +/* { 
dg-require-effective-target avx2 } */ + +#include "avx2-check.h" +#include +#include "pr106010-6a.c" + +void +avx2_test (void) +{ + _Complex double* pd_src = (_Complex double*) malloc (64); + _Complex double* pd_dst = (_Complex double*) malloc (64); + _Complex double* pd_exp = (_Complex double*) malloc (64); + _Complex float* ps_src = (_Complex float*) malloc (64); + _Complex float* ps_dst = (_Complex float*) malloc (64); + _Complex float* ps_exp = (_Complex float*) malloc (64); + _Complex long long* epi64_src = (_Complex long long*) malloc (64); + _Complex long long* epi64_dst = (_Complex long long*) malloc (64); + _Complex long long* epi64_exp = (_Complex long long*) malloc (64); + _Complex int* epi32_src = (_Complex int*) malloc (64); + _Complex int* epi32_dst = (_Complex int*) malloc (64); + _Complex int* epi32_exp = (_Complex int*) malloc (64); + _Complex short* epi16_src = (_Complex short*) malloc (64); + _Complex short* epi16_dst = (_Complex short*) malloc (64); + _Complex short* epi16_exp = (_Complex short*) malloc (64); + _Complex char* epi8_src = (_Complex char*) malloc (64); + _Complex char* epi8_dst = (_Complex char*) malloc (64); + _Complex char* epi8_exp = (_Complex char*) malloc (64); + char* p = (char* ) malloc (64); + char* q = (char* ) malloc (64); + + __builtin_memset (pd_dst, 0, 64); + __builtin_memset (ps_dst, 0, 64); + __builtin_memset (epi64_dst, 0, 64); + __builtin_memset (epi32_dst, 0, 64); + __builtin_memset (epi16_dst, 0, 64); + __builtin_memset (epi8_dst, 0, 64); + + for (int i = 0; i != 64; i++) + p[i] = i; + + __builtin_memcpy (pd_src, p, 64); + __builtin_memcpy (ps_src, p, 64); + __builtin_memcpy (epi64_src, p, 64); + __builtin_memcpy (epi32_src, p, 64); + __builtin_memcpy (epi16_src, p, 64); + __builtin_memcpy (epi8_src, p, 64); + + + for (int i = 0; i != 16; i++) + { + q[i] = i + 48; + q[i + 16] = i + 32; + q[i + 32] = i + 16; + q[i + 48] = i; + } + + __builtin_memcpy (pd_exp, q, 64); + __builtin_memcpy (epi64_exp, q, 64); + + for 
(int i = 0; i != 8; i++) + { + q[i] = i + 56; + q[i + 8] = i + 48; + q[i + 16] = i + 40; + q[i + 24] = i + 32; + q[i + 32] = i + 24; + q[i + 40] = i + 16; + q[i + 48] = i + 8; + q[i + 56] = i; + } + + __builtin_memcpy (ps_exp, q, 64); + __builtin_memcpy (epi32_exp, q, 64); + + for (int i = 0; i != 4; i++) + { + q[i] = i + 60; + q[i + 4] = i + 56; + q[i + 8] = i + 52; + q[i + 12] = i + 48; + q[i + 16] = i + 44; + q[i + 20] = i + 40; + q[i + 24] = i + 36; + q[i + 28] = i + 32; + q[i + 32] = i + 28; + q[i + 36] = i + 24; + q[i + 40] = i + 20; + q[i + 44] = i + 16; + q[i + 48] = i + 12; + q[i + 52] = i + 8; + q[i + 56] = i + 4; + q[i + 60] = i; + } + + __builtin_memcpy (epi16_exp, q, 64); + + for (int i = 0; i != 2; i++) + { + q[i] = i + 62; + q[i + 2] = i + 60; + q[i + 4] = i + 58; + q[i + 6] = i + 56; + q[i + 8] = i + 54; + q[i + 10] = i + 52; + q[i + 12] = i + 50; + q[i + 14] = i + 48; + q[i + 16] = i + 46; + q[i + 18] = i + 44; + q[i + 20] = i + 42; + q[i + 22] = i + 40; + q[i + 24] = i + 38; + q[i + 26] = i + 36; + q[i + 28] = i + 34; + q[i + 30] = i + 32; + q[i + 32] = i + 30; + q[i + 34] = i + 28; + q[i + 36] = i + 26; + q[i + 38] = i + 24; + q[i + 40] = i + 22; + q[i + 42] = i + 20; + q[i + 44] = i + 18; + q[i + 46] = i + 16; + q[i + 48] = i + 14; + q[i + 50] = i + 12; + q[i + 52] = i + 10; + q[i + 54] = i + 8; + q[i + 56] = i + 6; + q[i + 58] = i + 4; + q[i + 60] = i + 2; + q[i + 62] = i; + } + __builtin_memcpy (epi8_exp, q, 64); + + foo_pd (pd_dst, pd_src); + foo_ps (ps_dst, ps_src); + foo_epi64 (epi64_dst, epi64_src); + foo_epi32 (epi32_dst, epi32_src); + foo_epi16 (epi16_dst, epi16_src); + foo_epi8 (epi8_dst, epi8_src); + + if (__builtin_memcmp (pd_dst, pd_exp, 64) != 0) + __builtin_abort (); + if (__builtin_memcmp (ps_dst, ps_exp, 64) != 0) + __builtin_abort (); + if (__builtin_memcmp (epi64_dst, epi64_exp, 64) != 0) + __builtin_abort (); + if (__builtin_memcmp (epi32_dst, epi32_exp, 64) != 0) + __builtin_abort (); + if (__builtin_memcmp (epi16_dst, 
epi16_exp, 64) != 0) + __builtin_abort (); + if (__builtin_memcmp (epi8_dst, epi8_exp, 64) != 0) + __builtin_abort (); + + return; +} diff --git a/gcc/testsuite/gcc.target/i386/pr106010-6c.c b/gcc/testsuite/gcc.target/i386/pr106010-6c.c new file mode 100644 index 0000000..b859d88 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106010-6c.c @@ -0,0 +1,80 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -ftree-vectorize -fvect-cost-model=unlimited -mprefer-vector-width=256 -fdump-tree-slp-details" } */ +/* { dg-require-effective-target avx512fp16 } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*VEC_PERM_EXPR.*\{ 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1 \}} 2 "slp2" } } */ +/* { dg-final { scan-tree-dump-times "basic block part vectorized using (?:32|64) byte vectors" 1 "slp2" } } */ + +#include + +static void do_test (void); +#define DO_TEST do_test +#define AVX512FP16 +#include "avx512-check.h" + +void +__attribute__((noipa)) +foo_ph (_Complex _Float16* a, _Complex _Float16* __restrict b) +{ + a[0] = b[15]; + a[1] = b[14]; + a[2] = b[13]; + a[3] = b[12]; + a[4] = b[11]; + a[5] = b[10]; + a[6] = b[9]; + a[7] = b[8]; + a[8] = b[7]; + a[9] = b[6]; + a[10] = b[5]; + a[11] = b[4]; + a[12] = b[3]; + a[13] = b[2]; + a[14] = b[1]; + a[15] = b[0]; +} + +void +do_test (void) +{ + _Complex _Float16* ph_src = (_Complex _Float16*) malloc (64); + _Complex _Float16* ph_dst = (_Complex _Float16*) malloc (64); + _Complex _Float16* ph_exp = (_Complex _Float16*) malloc (64); + char* p = (char* ) malloc (64); + char* q = (char* ) malloc (64); + + __builtin_memset (ph_dst, 0, 64); + + for (int i = 0; i != 64; i++) + p[i] = i; + + __builtin_memcpy (ph_src, p, 64); + + for (int i = 0; i != 4; i++) + { + q[i] = i + 60; + q[i + 4] = i + 56; + q[i + 8] = i + 52; + q[i + 12] = i + 48; + q[i + 16] = i + 44; + q[i + 20] = i + 40; + q[i + 24] = i + 36; + q[i + 28] = i + 32; + q[i + 32] = i + 28; + q[i + 36] = i + 24; + q[i + 40] = i + 20; + q[i + 
44] = i + 16; + q[i + 48] = i + 12; + q[i + 52] = i + 8; + q[i + 56] = i + 4; + q[i + 60] = i; + } + + __builtin_memcpy (ph_exp, q, 64); + + foo_ph (ph_dst, ph_src); + + if (__builtin_memcmp (ph_dst, ph_exp, 64) != 0) + __builtin_abort (); + + return; +} diff --git a/gcc/testsuite/gcc.target/i386/pr106010-7a.c b/gcc/testsuite/gcc.target/i386/pr106010-7a.c new file mode 100644 index 0000000..2ea01fa --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106010-7a.c @@ -0,0 +1,58 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx -ftree-vectorize -fvect-cost-model=unlimited -fdump-tree-vect-details -mprefer-vector-width=256" } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 6 "vect" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 1 "vect" } } */ + +#define N 10000 +void +__attribute__((noipa)) +foo_pd (_Complex double* a, _Complex double b) +{ + for (int i = 0; i != N; i++) + a[i] = b; +} + +void +__attribute__((noipa)) +foo_ps (_Complex float* a, _Complex float b) +{ + for (int i = 0; i != N; i++) + a[i] = b; +} + +void +__attribute__((noipa)) +foo_epi64 (_Complex long long* a, _Complex long long b) +{ + for (int i = 0; i != N; i++) + a[i] = b; +} + +void +__attribute__((noipa)) +foo_epi32 (_Complex int* a, _Complex int b) +{ + for (int i = 0; i != N; i++) + a[i] = b; +} + +void +__attribute__((noipa)) +foo_epi16 (_Complex short* a, _Complex short b) +{ + for (int i = 0; i != N; i++) + a[i] = b; +} + +void +__attribute__((noipa)) +foo_epi8 (_Complex char* a, _Complex char b) +{ + for (int i = 0; i != N; i++) + a[i] = b; +} diff --git 
a/gcc/testsuite/gcc.target/i386/pr106010-7b.c b/gcc/testsuite/gcc.target/i386/pr106010-7b.c new file mode 100644 index 0000000..26482cc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106010-7b.c @@ -0,0 +1,63 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx -ftree-vectorize -fvect-cost-model=unlimited -mprefer-vector-width=256" } */ +/* { dg-require-effective-target avx } */ + +#include "avx-check.h" +#include +#include "pr106010-7a.c" + +void +avx_test (void) +{ + _Complex double* pd_src = (_Complex double*) malloc (2 * N * sizeof (double)); + _Complex double* pd_dst = (_Complex double*) malloc (2 * N * sizeof (double)); + _Complex float* ps_src = (_Complex float*) malloc (2 * N * sizeof (float)); + _Complex float* ps_dst = (_Complex float*) malloc (2 * N * sizeof (float)); + _Complex long long* epi64_src = (_Complex long long*) malloc (2 * N * sizeof (long long)); + _Complex long long* epi64_dst = (_Complex long long*) malloc (2 * N * sizeof (long long)); + _Complex int* epi32_src = (_Complex int*) malloc (2 * N * sizeof (int)); + _Complex int* epi32_dst = (_Complex int*) malloc (2 * N * sizeof (int)); + _Complex short* epi16_src = (_Complex short*) malloc (2 * N * sizeof (short)); + _Complex short* epi16_dst = (_Complex short*) malloc (2 * N * sizeof (short)); + _Complex char* epi8_src = (_Complex char*) malloc (2 * N * sizeof (char)); + _Complex char* epi8_dst = (_Complex char*) malloc (2 * N * sizeof (char)); + char* p_init = (char*) malloc (2 * N * sizeof (double)); + + __builtin_memset (pd_dst, 0, 2 * N * sizeof (double)); + __builtin_memset (ps_dst, 0, 2 * N * sizeof (float)); + __builtin_memset (epi64_dst, 0, 2 * N * sizeof (long long)); + __builtin_memset (epi32_dst, 0, 2 * N * sizeof (int)); + __builtin_memset (epi16_dst, 0, 2 * N * sizeof (short)); + __builtin_memset (epi8_dst, 0, 2 * N * sizeof (char)); + + for (int i = 0; i != 2 * N * sizeof (double); i++) + p_init[i] = i % 2 + 3; + + memcpy (pd_src, p_init, 2 * N * sizeof (double)); + memcpy 
(ps_dst, p_init, 2 * N * sizeof (float)); + memcpy (epi64_dst, p_init, 2 * N * sizeof (long long)); + memcpy (epi32_dst, p_init, 2 * N * sizeof (int)); + memcpy (epi16_dst, p_init, 2 * N * sizeof (short)); + memcpy (epi8_dst, p_init, 2 * N * sizeof (char)); + + foo_pd (pd_dst, pd_src[0]); + foo_ps (ps_dst, ps_src[0]); + foo_epi64 (epi64_dst, epi64_src[0]); + foo_epi32 (epi32_dst, epi32_src[0]); + foo_epi16 (epi16_dst, epi16_src[0]); + foo_epi8 (epi8_dst, epi8_src[0]); + if (__builtin_memcmp (pd_dst, pd_src, N * 2 * sizeof (double)) != 0) + __builtin_abort (); + if (__builtin_memcmp (ps_dst, ps_src, N * 2 * sizeof (float)) != 0) + __builtin_abort (); + if (__builtin_memcmp (epi64_dst, epi64_src, N * 2 * sizeof (long long)) != 0) + __builtin_abort (); + if (__builtin_memcmp (epi32_dst, epi32_src, N * 2 * sizeof (int)) != 0) + __builtin_abort (); + if (__builtin_memcmp (epi16_dst, epi16_src, N * 2 * sizeof (short)) != 0) + __builtin_abort (); + if (__builtin_memcmp (epi8_dst, epi8_src, N * 2 * sizeof (char)) != 0) + __builtin_abort (); + + return; +} diff --git a/gcc/testsuite/gcc.target/i386/pr106010-7c.c b/gcc/testsuite/gcc.target/i386/pr106010-7c.c new file mode 100644 index 0000000..7f4056a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106010-7c.c @@ -0,0 +1,41 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -ftree-vectorize -fvect-cost-model=unlimited -mprefer-vector-width=256 -fdump-tree-vect-details" } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 1 "vect" } } */ +/* { dg-require-effective-target avx512fp16 } */ + +#include + +static void do_test (void); + +#define DO_TEST do_test +#define AVX512FP16 +#include "avx512-check.h" + +#define N 10000 + +void +__attribute__((noipa)) +foo_ph (_Complex _Float16* a, _Complex _Float16 b) +{ + for (int i = 0; i != N; i++) + a[i] = b; +} + +static void +do_test (void) +{ + _Complex _Float16* ph_src = (_Complex _Float16*) malloc (2 * N * sizeof (_Float16)); + _Complex 
_Float16* ph_dst = (_Complex _Float16*) malloc (2 * N * sizeof (_Float16)); + char* p_init = (char*) malloc (2 * N * sizeof (_Float16)); + + __builtin_memset (ph_dst, 0, 2 * N * sizeof (_Float16)); + + for (int i = 0; i != 2 * N * sizeof (_Float16); i++) + p_init[i] = i % 2 + 3; + + memcpy (ph_src, p_init, 2 * N * sizeof (_Float16)); + + foo_ph (ph_dst, ph_src[0]); + if (__builtin_memcmp (ph_dst, ph_src, N * 2 * sizeof (_Float16)) != 0) + __builtin_abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/pr106010-8a.c b/gcc/testsuite/gcc.target/i386/pr106010-8a.c new file mode 100644 index 0000000..11054b6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106010-8a.c @@ -0,0 +1,58 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx -ftree-vectorize -fvect-cost-model=unlimited -fdump-tree-vect-details -mprefer-vector-width=256" } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 6 "vect" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 1 "vect" } } */ + +#define N 10000 +void +__attribute__((noipa)) +foo_pd (_Complex double* a) +{ + for (int i = 0; i != N; i++) + a[i] = 1.0 + 2.0i; +} + +void +__attribute__((noipa)) +foo_ps (_Complex float* a) +{ + for (int i = 0; i != N; i++) + a[i] = 1.0f + 2.0fi; +} + +void +__attribute__((noipa)) +foo_epi64 (_Complex long long* a) +{ + for (int i = 0; i != N; i++) + a[i] = 1 + 2i; +} + +void +__attribute__((noipa)) +foo_epi32 (_Complex int* a) +{ + for (int i = 0; i != N; i++) + a[i] = 1 + 2i; +} + +void +__attribute__((noipa)) +foo_epi16 (_Complex short* a) +{ + for (int i = 0; i != N; i++) + a[i] 
= 1 + 2i; +} + +void +__attribute__((noipa)) +foo_epi8 (_Complex char* a) +{ + for (int i = 0; i != N; i++) + a[i] = 1 + 2i; +} diff --git a/gcc/testsuite/gcc.target/i386/pr106010-8b.c b/gcc/testsuite/gcc.target/i386/pr106010-8b.c new file mode 100644 index 0000000..6bb0073 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106010-8b.c @@ -0,0 +1,53 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx -ftree-vectorize -fvect-cost-model=unlimited -mprefer-vector-width=256" } */ +/* { dg-require-effective-target avx } */ + +#include "avx-check.h" +#include +#include "pr106010-8a.c" + +void +avx_test (void) +{ + _Complex double pd_src = 1.0 + 2.0i; + _Complex double* pd_dst = (_Complex double*) malloc (2 * N * sizeof (double)); + _Complex float ps_src = 1.0 + 2.0i; + _Complex float* ps_dst = (_Complex float*) malloc (2 * N * sizeof (float)); + _Complex long long epi64_src = 1 + 2i;; + _Complex long long* epi64_dst = (_Complex long long*) malloc (2 * N * sizeof (long long)); + _Complex int epi32_src = 1 + 2i; + _Complex int* epi32_dst = (_Complex int*) malloc (2 * N * sizeof (int)); + _Complex short epi16_src = 1 + 2i; + _Complex short* epi16_dst = (_Complex short*) malloc (2 * N * sizeof (short)); + _Complex char epi8_src = 1 + 2i; + _Complex char* epi8_dst = (_Complex char*) malloc (2 * N * sizeof (char)); + + __builtin_memset (pd_dst, 0, 2 * N * sizeof (double)); + __builtin_memset (ps_dst, 0, 2 * N * sizeof (float)); + __builtin_memset (epi64_dst, 0, 2 * N * sizeof (long long)); + __builtin_memset (epi32_dst, 0, 2 * N * sizeof (int)); + __builtin_memset (epi16_dst, 0, 2 * N * sizeof (short)); + __builtin_memset (epi8_dst, 0, 2 * N * sizeof (char)); + + foo_pd (pd_dst); + foo_ps (ps_dst); + foo_epi64 (epi64_dst); + foo_epi32 (epi32_dst); + foo_epi16 (epi16_dst); + foo_epi8 (epi8_dst); + for (int i = 0 ; i != N; i++) + { + if (pd_dst[i] != pd_src) + __builtin_abort (); + if (ps_dst[i] != ps_src) + __builtin_abort (); + if (epi64_dst[i] != epi64_src) + __builtin_abort 
(); + if (epi32_dst[i] != epi32_src) + __builtin_abort (); + if (epi16_dst[i] != epi16_src) + __builtin_abort (); + if (epi8_dst[i] != epi8_src) + __builtin_abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/pr106010-8c.c b/gcc/testsuite/gcc.target/i386/pr106010-8c.c new file mode 100644 index 0000000..61ae131 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106010-8c.c @@ -0,0 +1,38 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -ftree-vectorize -fvect-cost-model=unlimited -mprefer-vector-width=256 -fdump-tree-vect-details" } */ +/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*MEM } 1 "vect" } } */ +/* { dg-require-effective-target avx512fp16 } */ + +#include + +static void do_test (void); + +#define DO_TEST do_test +#define AVX512FP16 +#include "avx512-check.h" + +#define N 10000 + +void +__attribute__((noipa)) +foo_ph (_Complex _Float16* a) +{ + for (int i = 0; i != N; i++) + a[i] = 1.0f16 + 2.0f16i; +} + +static void +do_test (void) +{ + _Complex _Float16 ph_src = 1.0f16 + 2.0f16i; + _Complex _Float16* ph_dst = (_Complex _Float16*) malloc (2 * N * sizeof (_Float16)); + + __builtin_memset (ph_dst, 0, 2 * N * sizeof (_Float16)); + + foo_ph (ph_dst); + for (int i = 0; i != N; i++) + { + if (ph_dst[i] != ph_src) + __builtin_abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/pr106010-9a.c b/gcc/testsuite/gcc.target/i386/pr106010-9a.c new file mode 100644 index 0000000..e922f7b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106010-9a.c @@ -0,0 +1,89 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx2 -fvect-cost-model=unlimited -fdump-tree-vect-details" } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 6 "vect" } } */ + +typedef struct { _Complex double c; double a1; double a2;} + cdf; +typedef struct { _Complex double c; double a1; double a2; double a3; double a4;} + cdf2; +typedef struct { _Complex double c1; _Complex double c2; double a1; double a2; double a3; double a4;} + cdf3; +typedef 
struct { _Complex double c1; _Complex double c2; double a1; double a2;} + cdf4; + +#define N 100 +/* VMAT_ELEMENTWISE. */ +void +__attribute__((noipa)) +foo (cdf* a, cdf* __restrict b) +{ + for (int i = 0; i < N; ++i) + { + a[i].c = b[i].c; + a[i].a1 = b[i].a1; + a[i].a2 = b[i].a2; + } +} + +/* VMAT_CONTIGUOUS_PERMUTE. */ +void +__attribute__((noipa)) +foo1 (cdf2* a, cdf2* __restrict b) +{ + for (int i = 0; i < N; ++i) + { + a[i].c = b[i].c; + a[i].a1 = b[i].a1; + a[i].a2 = b[i].a2; + a[i].a3 = b[i].a3; + a[i].a4 = b[i].a4; + } +} + +/* VMAT_CONTIGUOUS. */ +void +__attribute__((noipa)) +foo2 (cdf3* a, cdf3* __restrict b) +{ + for (int i = 0; i < N; ++i) + { + a[i].c1 = b[i].c1; + a[i].c2 = b[i].c2; + a[i].a1 = b[i].a1; + a[i].a2 = b[i].a2; + a[i].a3 = b[i].a3; + a[i].a4 = b[i].a4; + } +} + +/* VMAT_STRIDED_SLP. */ +void +__attribute__((noipa)) +foo3 (cdf4* a, cdf4* __restrict b) +{ + for (int i = 0; i < N; ++i) + { + a[i].c1 = b[i].c1; + a[i].c2 = b[i].c2; + a[i].a1 = b[i].a1; + a[i].a2 = b[i].a2; + } +} + +/* VMAT_CONTIGUOUS_REVERSE. */ +void +__attribute__((noipa)) +foo4 (_Complex double* a, _Complex double* __restrict b) +{ + for (int i = 0; i != N; i++) + a[i] = b[N-i-1]; +} + +/* VMAT_CONTIGUOUS_DOWN. 
*/ +void +__attribute__((noipa)) +foo5 (_Complex double* a, _Complex double* __restrict b) +{ + for (int i = 0; i != N; i++) + a[N-i-1] = b[0]; +} diff --git a/gcc/testsuite/gcc.target/i386/pr106010-9b.c b/gcc/testsuite/gcc.target/i386/pr106010-9b.c new file mode 100644 index 0000000..e220445 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106010-9b.c @@ -0,0 +1,90 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -msse2 -fvect-cost-model=unlimited" } */ +/* { dg-require-effective-target sse2 } */ + +#include +#include "sse2-check.h" +#include "pr106010-9a.c" + +static void +sse2_test (void) +{ + _Complex double* pd_src = (_Complex double*) malloc (N * sizeof (_Complex double)); + _Complex double* pd_dst = (_Complex double*) malloc (N * sizeof (_Complex double)); + _Complex double* pd_src2 = (_Complex double*) malloc (N * sizeof (_Complex double)); + _Complex double* pd_dst2 = (_Complex double*) malloc (N * sizeof (_Complex double)); + cdf* cdf_src = (cdf*) malloc (N * sizeof (cdf)); + cdf* cdf_dst = (cdf*) malloc (N * sizeof (cdf)); + cdf2* cdf2_src = (cdf2*) malloc (N * sizeof (cdf2)); + cdf2* cdf2_dst = (cdf2*) malloc (N * sizeof (cdf2)); + cdf3* cdf3_src = (cdf3*) malloc (N * sizeof (cdf3)); + cdf3* cdf3_dst = (cdf3*) malloc (N * sizeof (cdf3)); + cdf4* cdf4_src = (cdf4*) malloc (N * sizeof (cdf4)); + cdf4* cdf4_dst = (cdf4*) malloc (N * sizeof (cdf4)); + + char* p_init = (char*) malloc (N * sizeof (cdf3)); + + __builtin_memset (cdf_dst, 0, N * sizeof (cdf)); + __builtin_memset (cdf2_dst, 0, N * sizeof (cdf2)); + __builtin_memset (cdf3_dst, 0, N * sizeof (cdf3)); + __builtin_memset (cdf4_dst, 0, N * sizeof (cdf4)); + __builtin_memset (pd_dst, 0, N * sizeof (_Complex double)); + __builtin_memset (pd_dst2, 0, N * sizeof (_Complex double)); + + for (int i = 0; i != N * sizeof (cdf3); i++) + p_init[i] = i; + + memcpy (cdf_src, p_init, N * sizeof (cdf)); + memcpy (cdf2_src, p_init, N * sizeof (cdf2)); + memcpy (cdf3_src, p_init, N * sizeof (cdf3)); + memcpy 
(cdf4_src, p_init, N * sizeof (cdf4)); + memcpy (pd_src, p_init, N * sizeof (_Complex double)); + for (int i = 0; i != 2 * N * sizeof (double); i++) + p_init[i] = i % 16; + memcpy (pd_src2, p_init, N * sizeof (_Complex double)); + + foo (cdf_dst, cdf_src); + foo1 (cdf2_dst, cdf2_src); + foo2 (cdf3_dst, cdf3_src); + foo3 (cdf4_dst, cdf4_src); + foo4 (pd_dst, pd_src); + foo5 (pd_dst2, pd_src2); + for (int i = 0; i != N; i++) + { + p_init[(N - i - 1) * 16] = i * 16; + p_init[(N - i - 1) * 16 + 1] = i * 16 + 1; + p_init[(N - i - 1) * 16 + 2] = i * 16 + 2; + p_init[(N - i - 1) * 16 + 3] = i * 16 + 3; + p_init[(N - i - 1) * 16 + 4] = i * 16 + 4; + p_init[(N - i - 1) * 16 + 5] = i * 16 + 5; + p_init[(N - i - 1) * 16 + 6] = i * 16 + 6; + p_init[(N - i - 1) * 16 + 7] = i * 16 + 7; + p_init[(N - i - 1) * 16 + 8] = i * 16 + 8; + p_init[(N - i - 1) * 16 + 9] = i * 16 + 9; + p_init[(N - i - 1) * 16 + 10] = i * 16 + 10; + p_init[(N - i - 1) * 16 + 11] = i * 16 + 11; + p_init[(N - i - 1) * 16 + 12] = i * 16 + 12; + p_init[(N - i - 1) * 16 + 13] = i * 16 + 13; + p_init[(N - i - 1) * 16 + 14] = i * 16 + 14; + p_init[(N - i - 1) * 16 + 15] = i * 16 + 15; + } + memcpy (pd_src, p_init, N * 16); + + if (__builtin_memcmp (pd_dst, pd_src, N * 2 * sizeof (double)) != 0) + __builtin_abort (); + + if (__builtin_memcmp (pd_dst2, pd_src2, N * 2 * sizeof (double)) != 0) + __builtin_abort (); + + if (__builtin_memcmp (cdf_dst, cdf_src, N * sizeof (cdf)) != 0) + __builtin_abort (); + + if (__builtin_memcmp (cdf2_dst, cdf2_src, N * sizeof (cdf2)) != 0) + __builtin_abort (); + + if (__builtin_memcmp (cdf3_dst, cdf3_src, N * sizeof (cdf3)) != 0) + __builtin_abort (); + + if (__builtin_memcmp (cdf4_dst, cdf4_src, N * sizeof (cdf4)) != 0) + __builtin_abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/pr106010-9c.c b/gcc/testsuite/gcc.target/i386/pr106010-9c.c new file mode 100644 index 0000000..ff51f61 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106010-9c.c @@ -0,0 +1,90 @@ +/* { dg-do 
run } */ +/* { dg-options "-O3 -mavx2 -fvect-cost-model=unlimited" } */ +/* { dg-require-effective-target avx2 } */ + +#include +#include "avx2-check.h" +#include "pr106010-9a.c" + +static void +avx2_test (void) +{ + _Complex double* pd_src = (_Complex double*) malloc (N * sizeof (_Complex double)); + _Complex double* pd_dst = (_Complex double*) malloc (N * sizeof (_Complex double)); + _Complex double* pd_src2 = (_Complex double*) malloc (N * sizeof (_Complex double)); + _Complex double* pd_dst2 = (_Complex double*) malloc (N * sizeof (_Complex double)); + cdf* cdf_src = (cdf*) malloc (N * sizeof (cdf)); + cdf* cdf_dst = (cdf*) malloc (N * sizeof (cdf)); + cdf2* cdf2_src = (cdf2*) malloc (N * sizeof (cdf2)); + cdf2* cdf2_dst = (cdf2*) malloc (N * sizeof (cdf2)); + cdf3* cdf3_src = (cdf3*) malloc (N * sizeof (cdf3)); + cdf3* cdf3_dst = (cdf3*) malloc (N * sizeof (cdf3)); + cdf4* cdf4_src = (cdf4*) malloc (N * sizeof (cdf4)); + cdf4* cdf4_dst = (cdf4*) malloc (N * sizeof (cdf4)); + + char* p_init = (char*) malloc (N * sizeof (cdf3)); + + __builtin_memset (cdf_dst, 0, N * sizeof (cdf)); + __builtin_memset (cdf2_dst, 0, N * sizeof (cdf2)); + __builtin_memset (cdf3_dst, 0, N * sizeof (cdf3)); + __builtin_memset (cdf4_dst, 0, N * sizeof (cdf4)); + __builtin_memset (pd_dst, 0, N * sizeof (_Complex double)); + __builtin_memset (pd_dst2, 0, N * sizeof (_Complex double)); + + for (int i = 0; i != N * sizeof (cdf3); i++) + p_init[i] = i; + + memcpy (cdf_src, p_init, N * sizeof (cdf)); + memcpy (cdf2_src, p_init, N * sizeof (cdf2)); + memcpy (cdf3_src, p_init, N * sizeof (cdf3)); + memcpy (cdf4_src, p_init, N * sizeof (cdf4)); + memcpy (pd_src, p_init, N * sizeof (_Complex double)); + for (int i = 0; i != 2 * N * sizeof (double); i++) + p_init[i] = i % 16; + memcpy (pd_src2, p_init, N * sizeof (_Complex double)); + + foo (cdf_dst, cdf_src); + foo1 (cdf2_dst, cdf2_src); + foo2 (cdf3_dst, cdf3_src); + foo3 (cdf4_dst, cdf4_src); + foo4 (pd_dst, pd_src); + foo5 (pd_dst2, pd_src2); 
+ for (int i = 0; i != N; i++) + { + p_init[(N - i - 1) * 16] = i * 16; + p_init[(N - i - 1) * 16 + 1] = i * 16 + 1; + p_init[(N - i - 1) * 16 + 2] = i * 16 + 2; + p_init[(N - i - 1) * 16 + 3] = i * 16 + 3; + p_init[(N - i - 1) * 16 + 4] = i * 16 + 4; + p_init[(N - i - 1) * 16 + 5] = i * 16 + 5; + p_init[(N - i - 1) * 16 + 6] = i * 16 + 6; + p_init[(N - i - 1) * 16 + 7] = i * 16 + 7; + p_init[(N - i - 1) * 16 + 8] = i * 16 + 8; + p_init[(N - i - 1) * 16 + 9] = i * 16 + 9; + p_init[(N - i - 1) * 16 + 10] = i * 16 + 10; + p_init[(N - i - 1) * 16 + 11] = i * 16 + 11; + p_init[(N - i - 1) * 16 + 12] = i * 16 + 12; + p_init[(N - i - 1) * 16 + 13] = i * 16 + 13; + p_init[(N - i - 1) * 16 + 14] = i * 16 + 14; + p_init[(N - i - 1) * 16 + 15] = i * 16 + 15; + } + memcpy (pd_src, p_init, N * 16); + + if (__builtin_memcmp (pd_dst, pd_src, N * 2 * sizeof (double)) != 0) + __builtin_abort (); + + if (__builtin_memcmp (pd_dst2, pd_src2, N * 2 * sizeof (double)) != 0) + __builtin_abort (); + + if (__builtin_memcmp (cdf_dst, cdf_src, N * sizeof (cdf)) != 0) + __builtin_abort (); + + if (__builtin_memcmp (cdf2_dst, cdf2_src, N * sizeof (cdf2)) != 0) + __builtin_abort (); + + if (__builtin_memcmp (cdf3_dst, cdf3_src, N * sizeof (cdf3)) != 0) + __builtin_abort (); + + if (__builtin_memcmp (cdf4_dst, cdf4_src, N * sizeof (cdf4)) != 0) + __builtin_abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/pr106010-9d.c b/gcc/testsuite/gcc.target/i386/pr106010-9d.c new file mode 100644 index 0000000..d4d8f1d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106010-9d.c @@ -0,0 +1,92 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -mavx512f -mavx512vl -fvect-cost-model=unlimited -mprefer-vector-width=512" } */ +/* { dg-require-effective-target avx512f } */ + +#include +#include +#define AVX512F +#include "avx512-check.h" +#include "pr106010-9a.c" + +static void +test_512 (void) +{ + _Complex double* pd_src = (_Complex double*) malloc (N * sizeof (_Complex double)); + _Complex double* pd_dst 
= (_Complex double*) malloc (N * sizeof (_Complex double)); + _Complex double* pd_src2 = (_Complex double*) malloc (N * sizeof (_Complex double)); + _Complex double* pd_dst2 = (_Complex double*) malloc (N * sizeof (_Complex double)); + cdf* cdf_src = (cdf*) malloc (N * sizeof (cdf)); + cdf* cdf_dst = (cdf*) malloc (N * sizeof (cdf)); + cdf2* cdf2_src = (cdf2*) malloc (N * sizeof (cdf2)); + cdf2* cdf2_dst = (cdf2*) malloc (N * sizeof (cdf2)); + cdf3* cdf3_src = (cdf3*) malloc (N * sizeof (cdf3)); + cdf3* cdf3_dst = (cdf3*) malloc (N * sizeof (cdf3)); + cdf4* cdf4_src = (cdf4*) malloc (N * sizeof (cdf4)); + cdf4* cdf4_dst = (cdf4*) malloc (N * sizeof (cdf4)); + + char* p_init = (char*) malloc (N * sizeof (cdf3)); + + __builtin_memset (cdf_dst, 0, N * sizeof (cdf)); + __builtin_memset (cdf2_dst, 0, N * sizeof (cdf2)); + __builtin_memset (cdf3_dst, 0, N * sizeof (cdf3)); + __builtin_memset (cdf4_dst, 0, N * sizeof (cdf4)); + __builtin_memset (pd_dst, 0, N * sizeof (_Complex double)); + __builtin_memset (pd_dst2, 0, N * sizeof (_Complex double)); + + for (int i = 0; i != N * sizeof (cdf3); i++) + p_init[i] = i; + + memcpy (cdf_src, p_init, N * sizeof (cdf)); + memcpy (cdf2_src, p_init, N * sizeof (cdf2)); + memcpy (cdf3_src, p_init, N * sizeof (cdf3)); + memcpy (cdf4_src, p_init, N * sizeof (cdf4)); + memcpy (pd_src, p_init, N * sizeof (_Complex double)); + for (int i = 0; i != 2 * N * sizeof (double); i++) + p_init[i] = i % 16; + memcpy (pd_src2, p_init, N * sizeof (_Complex double)); + + foo (cdf_dst, cdf_src); + foo1 (cdf2_dst, cdf2_src); + foo2 (cdf3_dst, cdf3_src); + foo3 (cdf4_dst, cdf4_src); + foo4 (pd_dst, pd_src); + foo5 (pd_dst2, pd_src2); + for (int i = 0; i != N; i++) + { + p_init[(N - i - 1) * 16] = i * 16; + p_init[(N - i - 1) * 16 + 1] = i * 16 + 1; + p_init[(N - i - 1) * 16 + 2] = i * 16 + 2; + p_init[(N - i - 1) * 16 + 3] = i * 16 + 3; + p_init[(N - i - 1) * 16 + 4] = i * 16 + 4; + p_init[(N - i - 1) * 16 + 5] = i * 16 + 5; + p_init[(N - i - 1) * 16 + 
6] = i * 16 + 6; + p_init[(N - i - 1) * 16 + 7] = i * 16 + 7; + p_init[(N - i - 1) * 16 + 8] = i * 16 + 8; + p_init[(N - i - 1) * 16 + 9] = i * 16 + 9; + p_init[(N - i - 1) * 16 + 10] = i * 16 + 10; + p_init[(N - i - 1) * 16 + 11] = i * 16 + 11; + p_init[(N - i - 1) * 16 + 12] = i * 16 + 12; + p_init[(N - i - 1) * 16 + 13] = i * 16 + 13; + p_init[(N - i - 1) * 16 + 14] = i * 16 + 14; + p_init[(N - i - 1) * 16 + 15] = i * 16 + 15; + } + memcpy (pd_src, p_init, N * 16); + + if (__builtin_memcmp (pd_dst, pd_src, N * 2 * sizeof (double)) != 0) + __builtin_abort (); + + if (__builtin_memcmp (pd_dst2, pd_src2, N * 2 * sizeof (double)) != 0) + __builtin_abort (); + + if (__builtin_memcmp (cdf_dst, cdf_src, N * sizeof (cdf)) != 0) + __builtin_abort (); + + if (__builtin_memcmp (cdf2_dst, cdf2_src, N * sizeof (cdf2)) != 0) + __builtin_abort (); + + if (__builtin_memcmp (cdf3_dst, cdf3_src, N * sizeof (cdf3)) != 0) + __builtin_abort (); + + if (__builtin_memcmp (cdf4_dst, cdf4_src, N * sizeof (cdf4)) != 0) + __builtin_abort (); +} diff --git a/gcc/tree-complex.cc b/gcc/tree-complex.cc index 61950a0..ea9df61 100644 --- a/gcc/tree-complex.cc +++ b/gcc/tree-complex.cc @@ -297,6 +297,11 @@ init_dont_simulate_again (void) break; default: + /* When expand_complex_move would trigger make sure we + perform lowering even when there is no actual complex + operation. This helps consistency and vectorization. 
*/ + if (TREE_CODE (TREE_TYPE (gimple_op (stmt, 0))) == COMPLEX_TYPE) + saw_a_complex_op = true; break; } @@ -869,7 +874,9 @@ expand_complex_move (gimple_stmt_iterator *gsi, tree type) update_complex_assignment (gsi, r, i); } } - else if (rhs && TREE_CODE (rhs) == SSA_NAME && !TREE_SIDE_EFFECTS (lhs)) + else if (rhs + && (TREE_CODE (rhs) == SSA_NAME || TREE_CODE (rhs) == COMPLEX_CST) + && !TREE_SIDE_EFFECTS (lhs)) { tree x; gimple *t; -- cgit v1.1 From 78d5e125c008d87cb2e1c08a9ff2a02d4214ffcf Mon Sep 17 00:00:00 2001 From: liuhongt Date: Tue, 19 Jul 2022 11:22:24 +0800 Subject: Move pass_cse_sincos after vectorizer. __builtin_cexpi can't be vectorized since there's gap between it and vectorized sincos version(In libmvec, it passes a double and two double pointer and returns nothing.) And it will lose some vectorization opportunity if sin & cos are optimized to cexpi before vectorizer. I'm trying to add vect_recog_cexpi_pattern to split cexpi to sin and cos, but it failed vectorizable_simd_clone_call since NULL is returned by cgraph_node::get (fndecl). So alternatively, the patch try to move pass_cse_sincos after vectorizer, just before pas_cse_reciprocals. Also original pass_cse_sincos additionaly expands pow&cabs, this patch split that part into a separate pass named pass_expand_powcabs which remains the old pass position. gcc/ChangeLog: * passes.def: (Split pass_cse_sincos to pass_expand_powcabs and pass_cse_sincos, and move pass_cse_sincos after vectorizer). * timevar.def (TV_TREE_POWCABS): New timevar. * tree-pass.h (make_pass_expand_powcabs): Split from pass_cse_sincos. * tree-ssa-math-opts.cc (gimple_expand_builtin_cabs): Ditto. (class pass_expand_powcabs): Ditto. (pass_expand_powcabs::execute): Ditto. (make_pass_expand_powcabs): Ditto. (pass_cse_sincos::execute): Remove pow/cabs expand part. (make_pass_cse_sincos): Ditto. gcc/testsuite/ChangeLog: * gcc.dg/pow-sqrt-synth-1.c: Adjust testcase. 
--- gcc/passes.def | 3 +- gcc/testsuite/gcc.dg/pow-sqrt-synth-1.c | 4 +- gcc/timevar.def | 1 + gcc/tree-pass.h | 1 + gcc/tree-ssa-math-opts.cc | 112 ++++++++++++++++++++++++++------ 5 files changed, 97 insertions(+), 24 deletions(-) (limited to 'gcc') diff --git a/gcc/passes.def b/gcc/passes.def index 375d3d6..6bb92ef 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -253,7 +253,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_ccp, true /* nonzero_p */); /* After CCP we rewrite no longer addressed locals into SSA form if possible. */ - NEXT_PASS (pass_cse_sincos); + NEXT_PASS (pass_expand_powcabs); NEXT_PASS (pass_optimize_bswap); NEXT_PASS (pass_laddress); NEXT_PASS (pass_lim); @@ -328,6 +328,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_simduid_cleanup); NEXT_PASS (pass_lower_vector_ssa); NEXT_PASS (pass_lower_switch); + NEXT_PASS (pass_cse_sincos); NEXT_PASS (pass_cse_reciprocals); NEXT_PASS (pass_reassoc, false /* early_p */); NEXT_PASS (pass_strength_reduction); diff --git a/gcc/testsuite/gcc.dg/pow-sqrt-synth-1.c b/gcc/testsuite/gcc.dg/pow-sqrt-synth-1.c index 4a94325..484b29a 100644 --- a/gcc/testsuite/gcc.dg/pow-sqrt-synth-1.c +++ b/gcc/testsuite/gcc.dg/pow-sqrt-synth-1.c @@ -1,5 +1,5 @@ /* { dg-do compile { target sqrt_insn } } */ -/* { dg-options "-fdump-tree-sincos -Ofast --param max-pow-sqrt-depth=8" } */ +/* { dg-options "-fdump-tree-powcabs -Ofast --param max-pow-sqrt-depth=8" } */ /* { dg-additional-options "-mfloat-abi=softfp -mfpu=neon-vfpv4" { target arm*-*-* } } */ double @@ -34,4 +34,4 @@ vecfoo (double *a) a[i] = __builtin_pow (a[i], 1.25); } -/* { dg-final { scan-tree-dump-times "synthesizing" 7 "sincos" } } */ +/* { dg-final { scan-tree-dump-times "synthesizing" 7 "powcabs" } } */ diff --git a/gcc/timevar.def b/gcc/timevar.def index 2dae5e1..651af19 100644 --- a/gcc/timevar.def +++ b/gcc/timevar.def @@ -220,6 +220,7 @@ DEFTIMEVAR (TV_TREE_SWITCH_CONVERSION, "tree switch conversion") DEFTIMEVAR 
(TV_TREE_SWITCH_LOWERING, "tree switch lowering") DEFTIMEVAR (TV_TREE_RECIP , "gimple CSE reciprocals") DEFTIMEVAR (TV_TREE_SINCOS , "gimple CSE sin/cos") +DEFTIMEVAR (TV_TREE_POWCABS , "gimple expand pow/cabs") DEFTIMEVAR (TV_TREE_WIDEN_MUL , "gimple widening/fma detection") DEFTIMEVAR (TV_TRANS_MEM , "transactional memory") DEFTIMEVAR (TV_TREE_STRLEN , "tree strlen optimization") diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 606d1d6..4dfe05e 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -444,6 +444,7 @@ extern gimple_opt_pass *make_pass_early_warn_uninitialized (gcc::context *ctxt); extern gimple_opt_pass *make_pass_late_warn_uninitialized (gcc::context *ctxt); extern gimple_opt_pass *make_pass_cse_reciprocals (gcc::context *ctxt); extern gimple_opt_pass *make_pass_cse_sincos (gcc::context *ctxt); +extern gimple_opt_pass *make_pass_expand_powcabs (gcc::context *ctxt); extern gimple_opt_pass *make_pass_optimize_bswap (gcc::context *ctxt); extern gimple_opt_pass *make_pass_store_merging (gcc::context *ctxt); extern gimple_opt_pass *make_pass_optimize_widening_mul (gcc::context *ctxt); diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc index a4492c9..58152b5 100644 --- a/gcc/tree-ssa-math-opts.cc +++ b/gcc/tree-ssa-math-opts.cc @@ -2226,8 +2226,7 @@ gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg) } /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1 - on the SSA_NAME argument of each of them. Also expand powi(x,n) into - an optimal number of multiplies, when n is a constant. */ + on the SSA_NAME argument of each of them. */ namespace { @@ -2254,8 +2253,6 @@ public: /* opt_pass methods: */ bool gate (function *) final override { - /* We no longer require either sincos or cexp, since powi expansion - piggybacks on this pass. 
*/ return optimize; } @@ -2275,24 +2272,15 @@ pass_cse_sincos::execute (function *fun) FOR_EACH_BB_FN (bb, fun) { gimple_stmt_iterator gsi; - bool cleanup_eh = false; for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi)) { gimple *stmt = gsi_stmt (gsi); - /* Only the last stmt in a bb could throw, no need to call - gimple_purge_dead_eh_edges if we change something in the middle - of a basic block. */ - cleanup_eh = false; - if (is_gimple_call (stmt) && gimple_call_lhs (stmt)) { - tree arg, arg0, arg1, result; - HOST_WIDE_INT n; - location_t loc; - + tree arg; switch (gimple_call_combined_fn (stmt)) { CASE_CFN_COS: @@ -2309,7 +2297,94 @@ pass_cse_sincos::execute (function *fun) if (TREE_CODE (arg) == SSA_NAME) cfg_changed |= execute_cse_sincos_1 (arg); break; + default: + break; + } + } + } + } + + statistics_counter_event (fun, "sincos statements inserted", + sincos_stats.inserted); + statistics_counter_event (fun, "conv statements removed", + sincos_stats.conv_removed); + + return cfg_changed ? TODO_cleanup_cfg : 0; +} + +} // anon namespace + +gimple_opt_pass * +make_pass_cse_sincos (gcc::context *ctxt) +{ + return new pass_cse_sincos (ctxt); +} + +/* Expand powi(x,n) into an optimal number of multiplies, when n is a constant. + Also expand CABS. 
*/ +namespace { + +const pass_data pass_data_expand_powcabs = +{ + GIMPLE_PASS, /* type */ + "powcabs", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_TREE_POWCABS, /* tv_id */ + PROP_ssa, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_update_ssa, /* todo_flags_finish */ +}; + +class pass_expand_powcabs : public gimple_opt_pass +{ +public: + pass_expand_powcabs (gcc::context *ctxt) + : gimple_opt_pass (pass_data_expand_powcabs, ctxt) + {} + /* opt_pass methods: */ + bool gate (function *) final override + { + return optimize; + } + + unsigned int execute (function *) final override; + +}; // class pass_expand_powcabs + +unsigned int +pass_expand_powcabs::execute (function *fun) +{ + basic_block bb; + bool cfg_changed = false; + + calculate_dominance_info (CDI_DOMINATORS); + + FOR_EACH_BB_FN (bb, fun) + { + gimple_stmt_iterator gsi; + bool cleanup_eh = false; + + for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + + /* Only the last stmt in a bb could throw, no need to call + gimple_purge_dead_eh_edges if we change something in the middle + of a basic block. */ + cleanup_eh = false; + + if (is_gimple_call (stmt) + && gimple_call_lhs (stmt)) + { + tree arg0, arg1, result; + HOST_WIDE_INT n; + location_t loc; + + switch (gimple_call_combined_fn (stmt)) + { CASE_CFN_POW: arg0 = gimple_call_arg (stmt, 0); arg1 = gimple_call_arg (stmt, 1); @@ -2405,20 +2480,15 @@ pass_cse_sincos::execute (function *fun) cfg_changed |= gimple_purge_dead_eh_edges (bb); } - statistics_counter_event (fun, "sincos statements inserted", - sincos_stats.inserted); - statistics_counter_event (fun, "conv statements removed", - sincos_stats.conv_removed); - return cfg_changed ? 
TODO_cleanup_cfg : 0; } } // anon namespace gimple_opt_pass * -make_pass_cse_sincos (gcc::context *ctxt) +make_pass_expand_powcabs (gcc::context *ctxt) { - return new pass_cse_sincos (ctxt); + return new pass_expand_powcabs (ctxt); } /* Return true if stmt is a type conversion operation that can be stripped -- cgit v1.1 From 5f59d0f2d9fa921c04d75c39592e603cfa2b1324 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 20 Jul 2022 11:02:03 +0200 Subject: Improve SLP codegen, avoiding unnecessary TREE_ADDRESSABLE The following adjusts vectorizer code generation to avoid splitting out address increments for invariant addresses which causes objects to get TREE_ADDRESSABLE when not necessary. * tree-vect-data-refs.cc (bump_vector_ptr): Return an invariant updated address when the input was invariant. --- gcc/tree-vect-data-refs.cc | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'gcc') diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc index 609cacc..b279a82 100644 --- a/gcc/tree-vect-data-refs.cc +++ b/gcc/tree-vect-data-refs.cc @@ -5195,6 +5195,14 @@ bump_vector_ptr (vec_info *vinfo, if (TREE_CODE (dataref_ptr) == SSA_NAME) new_dataref_ptr = copy_ssa_name (dataref_ptr); + else if (is_gimple_min_invariant (dataref_ptr)) + /* When possible avoid emitting a separate increment stmt that will + force the addressed object addressable. */ + return build1 (ADDR_EXPR, TREE_TYPE (dataref_ptr), + fold_build2 (MEM_REF, + TREE_TYPE (TREE_TYPE (dataref_ptr)), + dataref_ptr, + fold_convert (ptr_type_node, update))); else new_dataref_ptr = make_ssa_name (TREE_TYPE (dataref_ptr)); incr_stmt = gimple_build_assign (new_dataref_ptr, POINTER_PLUS_EXPR, -- cgit v1.1 From 8694390e2b6ae3af3212f1c829e62fb086cf7707 Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Tue, 19 Jul 2022 18:01:37 +0300 Subject: Remove unused remove_node_from_expr_list This function remains unused since remove_node_from_insn_list was cloned from it. 
gcc/ChangeLog: * rtl.h (remove_node_from_expr_list): Remove declaration. * rtlanal.cc (remove_node_from_expr_list): Remove (no uses). --- gcc/rtl.h | 1 - gcc/rtlanal.cc | 29 ----------------------------- 2 files changed, 30 deletions(-) (limited to 'gcc') diff --git a/gcc/rtl.h b/gcc/rtl.h index 488016b..645c009 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -3712,7 +3712,6 @@ extern unsigned hash_rtx_cb (const_rtx, machine_mode, int *, int *, extern rtx regno_use_in (unsigned int, rtx); extern int auto_inc_p (const_rtx); extern bool in_insn_list_p (const rtx_insn_list *, const rtx_insn *); -extern void remove_node_from_expr_list (const_rtx, rtx_expr_list **); extern void remove_node_from_insn_list (const rtx_insn *, rtx_insn_list **); extern int loc_mentioned_in_p (rtx *, const_rtx); extern rtx_insn *find_first_parameter_load (rtx_insn *, rtx_insn *); diff --git a/gcc/rtlanal.cc b/gcc/rtlanal.cc index d78cc60..ec95ecd 100644 --- a/gcc/rtlanal.cc +++ b/gcc/rtlanal.cc @@ -2878,35 +2878,6 @@ in_insn_list_p (const rtx_insn_list *listp, const rtx_insn *node) return false; } -/* Search LISTP (an EXPR_LIST) for an entry whose first operand is NODE and - remove that entry from the list if it is found. - - A simple equality test is used to determine if NODE matches. */ - -void -remove_node_from_expr_list (const_rtx node, rtx_expr_list **listp) -{ - rtx_expr_list *temp = *listp; - rtx_expr_list *prev = NULL; - - while (temp) - { - if (node == temp->element ()) - { - /* Splice the node out of the list. */ - if (prev) - XEXP (prev, 1) = temp->next (); - else - *listp = temp->next (); - - return; - } - - prev = temp; - temp = temp->next (); - } -} - /* Search LISTP (an INSN_LIST) for an entry whose first operand is NODE and remove that entry from the list if it is found. 
-- cgit v1.1 From daa36cfc2fc2538810db071b81d250f4d621f7ea Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Tue, 19 Jul 2022 18:04:30 +0300 Subject: Avoid registering __builtin_setjmp_receiver label twice [PR101347] The testcase in the PR demonstrates how it is possible for one __builtin_setjmp_receiver label to appear in nonlocal_goto_handler_labels list twice (after the block with __builtin_setjmp_setup referring to it was duplicated). remove_node_from_insn_list did not account for this possibility and removed only the first copy from the list. Add an assert verifying that duplicates are not present. To avoid adding a label to the list twice, move registration of the label from __builtin_setjmp_setup handling to __builtin_setjmp_receiver. gcc/ChangeLog: PR rtl-optimization/101347 * builtins.cc (expand_builtin) [BUILT_IN_SETJMP_SETUP]: Move population of nonlocal_goto_handler_labels from here ... (expand_builtin) [BUILT_IN_SETJMP_RECEIVER]: ... to here. * rtlanal.cc (remove_node_from_insn_list): Verify that a duplicate is not present in the remainder of the list. --- gcc/builtins.cc | 15 +++++++-------- gcc/rtlanal.cc | 1 + 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'gcc') diff --git a/gcc/builtins.cc b/gcc/builtins.cc index 0d13197..b08b436 100644 --- a/gcc/builtins.cc +++ b/gcc/builtins.cc @@ -7472,15 +7472,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode, tree label = TREE_OPERAND (CALL_EXPR_ARG (exp, 1), 0); rtx_insn *label_r = label_rtx (label); - /* This is copied from the handling of non-local gotos. */ expand_builtin_setjmp_setup (buf_addr, label_r); - nonlocal_goto_handler_labels - = gen_rtx_INSN_LIST (VOIDmode, label_r, - nonlocal_goto_handler_labels); - /* ??? Do not let expand_label treat us as such since we would - not want to be both on the list of non-local labels and on - the list of forced labels. 
*/ - FORCED_LABEL (label) = 0; return const0_rtx; } break; @@ -7493,6 +7485,13 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode, rtx_insn *label_r = label_rtx (label); expand_builtin_setjmp_receiver (label_r); + nonlocal_goto_handler_labels + = gen_rtx_INSN_LIST (VOIDmode, label_r, + nonlocal_goto_handler_labels); + /* ??? Do not let expand_label treat us as such since we would + not want to be both on the list of non-local labels and on + the list of forced labels. */ + FORCED_LABEL (label) = 0; return const0_rtx; } break; diff --git a/gcc/rtlanal.cc b/gcc/rtlanal.cc index ec95ecd..56da743 100644 --- a/gcc/rtlanal.cc +++ b/gcc/rtlanal.cc @@ -2899,6 +2899,7 @@ remove_node_from_insn_list (const rtx_insn *node, rtx_insn_list **listp) else *listp = temp->next (); + gcc_checking_assert (!in_insn_list_p (temp->next (), node)); return; } -- cgit v1.1 From 26bbe78f77f73bb66af1ac13d0deec888a3c6510 Mon Sep 17 00:00:00 2001 From: Harald Anlauf Date: Wed, 20 Jul 2022 20:40:23 +0200 Subject: Fortran: fix parsing of omp task affinity iterator clause [PR101330] gcc/fortran/ChangeLog: PR fortran/101330 * openmp.cc (gfc_match_iterator): Remove left-over code from development that could lead to a crash on invalid input. gcc/testsuite/ChangeLog: PR fortran/101330 * gfortran.dg/gomp/affinity-clause-7.f90: New test. 
--- gcc/fortran/openmp.cc | 1 - gcc/testsuite/gfortran.dg/gomp/affinity-clause-7.f90 | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gfortran.dg/gomp/affinity-clause-7.f90 (limited to 'gcc') diff --git a/gcc/fortran/openmp.cc b/gcc/fortran/openmp.cc index bd4ff25..df9cdf4 100644 --- a/gcc/fortran/openmp.cc +++ b/gcc/fortran/openmp.cc @@ -1181,7 +1181,6 @@ gfc_match_iterator (gfc_namespace **ns, bool permit_var) } if (':' == gfc_peek_ascii_char ()) { - step = gfc_get_expr (); if (gfc_match (": %e ", &step) != MATCH_YES) { gfc_free_expr (begin); diff --git a/gcc/testsuite/gfortran.dg/gomp/affinity-clause-7.f90 b/gcc/testsuite/gfortran.dg/gomp/affinity-clause-7.f90 new file mode 100644 index 0000000..5b1ca85 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/gomp/affinity-clause-7.f90 @@ -0,0 +1,19 @@ +! { dg-do compile } +! PR fortran/101330 - ICE in free_expr0(): Bad expr type +! Contributed by G.Steinmetz + + implicit none + integer :: j, b(10) +!$omp task affinity (iterator(j=1:2:1) : b(j)) +!$omp end task +!$omp task affinity (iterator(j=1:2:) : b(j)) ! { dg-error "Invalid character" } +!!$omp end task +!$omp task affinity (iterator(j=1:2: ! { dg-error "Invalid character" } +!!$omp end task +!$omp task affinity (iterator(j=1:2:) ! { dg-error "Invalid character" } +!!$omp end task +!$omp task affinity (iterator(j=1:2::) ! { dg-error "Invalid character" } +!!$omp end task +!$omp task affinity (iterator(j=1:2:)) ! { dg-error "Invalid character" } +!!$omp end task +end -- cgit v1.1 From 5e830693dd335621940368b6d39b23afc2c98545 Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Wed, 20 Jul 2022 17:25:35 -0400 Subject: analyzer: update "tainted" state of RHS in comparisons [PR106373] Doing so fixes various false positives from -Wanalyzer-tainted-array-index at -O1 and above (e.g. 
seen on the Linux kernel) gcc/analyzer/ChangeLog: PR analyzer/106373 * sm-taint.cc (taint_state_machine::on_condition): Potentially update the state of the RHS as well as the LHS. gcc/testsuite/ChangeLog: PR analyzer/106373 * gcc.dg/analyzer/torture/taint-read-index-3.c: New test. Signed-off-by: David Malcolm --- gcc/analyzer/sm-taint.cc | 18 ++++++-- .../gcc.dg/analyzer/torture/taint-read-index-3.c | 52 ++++++++++++++++++++++ 2 files changed, 67 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/analyzer/torture/taint-read-index-3.c (limited to 'gcc') diff --git a/gcc/analyzer/sm-taint.cc b/gcc/analyzer/sm-taint.cc index 9cb7888..0486c01 100644 --- a/gcc/analyzer/sm-taint.cc +++ b/gcc/analyzer/sm-taint.cc @@ -830,13 +830,11 @@ taint_state_machine::on_condition (sm_context *sm_ctxt, const gimple *stmt, const svalue *lhs, enum tree_code op, - const svalue *rhs ATTRIBUTE_UNUSED) const + const svalue *rhs) const { if (stmt == NULL) return; - // TODO: this doesn't use the RHS; should we make it symmetric? - // TODO switch (op) { @@ -845,10 +843,17 @@ taint_state_machine::on_condition (sm_context *sm_ctxt, case GE_EXPR: case GT_EXPR: { + /* (LHS >= RHS) or (LHS > RHS) + LHS gains a lower bound + RHS gains an upper bound. */ sm_ctxt->on_transition (node, stmt, lhs, m_tainted, m_has_lb); sm_ctxt->on_transition (node, stmt, lhs, m_has_ub, m_stop); + sm_ctxt->on_transition (node, stmt, rhs, m_tainted, + m_has_ub); + sm_ctxt->on_transition (node, stmt, rhs, m_has_lb, + m_stop); } break; case LE_EXPR: @@ -896,10 +901,17 @@ taint_state_machine::on_condition (sm_context *sm_ctxt, } } + /* (LHS <= RHS) or (LHS < RHS) + LHS gains an upper bound + RHS gains a lower bound. 
*/ sm_ctxt->on_transition (node, stmt, lhs, m_tainted, m_has_ub); sm_ctxt->on_transition (node, stmt, lhs, m_has_lb, m_stop); + sm_ctxt->on_transition (node, stmt, rhs, m_tainted, + m_has_lb); + sm_ctxt->on_transition (node, stmt, rhs, m_has_ub, + m_stop); } break; default: diff --git a/gcc/testsuite/gcc.dg/analyzer/torture/taint-read-index-3.c b/gcc/testsuite/gcc.dg/analyzer/torture/taint-read-index-3.c new file mode 100644 index 0000000..8eb6061 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/torture/taint-read-index-3.c @@ -0,0 +1,52 @@ +// TODO: remove need for the taint option: +/* { dg-additional-options "-fanalyzer-checker=taint" } */ +/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } { "" } } */ + +struct raw_ep { + /* ...snip... */ + int state; + /* ...snip... */ +}; + +struct raw_dev { + /* ...snip... */ + struct raw_ep eps[30]; + int eps_num; + /* ...snip... */ +}; + +int __attribute__((tainted_args)) +simplified_raw_ioctl_ep_disable(struct raw_dev *dev, unsigned long value) +{ + int ret = 0, i = value; + + if (i < 0 || i >= dev->eps_num) { + ret = -16; + goto out_unlock; + } + if (dev->eps[i].state == 0) { /* { dg-bogus "attacker-controlled" } */ + ret = -22; + goto out_unlock; + } + +out_unlock: + return ret; +} + +int __attribute__((tainted_args)) +test_2(struct raw_dev *dev, int i) +{ + int ret = 0; + + if (i < 0 || i >= dev->eps_num) { + ret = -16; + goto out_unlock; + } + if (dev->eps[i].state == 0) { /* { dg-bogus "attacker-controlled" } */ + ret = -22; + goto out_unlock; + } + +out_unlock: + return ret; +} -- cgit v1.1 From e7dfd8744502d6588483ec63ab7f81c2f5940267 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Thu, 21 Jul 2022 00:16:34 +0000 Subject: Daily bump. 
--- gcc/ChangeLog | 40 ++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/analyzer/ChangeLog | 14 ++++++++++++++ gcc/fortran/ChangeLog | 6 ++++++ gcc/testsuite/ChangeLog | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 107 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index f6b5650..9a8bfd0 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,43 @@ +2022-07-20 Alexander Monakov + + PR rtl-optimization/101347 + * builtins.cc (expand_builtin) [BUILT_IN_SETJMP_SETUP]: Move + population of nonlocal_goto_handler_labels from here ... + (expand_builtin) [BUILT_IN_SETJMP_RECEIVER]: ... to here. + * rtlanal.cc (remove_node_from_insn_list): Verify that a + duplicate is not present in the remainder of the list. + +2022-07-20 Alexander Monakov + + * rtl.h (remove_node_from_expr_list): Remove declaration. + * rtlanal.cc (remove_node_from_expr_list): Remove (no uses). + +2022-07-20 Richard Biener + + * tree-vect-data-refs.cc (bump_vector_ptr): Return an + invariant updated address when the input was invariant. + +2022-07-20 liuhongt + + * passes.def: (Split pass_cse_sincos to pass_expand_powcabs + and pass_cse_sincos, and move pass_cse_sincos after vectorizer). + * timevar.def (TV_TREE_POWCABS): New timevar. + * tree-pass.h (make_pass_expand_powcabs): Split from pass_cse_sincos. + * tree-ssa-math-opts.cc (gimple_expand_builtin_cabs): Ditto. + (class pass_expand_powcabs): Ditto. + (pass_expand_powcabs::execute): Ditto. + (make_pass_expand_powcabs): Ditto. + (pass_cse_sincos::execute): Remove pow/cabs expand part. + (make_pass_cse_sincos): Ditto. + +2022-07-20 Richard Biener + Hongtao Liu + + PR tree-optimization/106010 + * tree-complex.cc (init_dont_simulate_again): Lower complex + type move. + (expand_complex_move): Also expand COMPLEX_CST for rhs. 
+ 2022-07-19 Andrew MacLeod * gimple-range-cache.cc (ranger_cache::range_from_dom): Check diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 964c55a..ace4919 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20220720 +20220721 diff --git a/gcc/analyzer/ChangeLog b/gcc/analyzer/ChangeLog index 884be16..a94593e 100644 --- a/gcc/analyzer/ChangeLog +++ b/gcc/analyzer/ChangeLog @@ -1,3 +1,17 @@ +2022-07-20 David Malcolm + + PR analyzer/106373 + * sm-taint.cc (taint_state_machine::on_condition): Potentially + update the state of the RHS as well as the LHS. + +2022-07-20 David Malcolm + + PR analyzer/106359 + * region.h (string_region::tracked_p): New. + * store.cc (binding_cluster::binding_cluster): Move here from + store.h. Add assertion that base_region is tracked_p. + * store.h (binding_cluster::binding_cluster): Move to store.cc. + 2022-07-19 David Malcolm PR analyzer/106321 diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 9296258..1109cd2 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,9 @@ +2022-07-20 Harald Anlauf + + PR fortran/101330 + * openmp.cc (gfc_match_iterator): Remove left-over code from + development that could lead to a crash on invalid input. + 2022-07-19 Harald Anlauf PR fortran/103590 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index d04b83c..44d6ee2 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,49 @@ +2022-07-20 David Malcolm + + PR analyzer/106373 + * gcc.dg/analyzer/torture/taint-read-index-3.c: New test. + +2022-07-20 Harald Anlauf + + PR fortran/101330 + * gfortran.dg/gomp/affinity-clause-7.f90: New test. + +2022-07-20 liuhongt + + * gcc.dg/pow-sqrt-synth-1.c: Adjust testcase. + +2022-07-20 Richard Biener + Hongtao Liu + + * gcc.target/i386/pr106010-1a.c: New test. + * gcc.target/i386/pr106010-1b.c: New test. + * gcc.target/i386/pr106010-1c.c: New test. + * gcc.target/i386/pr106010-2a.c: New test. + * gcc.target/i386/pr106010-2b.c: New test. 
+ * gcc.target/i386/pr106010-2c.c: New test. + * gcc.target/i386/pr106010-3a.c: New test. + * gcc.target/i386/pr106010-3b.c: New test. + * gcc.target/i386/pr106010-3c.c: New test. + * gcc.target/i386/pr106010-4a.c: New test. + * gcc.target/i386/pr106010-4b.c: New test. + * gcc.target/i386/pr106010-4c.c: New test. + * gcc.target/i386/pr106010-5a.c: New test. + * gcc.target/i386/pr106010-5b.c: New test. + * gcc.target/i386/pr106010-5c.c: New test. + * gcc.target/i386/pr106010-6a.c: New test. + * gcc.target/i386/pr106010-6b.c: New test. + * gcc.target/i386/pr106010-6c.c: New test. + * gcc.target/i386/pr106010-7a.c: New test. + * gcc.target/i386/pr106010-7b.c: New test. + * gcc.target/i386/pr106010-7c.c: New test. + * gcc.target/i386/pr106010-8a.c: New test. + * gcc.target/i386/pr106010-8b.c: New test. + * gcc.target/i386/pr106010-8c.c: New test. + * gcc.target/i386/pr106010-9a.c: New test. + * gcc.target/i386/pr106010-9b.c: New test. + * gcc.target/i386/pr106010-9c.c: New test. + * gcc.target/i386/pr106010-9d.c: New test. + 2022-07-19 Harald Anlauf PR fortran/103590 -- cgit v1.1 From a6c192e80a87efbe6c0641f25a963c7bee9990fb Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Wed, 20 Jul 2022 21:34:03 -0400 Subject: analyzer: fix ICE on untracked decl_regions [PR106374] gcc/analyzer/ChangeLog: PR analyzer/106374 * region.cc (decl_region::get_svalue_for_initializer): Bail out on untracked regions. gcc/testsuite/ChangeLog: PR analyzer/106374 * gcc.dg/analyzer/untracked-2.c: New test. 
Signed-off-by: David Malcolm --- gcc/analyzer/region.cc | 5 +++++ gcc/testsuite/gcc.dg/analyzer/untracked-2.c | 7 +++++++ 2 files changed, 12 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/analyzer/untracked-2.c (limited to 'gcc') diff --git a/gcc/analyzer/region.cc b/gcc/analyzer/region.cc index a8d1ae9..b78bf4e 100644 --- a/gcc/analyzer/region.cc +++ b/gcc/analyzer/region.cc @@ -1152,6 +1152,11 @@ decl_region::get_svalue_for_initializer (region_model_manager *mgr) const if (binding->symbolic_p ()) return NULL; + /* If we don't care about tracking the content of this region, then + it's unused, and the value doesn't matter. */ + if (!tracked_p ()) + return NULL; + binding_cluster c (this); c.zero_fill_region (mgr->get_store_manager (), this); return mgr->get_or_create_compound_svalue (TREE_TYPE (m_decl), diff --git a/gcc/testsuite/gcc.dg/analyzer/untracked-2.c b/gcc/testsuite/gcc.dg/analyzer/untracked-2.c new file mode 100644 index 0000000..565a9cc --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/untracked-2.c @@ -0,0 +1,7 @@ +typedef unsigned char u8; +extern int foo(const u8 *key, unsigned int keylen); +int test (void) +{ + static const u8 default_salt[64]; + return foo(default_salt, 64); +} -- cgit v1.1 From 742377ed0f09313503a1c5393c4f742d69249521 Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Wed, 20 Jul 2022 21:34:17 -0400 Subject: analyzer: bulletproof taint warnings against NULL m_arg gcc/analyzer/ChangeLog: * sm-taint.cc (tainted_array_index::emit): Bulletproof against NULL m_arg. (tainted_array_index::describe_final_event): Likewise. (tainted_size::emit): Likewise. (tainted_size::describe_final_event): Likewise. 
Signed-off-by: David Malcolm --- gcc/analyzer/sm-taint.cc | 247 +++++++++++++++++++++++++++++++---------------- 1 file changed, 164 insertions(+), 83 deletions(-) (limited to 'gcc') diff --git a/gcc/analyzer/sm-taint.cc b/gcc/analyzer/sm-taint.cc index 0486c01..51bfe06 100644 --- a/gcc/analyzer/sm-taint.cc +++ b/gcc/analyzer/sm-taint.cc @@ -212,53 +212,96 @@ public: diagnostic_metadata m; /* CWE-129: "Improper Validation of Array Index". */ m.add_cwe (129); - switch (m_has_bounds) - { - default: - gcc_unreachable (); - case BOUNDS_NONE: - return warning_meta (rich_loc, m, get_controlling_option (), - "use of attacker-controlled value %qE" - " in array lookup without bounds checking", - m_arg); - break; - case BOUNDS_UPPER: - return warning_meta (rich_loc, m, get_controlling_option (), - "use of attacker-controlled value %qE" - " in array lookup without checking for negative", - m_arg); - break; - case BOUNDS_LOWER: - return warning_meta (rich_loc, m, get_controlling_option (), - "use of attacker-controlled value %qE" - " in array lookup without upper-bounds checking", - m_arg); - break; - } + if (m_arg) + switch (m_has_bounds) + { + default: + gcc_unreachable (); + case BOUNDS_NONE: + return warning_meta (rich_loc, m, get_controlling_option (), + "use of attacker-controlled value %qE" + " in array lookup without bounds checking", + m_arg); + break; + case BOUNDS_UPPER: + return warning_meta (rich_loc, m, get_controlling_option (), + "use of attacker-controlled value %qE" + " in array lookup without checking for negative", + m_arg); + break; + case BOUNDS_LOWER: + return warning_meta (rich_loc, m, get_controlling_option (), + "use of attacker-controlled value %qE" + " in array lookup without upper-bounds checking", + m_arg); + break; + } + else + switch (m_has_bounds) + { + default: + gcc_unreachable (); + case BOUNDS_NONE: + return warning_meta (rich_loc, m, get_controlling_option (), + "use of attacker-controlled value" + " in array lookup without bounds 
checking"); + break; + case BOUNDS_UPPER: + return warning_meta (rich_loc, m, get_controlling_option (), + "use of attacker-controlled value" + " in array lookup without checking for" + " negative"); + break; + case BOUNDS_LOWER: + return warning_meta (rich_loc, m, get_controlling_option (), + "use of attacker-controlled value" + " in array lookup without upper-bounds" + " checking"); + break; + } } label_text describe_final_event (const evdesc::final_event &ev) final override { - switch (m_has_bounds) - { - default: - gcc_unreachable (); - case BOUNDS_NONE: - return ev.formatted_print - ("use of attacker-controlled value %qE in array lookup" - " without bounds checking", - m_arg); - case BOUNDS_UPPER: - return ev.formatted_print - ("use of attacker-controlled value %qE" - " in array lookup without checking for negative", - m_arg); - case BOUNDS_LOWER: - return ev.formatted_print - ("use of attacker-controlled value %qE" - " in array lookup without upper-bounds checking", - m_arg); - } + if (m_arg) + switch (m_has_bounds) + { + default: + gcc_unreachable (); + case BOUNDS_NONE: + return ev.formatted_print + ("use of attacker-controlled value %qE in array lookup" + " without bounds checking", + m_arg); + case BOUNDS_UPPER: + return ev.formatted_print + ("use of attacker-controlled value %qE" + " in array lookup without checking for negative", + m_arg); + case BOUNDS_LOWER: + return ev.formatted_print + ("use of attacker-controlled value %qE" + " in array lookup without upper-bounds checking", + m_arg); + } + else + switch (m_has_bounds) + { + default: + gcc_unreachable (); + case BOUNDS_NONE: + return ev.formatted_print + ("use of attacker-controlled value in array lookup" + " without bounds checking"); + case BOUNDS_UPPER: + return ev.formatted_print + ("use of attacker-controlled value" + " in array lookup without checking for negative"); + case BOUNDS_LOWER: + return ev.formatted_print + ("use of attacker-controlled value" + " in array lookup without upper-bounds 
checking"); + } } }; @@ -394,50 +437,88 @@ public: { diagnostic_metadata m; m.add_cwe (129); - switch (m_has_bounds) - { - default: - gcc_unreachable (); - case BOUNDS_NONE: - return warning_meta (rich_loc, m, get_controlling_option (), - "use of attacker-controlled value %qE as size" - " without bounds checking", - m_arg); - break; - case BOUNDS_UPPER: - return warning_meta (rich_loc, m, get_controlling_option (), - "use of attacker-controlled value %qE as size" - " without lower-bounds checking", - m_arg); - break; - case BOUNDS_LOWER: - return warning_meta (rich_loc, m, get_controlling_option (), - "use of attacker-controlled value %qE as size" - " without upper-bounds checking", - m_arg); - break; - } + if (m_arg) + switch (m_has_bounds) + { + default: + gcc_unreachable (); + case BOUNDS_NONE: + return warning_meta (rich_loc, m, get_controlling_option (), + "use of attacker-controlled value %qE as size" + " without bounds checking", + m_arg); + break; + case BOUNDS_UPPER: + return warning_meta (rich_loc, m, get_controlling_option (), + "use of attacker-controlled value %qE as size" + " without lower-bounds checking", + m_arg); + break; + case BOUNDS_LOWER: + return warning_meta (rich_loc, m, get_controlling_option (), + "use of attacker-controlled value %qE as size" + " without upper-bounds checking", + m_arg); + break; + } + else + switch (m_has_bounds) + { + default: + gcc_unreachable (); + case BOUNDS_NONE: + return warning_meta (rich_loc, m, get_controlling_option (), + "use of attacker-controlled value as size" + " without bounds checking"); + break; + case BOUNDS_UPPER: + return warning_meta (rich_loc, m, get_controlling_option (), + "use of attacker-controlled value as size" + " without lower-bounds checking"); + break; + case BOUNDS_LOWER: + return warning_meta (rich_loc, m, get_controlling_option (), + "use of attacker-controlled value as size" + " without upper-bounds checking"); + break; + } } label_text describe_final_event (const 
evdesc::final_event &ev) final override { - switch (m_has_bounds) - { - default: - gcc_unreachable (); - case BOUNDS_NONE: - return ev.formatted_print ("use of attacker-controlled value %qE" - " as size without bounds checking", - m_arg); - case BOUNDS_UPPER: - return ev.formatted_print ("use of attacker-controlled value %qE" - " as size without lower-bounds checking", - m_arg); - case BOUNDS_LOWER: - return ev.formatted_print ("use of attacker-controlled value %qE" - " as size without upper-bounds checking", - m_arg); - } + if (m_arg) + switch (m_has_bounds) + { + default: + gcc_unreachable (); + case BOUNDS_NONE: + return ev.formatted_print ("use of attacker-controlled value %qE" + " as size without bounds checking", + m_arg); + case BOUNDS_UPPER: + return ev.formatted_print ("use of attacker-controlled value %qE" + " as size without lower-bounds checking", + m_arg); + case BOUNDS_LOWER: + return ev.formatted_print ("use of attacker-controlled value %qE" + " as size without upper-bounds checking", + m_arg); + } + else + switch (m_has_bounds) + { + default: + gcc_unreachable (); + case BOUNDS_NONE: + return ev.formatted_print ("use of attacker-controlled value" + " as size without bounds checking"); + case BOUNDS_UPPER: + return ev.formatted_print ("use of attacker-controlled value" + " as size without lower-bounds checking"); + case BOUNDS_LOWER: + return ev.formatted_print ("use of attacker-controlled value" + " as size without upper-bounds checking"); + } } }; -- cgit v1.1 From 6877993c4da49315151e9d912408480070144dd3 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 20 Jul 2022 15:46:17 +0200 Subject: Add alias disambiguation for vectorizer load/store IFNs The following adds support for MASK_STORE, MASK_LOAD and friends to call_may_clobber_ref_p and ref_maybe_used_by_call_p. Since they all use a special argument to specify TBAA they are not really suited for fnspec handling thus the manual support. 
* tree-ssa-alias.cc (ref_maybe_used_by_call_p_1): Special-case store internal functions and IFN_MASK_LOAD, IFN_LEN_LOAD and IFN_MASK_LOAD_LANES. (call_may_clobber_ref_p_1): Special-case IFN_MASK_STORE, IFN_LEN_STORE and IFN_MASK_STORE_LANES. --- gcc/tree-ssa-alias.cc | 49 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-ssa-alias.cc b/gcc/tree-ssa-alias.cc index 782266b..390cd87 100644 --- a/gcc/tree-ssa-alias.cc +++ b/gcc/tree-ssa-alias.cc @@ -47,6 +47,7 @@ along with GCC; see the file COPYING3. If not see #include "print-tree.h" #include "tree-ssa-alias-compare.h" #include "builtins.h" +#include "internal-fn.h" /* Broad overview of how alias analysis on gimple works: @@ -2793,8 +2794,38 @@ ref_maybe_used_by_call_p_1 (gcall *call, ao_ref *ref, bool tbaa_p) if (ref->volatile_p) return true; - callee = gimple_call_fndecl (call); + if (gimple_call_internal_p (call)) + switch (gimple_call_internal_fn (call)) + { + case IFN_MASK_STORE: + case IFN_SCATTER_STORE: + case IFN_MASK_SCATTER_STORE: + case IFN_LEN_STORE: + return false; + case IFN_MASK_STORE_LANES: + goto process_args; + case IFN_MASK_LOAD: + case IFN_LEN_LOAD: + case IFN_MASK_LOAD_LANES: + { + ao_ref rhs_ref; + tree lhs = gimple_call_lhs (call); + if (lhs) + { + ao_ref_init_from_ptr_and_size (&rhs_ref, + gimple_call_arg (call, 0), + TYPE_SIZE_UNIT (TREE_TYPE (lhs))); + rhs_ref.ref_alias_set = rhs_ref.base_alias_set + = tbaa_p ? 
get_deref_alias_set (TREE_TYPE + (gimple_call_arg (call, 1))) : 0; + return refs_may_alias_p_1 (ref, &rhs_ref, tbaa_p); + } + break; + } + default:; + } + callee = gimple_call_fndecl (call); if (callee != NULL_TREE) { struct cgraph_node *node = cgraph_node::get (callee); @@ -3005,7 +3036,7 @@ call_may_clobber_ref_p_1 (gcall *call, ao_ref *ref, bool tbaa_p) & (ECF_PURE|ECF_CONST|ECF_LOOPING_CONST_OR_PURE|ECF_NOVOPS)) return false; if (gimple_call_internal_p (call)) - switch (gimple_call_internal_fn (call)) + switch (auto fn = gimple_call_internal_fn (call)) { /* Treat these internal calls like ECF_PURE for aliasing, they don't write to any memory the program should care about. @@ -3018,6 +3049,20 @@ call_may_clobber_ref_p_1 (gcall *call, ao_ref *ref, bool tbaa_p) case IFN_UBSAN_PTR: case IFN_ASAN_CHECK: return false; + case IFN_MASK_STORE: + case IFN_LEN_STORE: + case IFN_MASK_STORE_LANES: + { + tree rhs = gimple_call_arg (call, + internal_fn_stored_value_index (fn)); + ao_ref lhs_ref; + ao_ref_init_from_ptr_and_size (&lhs_ref, gimple_call_arg (call, 0), + TYPE_SIZE_UNIT (TREE_TYPE (rhs))); + lhs_ref.ref_alias_set = lhs_ref.base_alias_set + = tbaa_p ? get_deref_alias_set + (TREE_TYPE (gimple_call_arg (call, 1))) : 0; + return refs_may_alias_p_1 (ref, &lhs_ref, tbaa_p); + } default: break; } -- cgit v1.1 From bd9837bc3ca1344c32aef7ba9f8fa1785063132e Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 20 Jul 2022 12:28:26 +0200 Subject: Teach VN about masked/len stores The following teaches VN to handle reads from .MASK_STORE and .LEN_STORE. For this push_partial_def is extended first for convenience so we don't have to handle the full def case in the caller (possibly other paths can be simplified then). Also the partial definition stored value can have an offset applied so we don't have to build a fake RHS when we register the pieces of an existing store. 
PR tree-optimization/106365 * tree-ssa-sccvn.cc (pd_data::rhs_off): New field determining the offset to start encoding of RHS from. (vn_walk_cb_data::vn_walk_cb_data): Initialize it. (vn_walk_cb_data::push_partial_def): Allow the first partial definition to be fully providing the def. Offset RHS before encoding if requested. (vn_reference_lookup_3): Initialize def_rhs everywhere. Add support for .MASK_STORE and .LEN_STORE (partial) definitions. * gcc.target/i386/vec-maskstore-vn.c: New testcase. --- gcc/testsuite/gcc.target/i386/vec-maskstore-vn.c | 30 +++ gcc/tree-ssa-sccvn.cc | 255 ++++++++++++++++++----- 2 files changed, 228 insertions(+), 57 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/vec-maskstore-vn.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/i386/vec-maskstore-vn.c b/gcc/testsuite/gcc.target/i386/vec-maskstore-vn.c new file mode 100644 index 0000000..9821390 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vec-maskstore-vn.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx2 -fdump-tree-fre5" } */ + +void __attribute__((noinline,noclone)) +foo (int *out, int *res) +{ + int mask[] = { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 }; + int i; + for (i = 0; i < 16; ++i) + { + if (mask[i]) + out[i] = i; + } + int o0 = out[0]; + int o7 = out[7]; + int o14 = out[14]; + int o15 = out[15]; + res[0] = o0; + res[2] = o7; + res[4] = o14; + res[6] = o15; +} + +/* Vectorization produces .MASK_STORE, unrolling will unroll the two + vector iterations. FRE5 after that should be able to CSE + out[7] and out[15], but leave out[0] and out[14] alone. 
*/ +/* { dg-final { scan-tree-dump " = o0_\[0-9\]+;" "fre5" } } */ +/* { dg-final { scan-tree-dump " = 7;" "fre5" } } */ +/* { dg-final { scan-tree-dump " = o14_\[0-9\]+;" "fre5" } } */ +/* { dg-final { scan-tree-dump " = 15;" "fre5" } } */ diff --git a/gcc/tree-ssa-sccvn.cc b/gcc/tree-ssa-sccvn.cc index f41d503..7d947b5 100644 --- a/gcc/tree-ssa-sccvn.cc +++ b/gcc/tree-ssa-sccvn.cc @@ -1790,6 +1790,7 @@ struct pd_range struct pd_data { tree rhs; + HOST_WIDE_INT rhs_off; HOST_WIDE_INT offset; HOST_WIDE_INT size; }; @@ -1816,6 +1817,7 @@ struct vn_walk_cb_data unsigned int pos = 0, prec = w.get_precision (); pd_data pd; pd.rhs = build_constructor (NULL_TREE, NULL); + pd.rhs_off = 0; /* When bitwise and with a constant is done on a memory load, we don't really need all the bits to be defined or defined to constants, we don't really care what is in the position @@ -1976,6 +1978,7 @@ vn_walk_cb_data::push_partial_def (pd_data pd, bool pd_constant_p = (TREE_CODE (pd.rhs) == CONSTRUCTOR || CONSTANT_CLASS_P (pd.rhs)); + pd_range *r; if (partial_defs.is_empty ()) { /* If we get a clobber upfront, fail. */ @@ -1989,65 +1992,70 @@ vn_walk_cb_data::push_partial_def (pd_data pd, first_set = set; first_base_set = base_set; last_vuse_ptr = NULL; - /* Continue looking for partial defs. */ - return NULL; - } - - if (!known_ranges) - { - /* ??? Optimize the case where the 2nd partial def completes things. */ - gcc_obstack_init (&ranges_obstack); - known_ranges = splay_tree_new_with_allocator (pd_range_compare, 0, 0, - pd_tree_alloc, - pd_tree_dealloc, this); - splay_tree_insert (known_ranges, - (splay_tree_key)&first_range.offset, - (splay_tree_value)&first_range); - } - - pd_range newr = { pd.offset, pd.size }; - splay_tree_node n; - pd_range *r; - /* Lookup the predecessor of offset + 1 and see if we need to merge. 
*/ - HOST_WIDE_INT loffset = newr.offset + 1; - if ((n = splay_tree_predecessor (known_ranges, (splay_tree_key)&loffset)) - && ((r = (pd_range *)n->value), true) - && ranges_known_overlap_p (r->offset, r->size + 1, - newr.offset, newr.size)) - { - /* Ignore partial defs already covered. Here we also drop shadowed - clobbers arriving here at the floor. */ - if (known_subrange_p (newr.offset, newr.size, r->offset, r->size)) - return NULL; - r->size = MAX (r->offset + r->size, newr.offset + newr.size) - r->offset; + r = &first_range; + /* Go check if the first partial definition was a full one in case + the caller didn't optimize for this. */ } else { - /* newr.offset wasn't covered yet, insert the range. */ - r = XOBNEW (&ranges_obstack, pd_range); - *r = newr; - splay_tree_insert (known_ranges, (splay_tree_key)&r->offset, - (splay_tree_value)r); - } - /* Merge r which now contains newr and is a member of the splay tree with - adjacent overlapping ranges. */ - pd_range *rafter; - while ((n = splay_tree_successor (known_ranges, (splay_tree_key)&r->offset)) - && ((rafter = (pd_range *)n->value), true) - && ranges_known_overlap_p (r->offset, r->size + 1, - rafter->offset, rafter->size)) - { - r->size = MAX (r->offset + r->size, - rafter->offset + rafter->size) - r->offset; - splay_tree_remove (known_ranges, (splay_tree_key)&rafter->offset); - } - /* If we get a clobber, fail. */ - if (TREE_CLOBBER_P (pd.rhs)) - return (void *)-1; - /* Non-constants are OK as long as they are shadowed by a constant. */ - if (!pd_constant_p) - return (void *)-1; - partial_defs.safe_push (pd); + if (!known_ranges) + { + /* ??? Optimize the case where the 2nd partial def completes + things. 
*/ + gcc_obstack_init (&ranges_obstack); + known_ranges = splay_tree_new_with_allocator (pd_range_compare, 0, 0, + pd_tree_alloc, + pd_tree_dealloc, this); + splay_tree_insert (known_ranges, + (splay_tree_key)&first_range.offset, + (splay_tree_value)&first_range); + } + + pd_range newr = { pd.offset, pd.size }; + splay_tree_node n; + /* Lookup the predecessor of offset + 1 and see if we need to merge. */ + HOST_WIDE_INT loffset = newr.offset + 1; + if ((n = splay_tree_predecessor (known_ranges, (splay_tree_key)&loffset)) + && ((r = (pd_range *)n->value), true) + && ranges_known_overlap_p (r->offset, r->size + 1, + newr.offset, newr.size)) + { + /* Ignore partial defs already covered. Here we also drop shadowed + clobbers arriving here at the floor. */ + if (known_subrange_p (newr.offset, newr.size, r->offset, r->size)) + return NULL; + r->size + = MAX (r->offset + r->size, newr.offset + newr.size) - r->offset; + } + else + { + /* newr.offset wasn't covered yet, insert the range. */ + r = XOBNEW (&ranges_obstack, pd_range); + *r = newr; + splay_tree_insert (known_ranges, (splay_tree_key)&r->offset, + (splay_tree_value)r); + } + /* Merge r which now contains newr and is a member of the splay tree with + adjacent overlapping ranges. */ + pd_range *rafter; + while ((n = splay_tree_successor (known_ranges, + (splay_tree_key)&r->offset)) + && ((rafter = (pd_range *)n->value), true) + && ranges_known_overlap_p (r->offset, r->size + 1, + rafter->offset, rafter->size)) + { + r->size = MAX (r->offset + r->size, + rafter->offset + rafter->size) - r->offset; + splay_tree_remove (known_ranges, (splay_tree_key)&rafter->offset); + } + /* If we get a clobber, fail. */ + if (TREE_CLOBBER_P (pd.rhs)) + return (void *)-1; + /* Non-constants are OK as long as they are shadowed by a constant. */ + if (!pd_constant_p) + return (void *)-1; + partial_defs.safe_push (pd); + } /* Now we have merged newr into the range tree. 
When we have covered [offseti, sizei] then the tree will contain exactly one node which has @@ -2081,7 +2089,8 @@ vn_walk_cb_data::push_partial_def (pd_data pd, else { len = native_encode_expr (pd.rhs, this_buffer, bufsize, - MAX (0, -pd.offset) / BITS_PER_UNIT); + (MAX (0, -pd.offset) + + pd.rhs_off) / BITS_PER_UNIT); if (len <= 0 || len < (ROUND_UP (pd.size, BITS_PER_UNIT) / BITS_PER_UNIT - MAX (0, -pd.offset) / BITS_PER_UNIT)) @@ -2906,6 +2915,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, { pd_data pd; pd.rhs = build_constructor (NULL_TREE, NULL); + pd.rhs_off = 0; pd.offset = offset2i; pd.size = leni << LOG2_BITS_PER_UNIT; return data->push_partial_def (pd, 0, 0, offseti, maxsizei); @@ -2955,6 +2965,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, by a later def. */ pd_data pd; pd.rhs = gimple_assign_rhs1 (def_stmt); + pd.rhs_off = 0; pd.offset = offset2i; pd.size = size2i; return data->push_partial_def (pd, ao_ref_alias_set (&lhs_ref), @@ -3107,6 +3118,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, if (TREE_CODE (rhs) == SSA_NAME) rhs = SSA_VAL (rhs); pd.rhs = rhs; + pd.rhs_off = 0; pd.offset = offset2i; pd.size = size2i; return data->push_partial_def (pd, ao_ref_alias_set (&lhs_ref), @@ -3186,6 +3198,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, { pd_data pd; pd.rhs = SSA_VAL (def_rhs); + pd.rhs_off = 0; pd.offset = offset2i; pd.size = size2i; return data->push_partial_def (pd, ao_ref_alias_set (&lhs_ref), @@ -3195,6 +3208,133 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, } } + /* 4b) Assignment done via one of the vectorizer internal store + functions where we may be able to access pieces from or we can + combine to a larger entity. 
*/ + else if (known_eq (ref->size, maxsize) + && is_gimple_reg_type (vr->type) + && !reverse_storage_order_for_component_p (vr->operands) + && !contains_storage_order_barrier_p (vr->operands) + && is_gimple_call (def_stmt) + && gimple_call_internal_p (def_stmt) + && internal_store_fn_p (gimple_call_internal_fn (def_stmt))) + { + gcall *call = as_a <gcall *> (def_stmt); + internal_fn fn = gimple_call_internal_fn (call); + tree def_rhs = gimple_call_arg (call, + internal_fn_stored_value_index (fn)); + def_rhs = vn_valueize (def_rhs); + if (TREE_CODE (def_rhs) != VECTOR_CST) + return (void *)-1; + + tree mask = NULL_TREE, len = NULL_TREE, bias = NULL_TREE; + switch (fn) + { + case IFN_MASK_STORE: + mask = gimple_call_arg (call, internal_fn_mask_index (fn)); + mask = vn_valueize (mask); + if (TREE_CODE (mask) != VECTOR_CST) + return (void *)-1; + break; + case IFN_LEN_STORE: + len = gimple_call_arg (call, 2); + bias = gimple_call_arg (call, 4); + if (!tree_fits_uhwi_p (len) || !tree_fits_shwi_p (bias)) + return (void *)-1; + break; + default: + return (void *)-1; + } + ao_ref_init_from_ptr_and_size (&lhs_ref, + vn_valueize (gimple_call_arg (call, 0)), + TYPE_SIZE_UNIT (TREE_TYPE (def_rhs))); + tree base2; + poly_int64 offset2, size2, maxsize2; + HOST_WIDE_INT offset2i, size2i, offseti; + base2 = ao_ref_base (&lhs_ref); + offset2 = lhs_ref.offset; + size2 = lhs_ref.size; + maxsize2 = lhs_ref.max_size; + if (known_size_p (maxsize2) + && known_eq (maxsize2, size2) + && adjust_offsets_for_equal_base_address (base, &offset, + base2, &offset2) + && maxsize.is_constant (&maxsizei) + && offset.is_constant (&offseti) + && offset2.is_constant (&offset2i) + && size2.is_constant (&size2i)) + { + if (!ranges_maybe_overlap_p (offset, maxsize, offset2, size2)) + /* Poor-mans disambiguation. 
*/ + return NULL; + else if (ranges_known_overlap_p (offset, maxsize, offset2, size2)) + { + pd_data pd; + pd.rhs = def_rhs; + tree aa = gimple_call_arg (call, 1); + alias_set_type set = get_deref_alias_set (TREE_TYPE (aa)); + tree vectype = TREE_TYPE (def_rhs); + unsigned HOST_WIDE_INT elsz + = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))); + if (mask) + { + HOST_WIDE_INT start = 0, len = 0; + unsigned mask_idx = 0; + do + { + if (integer_zerop (VECTOR_CST_ELT (mask, mask_idx))) + { + if (len != 0) + { + pd.rhs_off = start; + pd.offset = offset2i + start; + pd.size = len; + if (ranges_known_overlap_p + (offset, maxsize, pd.offset, pd.size)) + { + void *res = data->push_partial_def + (pd, set, set, offseti, maxsizei); + if (res != NULL) + return res; + } + } + start = (mask_idx + 1) * elsz; + len = 0; + } + else + len += elsz; + mask_idx++; + } + while (known_lt (mask_idx, TYPE_VECTOR_SUBPARTS (vectype))); + if (len != 0) + { + pd.rhs_off = start; + pd.offset = offset2i + start; + pd.size = len; + if (ranges_known_overlap_p (offset, maxsize, + pd.offset, pd.size)) + return data->push_partial_def (pd, set, set, + offseti, maxsizei); + } + } + else if (fn == IFN_LEN_STORE) + { + pd.rhs_off = 0; + pd.offset = offset2i; + pd.size = (tree_to_uhwi (len) + + -tree_to_shwi (bias)) * BITS_PER_UNIT; + if (ranges_known_overlap_p (offset, maxsize, + pd.offset, pd.size)) + return data->push_partial_def (pd, set, set, + offseti, maxsizei); + } + else + gcc_unreachable (); + return NULL; + } + } + } + /* 5) For aggregate copies translate the reference through them if the copy kills ref. 
*/ else if (data->vn_walk_kind == VN_WALKREWRITE @@ -3327,6 +3467,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, { pd_data pd; pd.rhs = val; + pd.rhs_off = 0; pd.offset = 0; pd.size = maxsizei; return data->push_partial_def (pd, ao_ref_alias_set (&lhs_ref), -- cgit v1.1 From dc477ffb4aba21e9cf47de22a4df6f2b23849505 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 21 Jul 2022 10:13:46 +0200 Subject: tree-optimization/106378 - DSE of LEN_STORE and MASK_STORE The following enhances DSE to handle LEN_STORE (optimally) and MASK_STORE (conservatively). PR tree-optimization/106378 * tree-ssa-dse.cc (initialize_ao_ref_for_dse): Handle LEN_STORE, add mode to initialize a may-def and handle MASK_STORE that way. (dse_optimize_stmt): Query may-defs. Handle internal functions LEN_STORE and MASK_STORE similar to how we handle memory builtins but without byte tracking. --- gcc/tree-ssa-dse.cc | 55 +++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 4 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-ssa-dse.cc b/gcc/tree-ssa-dse.cc index 8d1739a..34cfd1a 100644 --- a/gcc/tree-ssa-dse.cc +++ b/gcc/tree-ssa-dse.cc @@ -93,7 +93,9 @@ static bitmap need_eh_cleanup; static bitmap need_ab_cleanup; /* STMT is a statement that may write into memory. Analyze it and - initialize WRITE to describe how STMT affects memory. + initialize WRITE to describe how STMT affects memory. When + MAY_DEF_OK is true then the function initializes WRITE to what + the stmt may define. Return TRUE if the statement was analyzed, FALSE otherwise. @@ -101,7 +103,7 @@ static bitmap need_ab_cleanup; can be achieved by analyzing more statements. */ static bool -initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write) +initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write, bool may_def_ok = false) { /* It's advantageous to handle certain mem* functions. 
*/ if (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)) @@ -146,6 +148,32 @@ initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write) break; } } + else if (is_gimple_call (stmt) + && gimple_call_internal_p (stmt)) + { + switch (gimple_call_internal_fn (stmt)) + { + case IFN_LEN_STORE: + ao_ref_init_from_ptr_and_size + (write, gimple_call_arg (stmt, 0), + int_const_binop (MINUS_EXPR, + gimple_call_arg (stmt, 2), + gimple_call_arg (stmt, 4))); + return true; + case IFN_MASK_STORE: + /* We cannot initialize a must-def ao_ref (in all cases) but we + can provide a may-def variant. */ + if (may_def_ok) + { + ao_ref_init_from_ptr_and_size + (write, gimple_call_arg (stmt, 0), + TYPE_SIZE_UNIT (TREE_TYPE (gimple_call_arg (stmt, 2)))); + return true; + } + break; + default:; + } + } else if (tree lhs = gimple_get_lhs (stmt)) { if (TREE_CODE (lhs) != SSA_NAME) @@ -1328,8 +1356,10 @@ dse_optimize_stmt (function *fun, gimple_stmt_iterator *gsi, sbitmap live_bytes) ao_ref ref; /* If this is not a store we can still remove dead call using - modref summary. */ - if (!initialize_ao_ref_for_dse (stmt, &ref)) + modref summary. Note we specifically allow ref to be initialized + to a conservative may-def since we are looking for followup stores + to kill all of it. 
*/ + if (!initialize_ao_ref_for_dse (stmt, &ref, true)) { dse_optimize_call (gsi, live_bytes); return; @@ -1398,6 +1428,23 @@ dse_optimize_stmt (function *fun, gimple_stmt_iterator *gsi, sbitmap live_bytes) return; } } + else if (is_gimple_call (stmt) + && gimple_call_internal_p (stmt)) + { + switch (gimple_call_internal_fn (stmt)) + { + case IFN_LEN_STORE: + case IFN_MASK_STORE: + { + enum dse_store_status store_status; + store_status = dse_classify_store (&ref, stmt, false, live_bytes); + if (store_status == DSE_STORE_DEAD) + delete_dead_or_redundant_call (gsi, "dead"); + return; + } + default:; + } + } bool by_clobber_p = false; -- cgit v1.1 From 375668e0508fbe173af1ed519d8ae2b79f388d94 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 21 Jul 2022 13:20:47 +0200 Subject: tree-optimization/106379 - add missing ~(a ^ b) folding for _Bool The following makes sure to fold ~(a ^ b) to a == b for truth values (but not vectors, we'd have to check for vector support of equality). That turns the PR106379 testcase into a ranger one. Note that while we arrive at ~(a ^ b) in a convoluted way from original !a == !b one can eventually write the expression this way directly as well. PR tree-optimization/106379 * match.pd (~(a ^ b) -> a == b): New pattern. * gcc.dg/pr106379-1.c: New testcase. --- gcc/match.pd | 6 ++++++ gcc/testsuite/gcc.dg/pr106379-1.c | 9 +++++++++ 2 files changed, 15 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/pr106379-1.c (limited to 'gcc') diff --git a/gcc/match.pd b/gcc/match.pd index 8bbc0db..88a1a5a 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -1938,6 +1938,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (tree_nop_conversion_p (type, TREE_TYPE (@0))) (bit_not (bit_xor (view_convert @0) @1)))) +/* ~(a ^ b) is a == b for truth valued a and b. 
*/ +(simplify + (bit_not (bit_xor:s truth_valued_p@0 truth_valued_p@1)) + (if (!VECTOR_TYPE_P (type)) + (convert (eq @0 @1)))) + /* (x & ~m) | (y & m) -> ((x ^ y) & m) ^ x */ (simplify (bit_ior:c (bit_and:cs @0 (bit_not @2)) (bit_and:cs @1 @2)) diff --git a/gcc/testsuite/gcc.dg/pr106379-1.c b/gcc/testsuite/gcc.dg/pr106379-1.c new file mode 100644 index 0000000..7f2575e --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr106379-1.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-forwprop1" } */ + +_Bool foo (_Bool a, _Bool b) +{ + return !a == !b; +} + +/* { dg-final { scan-tree-dump "\[ab\]_\[0-9\]+\\(D\\) == \[ba\]_\[0-9\]+\\(D\\)" "forwprop1" } } */ -- cgit v1.1 From 9a52d6871a268a6a1fc3d0a3660120c44ee94951 Mon Sep 17 00:00:00 2001 From: Prathamesh Kulkarni Date: Thu, 21 Jul 2022 17:06:03 +0530 Subject: Revert "forwprop: Use lhs type instead of arg0 in folding VEC_PERM_EXPR." This reverts commit 4c3231302577445417715a7c22e879e4159376d3. gcc/ChangeLog: Revert: * tree-ssa-forwprop.cc (simplify_permutation): Use lhs type instead of TREE_TYPE (arg0) as result type in folding VEC_PERM_EXPR. --- gcc/tree-ssa-forwprop.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc index d04cf4b..fdc4bc8 100644 --- a/gcc/tree-ssa-forwprop.cc +++ b/gcc/tree-ssa-forwprop.cc @@ -2661,7 +2661,7 @@ simplify_permutation (gimple_stmt_iterator *gsi) /* Shuffle of a constructor. 
*/ bool ret = false; - tree res_type = TREE_TYPE (gimple_assign_lhs (stmt)); + tree res_type = TREE_TYPE (arg0); tree opt = fold_ternary (VEC_PERM_EXPR, res_type, arg0, arg1, op2); if (!opt || (TREE_CODE (opt) != CONSTRUCTOR && TREE_CODE (opt) != VECTOR_CST)) -- cgit v1.1 From 24eae97625e9423e7344f6d7eb6bc2435a62fffd Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Thu, 21 Jul 2022 16:11:23 +0200 Subject: docs: remove trailing dots for 2 Fortran fns gcc/fortran/ChangeLog: * intrinsic.texi: Remove trailing dots for 2 Fortran fns. --- gcc/fortran/intrinsic.texi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'gcc') diff --git a/gcc/fortran/intrinsic.texi b/gcc/fortran/intrinsic.texi index e3cd827..55f53fc 100644 --- a/gcc/fortran/intrinsic.texi +++ b/gcc/fortran/intrinsic.texi @@ -316,7 +316,7 @@ Some basic guidelines for editing this document: * @code{TRANSFER}: TRANSFER, Transfer bit patterns * @code{TRANSPOSE}: TRANSPOSE, Transpose an array of rank two * @code{TRIM}: TRIM, Remove trailing blank characters of a string -* @code{TTYNAM}: TTYNAM, Get the name of a terminal device. +* @code{TTYNAM}: TTYNAM, Get the name of a terminal device * @code{UBOUND}: UBOUND, Upper dimension bounds of an array * @code{UCOBOUND}: UCOBOUND, Upper codimension bounds of an array * @code{UMASK}: UMASK, Set the file creation mask @@ -8750,7 +8750,7 @@ END PROGRAM @node ISATTY -@section @code{ISATTY} --- Whether a unit is a terminal device. +@section @code{ISATTY} --- Whether a unit is a terminal device @fnindex ISATTY @cindex system, terminal @@ -14613,7 +14613,7 @@ END PROGRAM @node TTYNAM -@section @code{TTYNAM} --- Get the name of a terminal device. +@section @code{TTYNAM} --- Get the name of a terminal device @fnindex TTYNAM @cindex system, terminal -- cgit v1.1