aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorJakub Jelinek <jakub@redhat.com>2021-01-05 16:16:06 +0100
committerJakub Jelinek <jakub@redhat.com>2021-01-05 16:16:06 +0100
commita7553ad60bebc419d510564b8b9f9e5e03725ff5 (patch)
tree95629feb493724078551a23567e7dbbb69fc61e3 /gcc
parentf70289378721d970ea1cfc39a30565f88cedc788 (diff)
downloadgcc-a7553ad60bebc419d510564b8b9f9e5e03725ff5.zip
gcc-a7553ad60bebc419d510564b8b9f9e5e03725ff5.tar.gz
gcc-a7553ad60bebc419d510564b8b9f9e5e03725ff5.tar.bz2
store-merging: Handle vector CONSTRUCTORs using bswap [PR96239]
I've tried to add such helper, but handling over just analysis and letting each pass handle it differently seems complicated given the limitations of the bswap infrastructure. So, this patch just hooks the optimization also into store-merging so that the original testcase from the PR can be fixed. 2021-01-05 Jakub Jelinek <jakub@redhat.com> PR tree-optimization/96239 * gimple-ssa-store-merging.c (maybe_optimize_vector_constructor): New function. (get_status_for_store_merging): Don't return BB_INVALID for blocks with potential bswap optimizable CONSTRUCTORs. (pass_store_merging::execute): Optimize vector CONSTRUCTORs with bswap if possible. * gcc.dg/tree-ssa/pr96239.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/gimple-ssa-store-merging.c100
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/pr96239.c18
2 files changed, 115 insertions, 3 deletions
diff --git a/gcc/gimple-ssa-store-merging.c b/gcc/gimple-ssa-store-merging.c
index 4279c60..4577647c 100644
--- a/gcc/gimple-ssa-store-merging.c
+++ b/gcc/gimple-ssa-store-merging.c
@@ -1257,6 +1257,75 @@ bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
return tgt;
}
+/* Try to optimize an assignment CUR_STMT with CONSTRUCTOR on the rhs
+ using bswap optimizations. CDI_DOMINATORS need to be
+ computed on entry. Return true if it has been optimized and
+ TODO_update_ssa is needed. */
+
+static bool
+maybe_optimize_vector_constructor (gimple *cur_stmt)
+{
+ tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
+ struct symbolic_number n;
+ bool bswap;
+
+ gcc_assert (is_gimple_assign (cur_stmt)
+ && gimple_assign_rhs_code (cur_stmt) == CONSTRUCTOR);
+
+ tree rhs = gimple_assign_rhs1 (cur_stmt);
+ if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
+ || !INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
+ || gimple_assign_lhs (cur_stmt) == NULL_TREE)
+ return false;
+
+ HOST_WIDE_INT sz = int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
+ switch (sz)
+ {
+ case 16:
+ load_type = bswap_type = uint16_type_node;
+ break;
+ case 32:
+ if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
+ && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
+ {
+ load_type = uint32_type_node;
+ fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
+ bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
+ }
+ else
+ return false;
+ break;
+ case 64:
+ if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
+ && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
+ || (word_mode == SImode
+ && builtin_decl_explicit_p (BUILT_IN_BSWAP32)
+ && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)))
+ {
+ load_type = uint64_type_node;
+ fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
+ bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
+ }
+ else
+ return false;
+ break;
+ default:
+ return false;
+ }
+
+ gimple *ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
+ if (!ins_stmt || n.range != (unsigned HOST_WIDE_INT) sz)
+ return false;
+
+ if (bswap && !fndecl && n.range != 16)
+ return false;
+
+ memset (&nop_stats, 0, sizeof (nop_stats));
+ memset (&bswap_stats, 0, sizeof (bswap_stats));
+ return bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
+ bswap_type, load_type, &n, bswap) != NULL_TREE;
+}
+
/* Find manual byte swap implementations as well as load in a given
endianness. Byte swaps are turned into a bswap builtin invokation
while endian loads are converted to bswap builtin invokation or
@@ -5128,6 +5197,7 @@ static enum basic_block_status
get_status_for_store_merging (basic_block bb)
{
unsigned int num_statements = 0;
+ unsigned int num_constructors = 0;
gimple_stmt_iterator gsi;
edge e;
@@ -5140,9 +5210,27 @@ get_status_for_store_merging (basic_block bb)
if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
break;
+
+ if (is_gimple_assign (stmt)
+ && gimple_assign_rhs_code (stmt) == CONSTRUCTOR)
+ {
+ tree rhs = gimple_assign_rhs1 (stmt);
+ if (VECTOR_TYPE_P (TREE_TYPE (rhs))
+ && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
+ && gimple_assign_lhs (stmt) != NULL_TREE)
+ {
+ HOST_WIDE_INT sz
+ = int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
+ if (sz == 16 || sz == 32 || sz == 64)
+ {
+ num_constructors = 1;
+ break;
+ }
+ }
+ }
}
- if (num_statements == 0)
+ if (num_statements == 0 && num_constructors == 0)
return BB_INVALID;
if (cfun->can_throw_non_call_exceptions && cfun->eh
@@ -5151,7 +5239,7 @@ get_status_for_store_merging (basic_block bb)
&& e->dest == bb->next_bb)
return BB_EXTENDED_VALID;
- return num_statements >= 2 ? BB_VALID : BB_INVALID;
+ return (num_statements >= 2 || num_constructors) ? BB_VALID : BB_INVALID;
}
/* Entry point for the pass. Go over each basic block recording chains of
@@ -5191,9 +5279,10 @@ pass_store_merging::execute (function *fun)
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
- for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); )
{
gimple *stmt = gsi_stmt (gsi);
+ gsi_next (&gsi);
if (is_gimple_debug (stmt))
continue;
@@ -5209,6 +5298,11 @@ pass_store_merging::execute (function *fun)
continue;
}
+ if (is_gimple_assign (stmt)
+ && gimple_assign_rhs_code (stmt) == CONSTRUCTOR
+ && maybe_optimize_vector_constructor (stmt))
+ continue;
+
if (store_valid_for_store_merging_p (stmt))
changed |= process_store (stmt);
else
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96239.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96239.c
new file mode 100644
index 0000000..a099fd9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96239.c
@@ -0,0 +1,18 @@
+/* PR tree-optimization/96239 */
+/* { dg-do compile { target { ilp32 || lp64 } } } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump " r>> 8;" "optimized" { target bswap } } } */
+
+union U { unsigned char c[2]; unsigned short s; };
+
+unsigned short
+foo (unsigned short x)
+{
+ union U u;
+ u.s = x;
+ unsigned char v = u.c[0];
+ unsigned char w = u.c[1];
+ u.c[0] = w;
+ u.c[1] = v;
+ return u.s;
+}