diff options
author | Jakub Jelinek <jakub@redhat.com> | 2023-12-20 11:32:52 +0100 |
---|---|---|
committer | Jakub Jelinek <jakub@redhat.com> | 2023-12-20 11:32:52 +0100 |
commit | be9e8de628471399ee5abb1e6ba7738139256b67 (patch) | |
tree | c4ae48adbc1ac47cef07f73dc3f94f1457af95c7 | |
parent | e7dd72aefed851d11655aa301d6e394ec9805e0d (diff) | |
download | gcc-be9e8de628471399ee5abb1e6ba7738139256b67.zip gcc-be9e8de628471399ee5abb1e6ba7738139256b67.tar.gz gcc-be9e8de628471399ee5abb1e6ba7738139256b67.tar.bz2 |
lower-bitint: Fix up handling of nested casts in mergeable stmt handling [PR112941]
The following patch fixes 2 issues in handling of casts for mergeable
stmts.
The first hunk fixes the case when we have two nested casts (typically,
after optimization, a zero-extension of a sign-extension, because
everything else should have been folded into a single cast). If
the lowering of the outer cast needs to make the code conditional
(e.g.
for (...)
{
if (idx <= 32)
{
if (idx < 32)
{ ... handle_operand (idx); ... }
else
{ ... handle_operand (32); ... }
}
...
}
) and the lowering of the inner one as well, right now it creates invalid
SSA form, because even for the inner cast we need a PHI on the loop
and the PHI argument from the latch edge is a SSA_NAME initialized in
the conditionally executed bb. The hunk fixes that by detecting such
a case and adding further PHI nodes at the end of the ifs such that
the right value propagates to the next loop iteration. We can use
0 arguments for the other edges because the inner operand handling
is only done for the first set of iterations and then the other ifs take
over.
The rest fixes another case of invalid SSA form: when, for a sign
extension with a constant idx, we need to use the 0 or -1 value
initialized by an earlier iteration, the code was using the value of the
loop PHI argument from the latch edge rather than the PHI result. That
is correct for cases expanded in straight-line code after the loop, but
not inside of the loop for the handle_cast conditionals, where we should
use the PHI result instead. This is done in the second hunk and supported
by the remaining hunks, which clear m_bb to tell the code we are no
longer expanding inside of the loop.
Note, this patch doesn't deal with similar problems during multiplication,
division, floating casts etc. where we just emit a library call. I'll
need to make sure in that case we don't merge more than one cast per
operand.
2023-12-20 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/112941
* gimple-lower-bitint.cc (bitint_large_huge::handle_cast): If
save_cast_conditional, instead of adding assignment of t4 to
m_data[save_data_cnt + 1] before m_gsi, add phi nodes such that
t4 propagates to m_bb loop. For constant idx, use
m_data[save_data_cnt] rather than m_data[save_data_cnt + 1] if inside
of the m_bb loop.
(bitint_large_huge::lower_mergeable_stmt): Clear m_bb when no longer
expanding inside of that loop.
(bitint_large_huge::lower_comparison_stmt): Likewise.
(bitint_large_huge::lower_addsub_overflow): Likewise.
(bitint_large_huge::lower_mul_overflow): Likewise.
(bitint_large_huge::lower_bit_query): Likewise.
* gcc.dg/bitint-55.c: New test.
-rw-r--r-- | gcc/gimple-lower-bitint.cc | 34 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/bitint-55.c | 129 |
2 files changed, 161 insertions, 2 deletions
diff --git a/gcc/gimple-lower-bitint.cc b/gcc/gimple-lower-bitint.cc index 696bba4..8b392b1 100644 --- a/gcc/gimple-lower-bitint.cc +++ b/gcc/gimple-lower-bitint.cc @@ -1401,8 +1401,29 @@ bitint_large_huge::handle_cast (tree lhs_type, tree rhs1, tree idx) add_phi_arg (phi, m_data[save_data_cnt], edge_false, UNKNOWN_LOCATION); add_phi_arg (phi, ext, edge_true_true, UNKNOWN_LOCATION); - g = gimple_build_assign (m_data[save_data_cnt + 1], t4); - insert_before (g); + if (!save_cast_conditional) + { + g = gimple_build_assign (m_data[save_data_cnt + 1], t4); + insert_before (g); + } + else + for (basic_block bb = gsi_bb (m_gsi);;) + { + edge e1 = single_succ_edge (bb); + edge e2 = find_edge (e1->dest, m_bb), e3; + tree t5 = (e2 ? m_data[save_data_cnt + 1] + : make_ssa_name (m_limb_type)); + phi = create_phi_node (t5, e1->dest); + edge_iterator ei; + FOR_EACH_EDGE (e3, ei, e1->dest->preds) + add_phi_arg (phi, (e3 == e1 ? t4 + : build_zero_cst (m_limb_type)), + e3, UNKNOWN_LOCATION); + if (e2) + break; + t4 = t5; + bb = e1->dest; + } } if (m_bitfld_load) { @@ -1470,6 +1491,8 @@ bitint_large_huge::handle_cast (tree lhs_type, tree rhs1, tree idx) m_data_cnt = tree_to_uhwi (m_data[save_data_cnt + 2]); if (TYPE_UNSIGNED (rhs_type)) t = build_zero_cst (m_limb_type); + else if (m_bb) + t = m_data[save_data_cnt]; else t = m_data[save_data_cnt + 1]; } @@ -2586,6 +2609,7 @@ bitint_large_huge::lower_mergeable_stmt (gimple *stmt, tree_code &cmp_code, m_gsi = gsi_after_labels (edge_bb); else m_gsi = gsi_for_stmt (stmt); + m_bb = NULL; } } } @@ -2712,6 +2736,7 @@ bitint_large_huge::lower_mergeable_stmt (gimple *stmt, tree_code &cmp_code, NULL_TREE, NULL_TREE); insert_before (g); m_gsi = gsi_for_stmt (stmt); + m_bb = NULL; } } } @@ -2890,6 +2915,7 @@ bitint_large_huge::lower_comparison_stmt (gimple *stmt, tree_code &cmp_code, extract_true_false_edges_from_block (gsi_bb (m_gsi), &true_edge, &false_edge); m_gsi = gsi_after_labels (false_edge->dest); + m_bb = NULL; } } @@ -4208,6 +4234,7 
@@ bitint_large_huge::lower_addsub_overflow (tree obj, gimple *stmt) NULL_TREE, NULL_TREE); insert_before (g); m_gsi = gsi_for_stmt (final_stmt); + m_bb = NULL; } } } @@ -4405,6 +4432,7 @@ bitint_large_huge::lower_mul_overflow (tree obj, gimple *stmt) &true_edge, &false_edge); m_gsi = gsi_after_labels (false_edge->dest); + m_bb = NULL; } } @@ -4744,6 +4772,7 @@ bitint_large_huge::lower_bit_query (gimple *stmt) m_gsi = gsi_after_labels (edge_bb); else m_gsi = gsi_for_stmt (stmt); + m_bb = NULL; } } } @@ -4905,6 +4934,7 @@ bitint_large_huge::lower_bit_query (gimple *stmt) extract_true_false_edges_from_block (gsi_bb (m_gsi), &true_edge, &false_edge); m_gsi = gsi_after_labels (false_edge->dest); + m_bb = NULL; } } } diff --git a/gcc/testsuite/gcc.dg/bitint-55.c b/gcc/testsuite/gcc.dg/bitint-55.c new file mode 100644 index 0000000..29ddf50 --- /dev/null +++ b/gcc/testsuite/gcc.dg/bitint-55.c @@ -0,0 +1,129 @@ +/* PR tree-optimization/112941 */ +/* { dg-do compile { target bitint } } */ +/* { dg-options "-std=c23 -O2" } */ + +#if __BITINT_MAXWIDTH__ >= 4096 +void +f1 (_BitInt(4096) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u) +{ + p[0] += (unsigned _BitInt(2048)) r; + p[1] += (unsigned _BitInt(2048)) s; + p[2] += (unsigned _BitInt(2048)) t; + p[3] += (unsigned _BitInt(2048)) u; +} + +void +f2 (_BitInt(4094) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u) +{ + p[0] -= (unsigned _BitInt(2048)) r; + p[1] -= (unsigned _BitInt(2048)) s; + p[2] -= (unsigned _BitInt(2048)) t; + p[3] -= (unsigned _BitInt(2048)) u; +} + +void +f3 (_BitInt(4096) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u) +{ + p[0] += (unsigned _BitInt(2110)) r; + p[1] += (unsigned _BitInt(2110)) s; + p[2] += (unsigned _BitInt(2110)) t; + p[3] += (unsigned _BitInt(2110)) u; +} + +void +f4 (_BitInt(4094) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u) +{ + p[0] -= (unsigned _BitInt(2110)) r; + p[1] -= (unsigned _BitInt(2110)) s; + p[2] -= (unsigned _BitInt(2110)) 
t; + p[3] -= (unsigned _BitInt(2110)) u; +} + +void +f5 (unsigned _BitInt(4096) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u) +{ + p[0] += (unsigned _BitInt(2048)) r; + p[1] += (unsigned _BitInt(2048)) s; + p[2] += (unsigned _BitInt(2048)) t; + p[3] += (unsigned _BitInt(2048)) u; +} + +void +f6 (unsigned _BitInt(4094) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u) +{ + p[0] -= (unsigned _BitInt(2048)) r; + p[1] -= (unsigned _BitInt(2048)) s; + p[2] -= (unsigned _BitInt(2048)) t; + p[3] -= (unsigned _BitInt(2048)) u; +} + +void +f7 (unsigned _BitInt(4096) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u) +{ + p[0] += (unsigned _BitInt(2110)) r; + p[1] += (unsigned _BitInt(2110)) s; + p[2] += (unsigned _BitInt(2110)) t; + p[3] += (unsigned _BitInt(2110)) u; +} + +void +f8 (unsigned _BitInt(4094) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u) +{ + p[0] -= (unsigned _BitInt(2110)) r; + p[1] -= (unsigned _BitInt(2110)) s; + p[2] -= (unsigned _BitInt(2110)) t; + p[3] -= (unsigned _BitInt(2110)) u; +} + +#if __SIZEOF_INT128__ +void +f9 (_BitInt(4096) *p, __int128 r) +{ + p[0] += (unsigned _BitInt(2048)) r; +} + +void +f10 (_BitInt(4094) *p, __int128 r) +{ + p[0] -= (unsigned _BitInt(2048)) r; +} + +void +f11 (_BitInt(4096) *p, __int128 r) +{ + p[0] += (unsigned _BitInt(2110)) r; +} + +void +f12 (_BitInt(4094) *p, __int128 r) +{ + p[0] -= (unsigned _BitInt(2110)) r; +} + +void +f13 (unsigned _BitInt(4096) *p, __int128 r) +{ + p[0] += (unsigned _BitInt(2048)) r; +} + +void +f14 (unsigned _BitInt(4094) *p, __int128 r) +{ + p[0] -= (unsigned _BitInt(2048)) r; +} + +void +f15 (unsigned _BitInt(4096) *p, __int128 r) +{ + p[0] += (unsigned _BitInt(2110)) r; +} + +void +f16 (unsigned _BitInt(4094) *p, __int128 r) +{ + p[0] -= (unsigned _BitInt(2110)) r; +} +#endif +#else +int i; +#endif |