diff options
author | Tamar Christina <tamar.christina@arm.com> | 2022-07-08 07:37:20 +0100 |
---|---|---|
committer | Tamar Christina <tamar.christina@arm.com> | 2022-07-08 07:39:33 +0100 |
commit | 13f44099bcc64ddb50a6dbd462bf79b258dfd02c (patch) | |
tree | 7ea840cf4915eabc7761e0c1bc3989a9c3859bba /gcc/internal-fn.cc | |
parent | bf3695691f4fc964a3b1c8274a6949d844e3edff (diff) | |
download | gcc-13f44099bcc64ddb50a6dbd462bf79b258dfd02c.zip gcc-13f44099bcc64ddb50a6dbd462bf79b258dfd02c.tar.gz gcc-13f44099bcc64ddb50a6dbd462bf79b258dfd02c.tar.bz2 |
middle-end: Use subregs to expand COMPLEX_EXPR to set the lowpart.
When lowering COMPLEX_EXPR we currently emit two VEC_EXTRACTs: one for the
lowpart and one for the highpart.
The problem with this is that in RTL the lvalue of the RTX is the only thing
tying the two instructions together.
This means that e.g. combine is unable to try to combine the two instructions
for setting the lowpart and highpart.
For ISAs that have bit extract instructions we can eliminate one of the extracts
if, and only if, we're setting the entire complex number.
This patch changes the expand code so that, when we're setting the entire
complex number, it generates a subreg for the lowpart instead of a vec_extract.
This allows us to optimize sequences such as:
_Complex int f(int a, int b) {
_Complex int t = a + b * 1i;
return t;
}
from:
f:
bfi x2, x0, 0, 32
bfi x2, x1, 32, 32
mov x0, x2
ret
into:
f:
bfi x0, x1, 32, 32
ret
I have also confirmed the codegen for x86_64 did not change.
gcc/ChangeLog:
* expmed.cc (store_bit_field_1): Add parameter that indicates if value is
still undefined and if so emit a subreg move instead.
(store_integral_bit_field): Likewise.
(store_bit_field): Likewise.
* expr.h (write_complex_part): Likewise.
* expmed.h (store_bit_field): Add new parameter.
* builtins.cc (expand_ifn_atomic_compare_exchange_into_call): Use new
parameter.
(expand_ifn_atomic_compare_exchange): Likewise.
* calls.cc (store_unaligned_arguments_into_pseudos): Likewise.
* emit-rtl.cc (validate_subreg): Likewise.
* expr.cc (emit_group_store): Likewise.
(copy_blkmode_from_reg): Likewise.
(copy_blkmode_to_reg): Likewise.
(clear_storage_hints): Likewise.
(write_complex_part): Likewise.
(emit_move_complex_parts): Likewise.
(expand_assignment): Likewise.
(store_expr): Likewise.
(store_field): Likewise.
(expand_expr_real_2): Likewise.
* ifcvt.cc (noce_emit_move_insn): Likewise.
* internal-fn.cc (expand_arith_set_overflow): Likewise.
(expand_arith_overflow_result_store): Likewise.
(expand_addsub_overflow): Likewise.
(expand_neg_overflow): Likewise.
(expand_mul_overflow): Likewise.
(expand_arith_overflow): Likewise.
gcc/testsuite/ChangeLog:
* g++.target/aarch64/complex-init.C: New test.
Diffstat (limited to 'gcc/internal-fn.cc')
-rw-r--r-- | gcc/internal-fn.cc | 16 |
1 files changed, 8 insertions, 8 deletions
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index 91588f8..d666ccc 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -815,9 +815,9 @@ expand_arith_set_overflow (tree lhs, rtx target) { if (TYPE_PRECISION (TREE_TYPE (TREE_TYPE (lhs))) == 1 && !TYPE_UNSIGNED (TREE_TYPE (TREE_TYPE (lhs)))) - write_complex_part (target, constm1_rtx, true); + write_complex_part (target, constm1_rtx, true, false); else - write_complex_part (target, const1_rtx, true); + write_complex_part (target, const1_rtx, true, false); } /* Helper for expand_*_overflow. Store RES into the __real__ part @@ -872,7 +872,7 @@ expand_arith_overflow_result_store (tree lhs, rtx target, expand_arith_set_overflow (lhs, target); emit_label (done_label); } - write_complex_part (target, lres, false); + write_complex_part (target, lres, false, false); } /* Helper for expand_*_overflow. Store RES into TARGET. */ @@ -917,7 +917,7 @@ expand_addsub_overflow (location_t loc, tree_code code, tree lhs, { target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); if (!is_ubsan) - write_complex_part (target, const0_rtx, true); + write_complex_part (target, const0_rtx, true, false); } /* We assume both operands and result have the same precision @@ -1362,7 +1362,7 @@ expand_neg_overflow (location_t loc, tree lhs, tree arg1, bool is_ubsan, { target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); if (!is_ubsan) - write_complex_part (target, const0_rtx, true); + write_complex_part (target, const0_rtx, true, false); } enum insn_code icode = optab_handler (negv3_optab, mode); @@ -1487,7 +1487,7 @@ expand_mul_overflow (location_t loc, tree lhs, tree arg0, tree arg1, { target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); if (!is_ubsan) - write_complex_part (target, const0_rtx, true); + write_complex_part (target, const0_rtx, true, false); } if (is_ubsan) @@ -2304,7 +2304,7 @@ expand_mul_overflow (location_t loc, tree lhs, tree arg0, tree arg1, do_compare_rtx_and_jump (op1, res, NE, true, mode, 
NULL_RTX, NULL, all_done_label, profile_probability::very_unlikely ()); emit_label (set_noovf); - write_complex_part (target, const0_rtx, true); + write_complex_part (target, const0_rtx, true, false); emit_label (all_done_label); } @@ -2573,7 +2573,7 @@ expand_arith_overflow (enum tree_code code, gimple *stmt) { /* The infinity precision result will always fit into result. */ rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); - write_complex_part (target, const0_rtx, true); + write_complex_part (target, const0_rtx, true, false); scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type); struct separate_ops ops; ops.code = code; |