diff options
author | Wilco Dijkstra <wilco.dijkstra@arm.com> | 2023-09-13 13:21:50 +0100 |
---|---|---|
committer | Wilco Dijkstra <wilco.dijkstra@arm.com> | 2023-09-18 13:27:44 +0100 |
commit | fc7070025d1a6668ff6cb4391f84771a7662def7 (patch) | |
tree | 66a40b6263d5ca9ecc7fc04a9e032f1085bad85d /gcc/config/aarch64/aarch64.cc | |
parent | 64d5bc35c8c2a66ac133a3e6ace820b0ad8a63fb (diff) | |
download | gcc-fc7070025d1a6668ff6cb4391f84771a7662def7.zip gcc-fc7070025d1a6668ff6cb4391f84771a7662def7.tar.gz gcc-fc7070025d1a6668ff6cb4391f84771a7662def7.tar.bz2 |
AArch64: Improve immediate expansion [PR105928]
Support expansion of immediates which can be created from a MOV/MOVK pair
followed by a shifted ORR or BIC instruction. Change aarch64_split_dimode_const_store
to apply if we save at least one instruction.
This reduces the number of 4-instruction immediates in SPECINT/FP by 5%.
gcc/ChangeLog:
PR target/105928
* config/aarch64/aarch64.cc (aarch64_internal_mov_immediate):
Add support for immediates using shifted ORR/BIC.
(aarch64_split_dimode_const_store): Apply if we save one instruction.
* config/aarch64/aarch64.md (<LOGICAL:optab>_<SHIFT:optab><mode>3):
Make pattern global.
gcc/testsuite:
PR target/105928
* gcc.target/aarch64/pr105928.c: Add new test.
* gcc.target/aarch64/vect-cse-codegen.c: Fix test.
Diffstat (limited to 'gcc/config/aarch64/aarch64.cc')
-rw-r--r-- | gcc/config/aarch64/aarch64.cc | 43 |
1 file changed, 32 insertions, 11 deletions
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 7bb1161..219c4ee 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -5640,7 +5640,7 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, machine_mode mode) { int i; - unsigned HOST_WIDE_INT val, val2, mask; + unsigned HOST_WIDE_INT val, val2, val3, mask; int one_match, zero_match; int num_insns; @@ -5722,6 +5722,35 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, } return 3; } + + /* Try shifting and inserting the bottom 32-bits into the top bits. */ + val2 = val & 0xffffffff; + val3 = 0xffffffff; + val3 = val2 | (val3 << 32); + for (i = 17; i < 48; i++) + if ((val2 | (val2 << i)) == val) + { + if (generate) + { + emit_insn (gen_rtx_SET (dest, GEN_INT (val2 & 0xffff))); + emit_insn (gen_insv_immdi (dest, GEN_INT (16), + GEN_INT (val2 >> 16))); + emit_insn (gen_ior_ashldi3 (dest, dest, GEN_INT (i), dest)); + } + return 3; + } + else if ((val3 & ~(val3 << i)) == val) + { + if (generate) + { + emit_insn (gen_rtx_SET (dest, GEN_INT (val3 | 0xffff0000))); + emit_insn (gen_insv_immdi (dest, GEN_INT (16), + GEN_INT (val2 >> 16))); + emit_insn (gen_and_one_cmpl_ashldi3 (dest, dest, GEN_INT (i), + dest)); + } + return 3; + } } /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which @@ -25540,8 +25569,6 @@ aarch64_split_dimode_const_store (rtx dst, rtx src) rtx lo = gen_lowpart (SImode, src); rtx hi = gen_highpart_mode (SImode, DImode, src); - bool size_p = optimize_function_for_size_p (cfun); - if (!rtx_equal_p (lo, hi)) return false; @@ -25560,14 +25587,8 @@ aarch64_split_dimode_const_store (rtx dst, rtx src) MOV w1, 49370 MOVK w1, 0x140, lsl 16 STP w1, w1, [x0] - So we want to perform this only when we save two instructions - or more. When optimizing for size, however, accept any code size - savings we can. 
*/ - if (size_p && orig_cost <= lo_cost) - return false; - - if (!size_p - && (orig_cost <= lo_cost + 1)) + So we want to perform this when we save at least one instruction. */ + if (orig_cost <= lo_cost) return false; rtx mem_lo = adjust_address (dst, SImode, 0); |