diff options
author | Kyrylo Tkachov <kyrylo.tkachov@arm.com> | 2016-11-17 14:25:30 +0000 |
---|---|---|
committer | Kyrylo Tkachov <ktkachov@gcc.gnu.org> | 2016-11-17 14:25:30 +0000 |
commit | 141a3ccff1f53ca8a86453fbe4eaa1bc6b64d920 (patch) | |
tree | 70048b62883e572701b22d82dc04e95b4620e50b | |
parent | 54e63f002885fb595f17f39998e93bb9fc23a49d (diff) | |
download | gcc-141a3ccff1f53ca8a86453fbe4eaa1bc6b64d920.zip gcc-141a3ccff1f53ca8a86453fbe4eaa1bc6b64d920.tar.gz gcc-141a3ccff1f53ca8a86453fbe4eaa1bc6b64d920.tar.bz2 |
[AArch64] Expand DImode constant stores to two SImode stores when profitable
* config/aarch64/aarch64.md (mov<mode>): Call
aarch64_split_dimode_const_store on DImode constant stores.
* config/aarch64/aarch64-protos.h (aarch64_split_dimode_const_store):
New prototype.
* config/aarch64/aarch64.c (aarch64_split_dimode_const_store): New
function.
* gcc.target/aarch64/store_repeating_constant_1.c: New test.
* gcc.target/aarch64/store_repeating_constant_2.c: Likewise.
From-SVN: r242551
-rw-r--r-- | gcc/ChangeLog | 9 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.c | 57 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.md | 5 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/store_repeating_constant_1.c | 11 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/store_repeating_constant_2.c | 15 |
7 files changed, 103 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b9b3b8a..f69f911 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2016-11-17 Kyrylo Tkachov <kyrylo.tkachov@arm.com> + + * config/aarch64/aarch64.md (mov<mode>): Call + aarch64_split_dimode_const_store on DImode constant stores. + * config/aarch64/aarch64-protos.h (aarch64_split_dimode_const_store): + New prototype. + * config/aarch64/aarch64.c (aarch64_split_dimode_const_store): New + function. + 2016-11-17 Bill Schmidt <wschmidt@linux.vnet.ibm.com> Richard Biener <rguenther@suse.de> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 4f4989d..b6ca3df 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -337,6 +337,7 @@ bool aarch64_simd_scalar_immediate_valid_for_move (rtx, machine_mode); bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool); bool aarch64_simd_valid_immediate (rtx, machine_mode, bool, struct simd_immediate_info *); +bool aarch64_split_dimode_const_store (rtx, rtx); bool aarch64_symbolic_address_p (rtx); bool aarch64_uimm12_shift (HOST_WIDE_INT); bool aarch64_use_return_insn_p (void); diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 11d41cf..e5ca5eb 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -13211,6 +13211,63 @@ aarch64_expand_movmem (rtx *operands) return true; } +/* Split a DImode store of a CONST_INT SRC to MEM DST as two + SImode stores. Handle the case when the constant has identical + bottom and top halves. This is beneficial when the two stores can be + merged into an STP and we avoid synthesising potentially expensive + immediates twice. Return true if such a split is possible. */ + +bool +aarch64_split_dimode_const_store (rtx dst, rtx src) +{ + rtx lo = gen_lowpart (SImode, src); + rtx hi = gen_highpart_mode (SImode, DImode, src); + + bool size_p = optimize_function_for_size_p (cfun); + + if (!rtx_equal_p (lo, hi)) + return false; + + unsigned int orig_cost + = aarch64_internal_mov_immediate (NULL_RTX, src, false, DImode); + unsigned int lo_cost + = aarch64_internal_mov_immediate (NULL_RTX, lo, false, SImode); + + /* We want to transform: + MOV x1, 49370 + MOVK x1, 0x140, lsl 16 + MOVK x1, 0xc0da, lsl 32 + MOVK x1, 0x140, lsl 48 + STR x1, [x0] + into: + MOV w1, 49370 + MOVK w1, 0x140, lsl 16 + STP w1, w1, [x0] + So we want to perform this only when we save two instructions + or more. When optimizing for size, however, accept any code size + savings we can. */ + if (size_p && orig_cost <= lo_cost) + return false; + + if (!size_p + && (orig_cost <= lo_cost + 1)) + return false; + + rtx mem_lo = adjust_address (dst, SImode, 0); + if (!aarch64_mem_pair_operand (mem_lo, SImode)) + return false; + + rtx tmp_reg = gen_reg_rtx (SImode); + aarch64_expand_mov_immediate (tmp_reg, lo); + rtx mem_hi = aarch64_move_pointer (mem_lo, GET_MODE_SIZE (SImode)); + /* Don't emit an explicit store pair as this may not be always profitable. + Let the sched-fusion logic decide whether to merge them. */ + emit_move_insn (mem_lo, tmp_reg); + emit_move_insn (mem_hi, tmp_reg); + + return true; +} + /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */ static unsigned HOST_WIDE_INT diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index a652a7c..5089ccf 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -1011,6 +1011,11 @@ (match_operand:GPI 1 "general_operand" ""))] "" " + if (MEM_P (operands[0]) && CONST_INT_P (operands[1]) + && <MODE>mode == DImode + && aarch64_split_dimode_const_store (operands[0], operands[1])) + DONE; + if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx) operands[1] = force_reg (<MODE>mode, operands[1]); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index bd76620..77fa771 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2016-11-17 Kyrylo Tkachov <kyrylo.tkachov@arm.com> + + * gcc.target/aarch64/store_repeating_constant_1.c: New test. + * gcc.target/aarch64/store_repeating_constant_2.c: Likewise. + 2016-11-17 Bill Schmidt <wschmidt@linux.vnet.ibm.com> Richard Biener <rguenther@suse.de> diff --git a/gcc/testsuite/gcc.target/aarch64/store_repeating_constant_1.c b/gcc/testsuite/gcc.target/aarch64/store_repeating_constant_1.c new file mode 100644 index 0000000..50d4568 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/store_repeating_constant_1.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mtune=generic" } */ + +void +foo (unsigned long long *a) +{ + a[0] = 0x0140c0da0140c0daULL; +} + +/* { dg-final { scan-assembler-times "movk\\tw.*" 1 } } */ +/* { dg-final { scan-assembler-times "stp\tw\[0-9\]+, w\[0-9\]+.*" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/store_repeating_constant_2.c b/gcc/testsuite/gcc.target/aarch64/store_repeating_constant_2.c new file mode 100644 index 0000000..c421277 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/store_repeating_constant_2.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-Os" } */ + +/* Check that for -Os we synthesize only the bottom half and then + store it twice with an STP rather than synthesizing it twice in each + half of an X-reg. */ + +void +foo (unsigned long long *a) +{ + a[0] = 0xc0da0000c0daULL; +} + +/* { dg-final { scan-assembler-times "mov\\tw.*" 1 } } */ +/* { dg-final { scan-assembler-times "stp\tw\[0-9\]+, w\[0-9\]+.*" 1 } } */ |