 gcc/config/aarch64/aarch64.c             | 156
 gcc/testsuite/gcc.target/aarch64/stp_1.c |  29
 2 files changed, 133 insertions, 52 deletions
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index a8f53b8..850288d 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -79,6 +79,7 @@
 #include "tree-ssa-loop-niter.h"
 #include "fractional-cost.h"
 #include "rtlanal.h"
+#include "tree-dfa.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -24569,6 +24570,97 @@ aarch64_sched_adjust_priority (rtx_insn *insn, int priority)
   return priority;
 }
 
+/* Check if *MEM1 and *MEM2 are consecutive memory references and,
+   if they are, try to make them use constant offsets from the same base
+   register.  Return true on success.  When returning true, set *REVERSED
+   to true if *MEM1 comes after *MEM2, false if *MEM1 comes before *MEM2.  */
+static bool
+aarch64_check_consecutive_mems (rtx *mem1, rtx *mem2, bool *reversed)
+{
+  *reversed = false;
+  if (GET_RTX_CLASS (GET_CODE (XEXP (*mem1, 0))) == RTX_AUTOINC
+      || GET_RTX_CLASS (GET_CODE (XEXP (*mem2, 0))) == RTX_AUTOINC)
+    return false;
+
+  if (!MEM_SIZE_KNOWN_P (*mem1) || !MEM_SIZE_KNOWN_P (*mem2))
+    return false;
+
+  auto size1 = MEM_SIZE (*mem1);
+  auto size2 = MEM_SIZE (*mem2);
+
+  rtx base1, base2, offset1, offset2;
+  extract_base_offset_in_addr (*mem1, &base1, &offset1);
+  extract_base_offset_in_addr (*mem2, &base2, &offset2);
+
+  /* Make sure at least one memory is in base+offset form.  */
+  if (!(base1 && offset1) && !(base2 && offset2))
+    return false;
+
+  /* If both mems already use the same base register, just check the
+     offsets.  */
+  if (base1 && base2 && rtx_equal_p (base1, base2))
+    {
+      if (!offset1 || !offset2)
+	return false;
+
+      if (known_eq (UINTVAL (offset1) + size1, UINTVAL (offset2)))
+	return true;
+
+      if (known_eq (UINTVAL (offset2) + size2, UINTVAL (offset1)))
+	{
+	  *reversed = true;
+	  return true;
+	}
+
+      return false;
+    }
+
+  /* Otherwise, check whether the MEM_EXPRs and MEM_OFFSETs together
+     guarantee that the values are consecutive.  */
+  if (MEM_EXPR (*mem1)
+      && MEM_EXPR (*mem2)
+      && MEM_OFFSET_KNOWN_P (*mem1)
+      && MEM_OFFSET_KNOWN_P (*mem2))
+    {
+      poly_int64 expr_offset1;
+      poly_int64 expr_offset2;
+      tree expr_base1 = get_addr_base_and_unit_offset (MEM_EXPR (*mem1),
+						       &expr_offset1);
+      tree expr_base2 = get_addr_base_and_unit_offset (MEM_EXPR (*mem2),
+						       &expr_offset2);
+      if (!expr_base1
+	  || !expr_base2
+	  || !operand_equal_p (expr_base1, expr_base2, OEP_ADDRESS_OF))
+	return false;
+
+      expr_offset1 += MEM_OFFSET (*mem1);
+      expr_offset2 += MEM_OFFSET (*mem2);
+
+      if (known_eq (expr_offset1 + size1, expr_offset2))
+	;
+      else if (known_eq (expr_offset2 + size2, expr_offset1))
+	*reversed = true;
+      else
+	return false;
+
+      if (base2)
+	{
+	  rtx addr1 = plus_constant (Pmode, XEXP (*mem2, 0),
+				     expr_offset1 - expr_offset2);
+	  *mem1 = replace_equiv_address_nv (*mem1, addr1);
+	}
+      else
+	{
+	  rtx addr2 = plus_constant (Pmode, XEXP (*mem1, 0),
+				     expr_offset2 - expr_offset1);
+	  *mem2 = replace_equiv_address_nv (*mem2, addr2);
+	}
+      return true;
+    }
+
+  return false;
+}
+
 /* Given OPERANDS of consecutive load/store, check if we can merge
    them into ldp/stp.  LOAD is true if they are load instructions.
    MODE is the mode of memory operands.  */
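The second path of the new helper is the substance of the change: even when the two addresses reach RTL with different base registers, matching MEM_EXPRs with known MEM_OFFSETs can prove that the accesses are adjacent, and the helper then rewrites one address as a constant offset from the other. A minimal C sketch of the kind of source pattern this covers (the function name store_adjacent is illustrative, not from the patch; the new stp_1.c test below exercises the same shape):

    int res[2];

    void
    store_adjacent (int x, int y)
    {
      /* Two 4-byte stores at unit offsets 0 and 4 within "res".  Even if
         the two addresses use different base registers in RTL, MEM_EXPR
         records that both refer to "res", so aarch64_check_consecutive_mems
         can rewrite one address relative to the other and the ldp/stp
         peephole can emit a single stp.  */
      res[0] = x + y;
      res[1] = x + y;
    }
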
@@ -24577,9 +24669,8 @@ bool
 aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
				machine_mode mode)
 {
-  HOST_WIDE_INT offval_1, offval_2, msize;
   enum reg_class rclass_1, rclass_2;
-  rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
+  rtx mem_1, mem_2, reg_1, reg_2;
 
   if (load)
     {
@@ -24590,6 +24681,8 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
       gcc_assert (REG_P (reg_1) && REG_P (reg_2));
       if (REGNO (reg_1) == REGNO (reg_2))
	return false;
+      if (reg_overlap_mentioned_p (reg_1, mem_2))
+	return false;
     }
   else
     {
@@ -24613,40 +24706,13 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
     return false;
 
   /* Check if the addresses are in the form of [base+offset].  */
-  extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
-  if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
-    return false;
-  extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
-  if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
-    return false;
-
-  /* Check if the bases are same.  */
-  if (!rtx_equal_p (base_1, base_2))
+  bool reversed = false;
+  if (!aarch64_check_consecutive_mems (&mem_1, &mem_2, &reversed))
     return false;
 
   /* The operands must be of the same size.  */
   gcc_assert (known_eq (GET_MODE_SIZE (GET_MODE (mem_1)),
-			GET_MODE_SIZE (GET_MODE (mem_2))));
-
-  offval_1 = INTVAL (offset_1);
-  offval_2 = INTVAL (offset_2);
-  /* We should only be trying this for fixed-sized modes.  There is no
-     SVE LDP/STP instruction.  */
-  msize = GET_MODE_SIZE (mode).to_constant ();
-  /* Check if the offsets are consecutive.  */
-  if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
-    return false;
-
-  /* Check if the addresses are clobbered by load.  */
-  if (load)
-    {
-      if (reg_mentioned_p (reg_1, mem_1))
-	return false;
-
-      /* In increasing order, the last load can clobber the address.  */
-      if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
-	return false;
-    }
+			GET_MODE_SIZE (GET_MODE (mem_2))));
 
   /* One of the memory accesses must be a mempair operand.
      If it is not the first one, they need to be swapped by the
@@ -24677,27 +24743,13 @@
 void
 aarch64_swap_ldrstr_operands (rtx* operands, bool load)
 {
-  rtx mem_1, mem_2, base_1, base_2, offset_1, offset_2;
-  HOST_WIDE_INT offval_1, offval_2;
-
-  if (load)
-    {
-      mem_1 = operands[1];
-      mem_2 = operands[3];
-    }
-  else
-    {
-      mem_1 = operands[0];
-      mem_2 = operands[2];
-    }
-
-  extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
-  extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
-
-  offval_1 = INTVAL (offset_1);
-  offval_2 = INTVAL (offset_2);
+  int mem_op = load ? 1 : 0;
+  bool reversed = false;
+  if (!aarch64_check_consecutive_mems (operands + mem_op,
+				       operands + mem_op + 2, &reversed))
+    gcc_unreachable ();
 
-  if (offval_1 > offval_2)
+  if (reversed)
     {
       /* Irrespective of whether this is a load or a store, we do the
	 same swap.  */
diff --git a/gcc/testsuite/gcc.target/aarch64/stp_1.c b/gcc/testsuite/gcc.target/aarch64/stp_1.c
new file mode 100644
index 0000000..c1a5e6b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/stp_1.c
@@ -0,0 +1,29 @@
+/* { dg-options "-O -fpeephole2" } */
+
+int res[2];
+
+void
+f1 (int x, int y)
+{
+  res[0] = res[1] = x + y;
+}
+
+void
+f2 (int x, int y, int *res)
+{
+  res[0] = res[1] = x + y;
+}
+
+void
+f3 (int x, int y)
+{
+  res[1] = res[0] = x + y;
+}
+
+void
+f4 (int x, int y, int *res)
+{
+  res[1] = res[0] = x + y;
+}
+
+/* { dg-final { scan-assembler-times {\tstp\t} 4 } } */
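For f1, a hand-written sketch of the expected output (not actual compiler output; exact register choice and scheduling will vary, the point being the single stp per function that the scan-assembler-times directive counts):

    f1:
            add     w0, w0, w1          // w0 = x + y
            adrp    x1, res
            add     x1, x1, :lo12:res   // x1 = &res[0]
            stp     w0, w0, [x1]        // res[0] = res[1] = x + y
            ret

The test can be run on its own with the usual DejaGNU driver, for example: make check-gcc RUNTESTFLAGS="aarch64.exp=stp_1.c"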
