aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGreta Yorsh <greta.yorsh@arm.com>2013-05-16 13:02:06 +0100
committerGreta Yorsh <gretay@gcc.gnu.org>2013-05-16 13:02:06 +0100
commit798d3d0454c3f8de61f95f9d8ff53da3465f19fa (patch)
tree28b8ace00c5999333e50f2f7c6f89542be4caa3a
parent0baddc4592fcdcb85d1417e7f06c2666eb7912e8 (diff)
downloadgcc-798d3d0454c3f8de61f95f9d8ff53da3465f19fa.zip
gcc-798d3d0454c3f8de61f95f9d8ff53da3465f19fa.tar.gz
gcc-798d3d0454c3f8de61f95f9d8ff53da3465f19fa.tar.bz2
Internal memcpy using LDRD/STRD
2013-05-16  Greta Yorsh  <Greta.Yorsh@arm.com>

gcc/
	* config/arm/arm-protos.h (gen_movmem_ldrd_strd): New declaration.
	* config/arm/arm.c (next_consecutive_mem): New function.
	(gen_movmem_ldrd_strd): Likewise.
	* config/arm/arm.md (movmemqi): Update condition and code.
	(unaligned_loaddi, unaligned_storedi): New patterns.

gcc/testsuite
	* gcc.target/arm/unaligned-memcpy-2.c: Adjust expected output.
	* gcc.target/arm/unaligned-memcpy-3.c: Likewise.
	* gcc.target/arm/unaligned-memcpy-4.c: Likewise.

From-SVN: r198970
-rw-r--r--gcc/ChangeLog8
-rw-r--r--gcc/config/arm/arm-protos.h1
-rw-r--r--gcc/config/arm/arm.c128
-rw-r--r--gcc/config/arm/arm.md68
-rw-r--r--gcc/testsuite/ChangeLog6
-rw-r--r--gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c5
-rw-r--r--gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c9
-rw-r--r--gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c8
8 files changed, 226 insertions, 7 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 343d3fc..15fe33b 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2013-05-16 Greta Yorsh <Greta.Yorsh@arm.com>
+
+ * config/arm/arm-protos.h (gen_movmem_ldrd_strd): New declaration.
+ * config/arm/arm.c (next_consecutive_mem): New function.
+ (gen_movmem_ldrd_strd): Likewise.
+ * config/arm/arm.md (movmemqi): Update condition and code.
+ (unaligned_loaddi, unaligned_storedi): New patterns.
+
2013-05-16 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
* config.gcc: Obsolete *-*-solaris2.9*.
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index a6af927..c791341 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -120,6 +120,7 @@ extern bool offset_ok_for_ldrd_strd (HOST_WIDE_INT);
extern bool operands_ok_ldrd_strd (rtx, rtx, rtx, HOST_WIDE_INT, bool, bool);
extern bool gen_operands_ldrd_strd (rtx *, bool, bool, bool);
extern int arm_gen_movmemqi (rtx *);
+extern bool gen_movmem_ldrd_strd (rtx *);
extern enum machine_mode arm_select_cc_mode (RTX_CODE, rtx, rtx);
extern enum machine_mode arm_select_dominance_cc_mode (rtx, rtx,
HOST_WIDE_INT);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 036db8a..c4f5c69 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -11855,6 +11855,134 @@ arm_gen_movmemqi (rtx *operands)
return 1;
}
+/* Helper for gen_movmem_ldrd_strd.  Given the memory rtx MEM, return a
+   memory rtx of the same mode whose address is MEM's address advanced
+   by the size of that mode, i.e. the location that directly follows
+   MEM.  */
+inline static rtx
+next_consecutive_mem (rtx mem)
+{
+  enum machine_mode mem_mode = GET_MODE (mem);
+  HOST_WIDE_INT step = GET_MODE_SIZE (mem_mode);
+
+  return adjust_automodify_address (mem, mem_mode,
+                                    plus_constant (Pmode, XEXP (mem, 0),
+                                                   step),
+                                    step);
+}
+
+/* Copy using LDRD/STRD instructions whenever possible.
+
+   OPERANDS is the operand array handed over by the movmemqi expander:
+   OPERANDS[0] is the destination MEM, OPERANDS[1] the source MEM,
+   OPERANDS[2] the number of bytes to copy and OPERANDS[3] the alignment
+   that may be assumed for both buffers.  OPERANDS[2] and OPERANDS[3]
+   must be CONST_INTs.
+
+   Returns true upon success.  Returns false, leaving the decision to the
+   caller, when the copy is longer than 64 bytes, when either buffer is
+   volatile, or when the known alignment is insufficient and unaligned
+   accesses are not enabled.  If neither buffer is word-aligned no
+   LDRD/STRD can be used and the result of arm_gen_movmemqi is returned
+   instead.  */
+bool
+gen_movmem_ldrd_strd (rtx *operands)
+{
+  unsigned HOST_WIDE_INT len;
+  HOST_WIDE_INT align;
+  rtx src, dst, base;
+  rtx reg0;
+  bool src_aligned, dst_aligned;
+  bool src_volatile, dst_volatile;
+
+  gcc_assert (CONST_INT_P (operands[2]));
+  gcc_assert (CONST_INT_P (operands[3]));
+
+  /* Only short copies are expanded here.  */
+  len = UINTVAL (operands[2]);
+  if (len > 64)
+    return false;
+
+  /* Maximum alignment we can assume for both src and dst buffers.  */
+  align = INTVAL (operands[3]);
+
+  if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
+    return false;
+
+  /* Place src and dst addresses in registers
+     and update the corresponding mem rtx.  */
+  dst = operands[0];
+  dst_volatile = MEM_VOLATILE_P (dst);
+  dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
+  base = copy_to_mode_reg (SImode, XEXP (dst, 0));
+  dst = adjust_automodify_address (dst, VOIDmode, base, 0);
+
+  src = operands[1];
+  src_volatile = MEM_VOLATILE_P (src);
+  src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
+  base = copy_to_mode_reg (SImode, XEXP (src, 0));
+  src = adjust_automodify_address (src, VOIDmode, base, 0);
+
+  /* Without unaligned accesses both buffers have to be word-aligned.  */
+  if (!unaligned_access && !(src_aligned && dst_aligned))
+    return false;
+
+  if (src_volatile || dst_volatile)
+    return false;
+
+  /* If we cannot generate any LDRD/STRD, try to generate LDM/STM.  */
+  if (!(dst_aligned || src_aligned))
+    return arm_gen_movmemqi (operands);
+
+  /* Copy a double-word at a time.  An aligned buffer is accessed with a
+     plain DImode move, an unaligned one through the unaligned_*di
+     patterns.  */
+  src = adjust_address (src, DImode, 0);
+  dst = adjust_address (dst, DImode, 0);
+  while (len >= 8)
+    {
+      len -= 8;
+      reg0 = gen_reg_rtx (DImode);
+      if (src_aligned)
+        emit_move_insn (reg0, src);
+      else
+        emit_insn (gen_unaligned_loaddi (reg0, src));
+
+      if (dst_aligned)
+        emit_move_insn (dst, reg0);
+      else
+        emit_insn (gen_unaligned_storedi (dst, reg0));
+
+      src = next_consecutive_mem (src);
+      dst = next_consecutive_mem (dst);
+    }
+
+  gcc_assert (len < 8);
+  if (len >= 4)
+    {
+      /* More than a word but less than a double-word to copy.  Copy a
+         word.  */
+      reg0 = gen_reg_rtx (SImode);
+      src = adjust_address (src, SImode, 0);
+      dst = adjust_address (dst, SImode, 0);
+      if (src_aligned)
+        emit_move_insn (reg0, src);
+      else
+        emit_insn (gen_unaligned_loadsi (reg0, src));
+
+      if (dst_aligned)
+        emit_move_insn (dst, reg0);
+      else
+        emit_insn (gen_unaligned_storesi (dst, reg0));
+
+      src = next_consecutive_mem (src);
+      dst = next_consecutive_mem (dst);
+      len -= 4;
+    }
+
+  if (len == 0)
+    return true;
+
+  /* Copy the remaining bytes.  */
+  if (len >= 2)
+    {
+      dst = adjust_address (dst, HImode, 0);
+      src = adjust_address (src, HImode, 0);
+      reg0 = gen_reg_rtx (SImode);
+
+      /* Choose the half-word access by alignment, exactly as for the word
+         and double-word copies above.  All offsets so far are multiples
+         of four, so a word-aligned buffer is still half-word aligned
+         here.  The unaligned_* patterns are therefore only requested for
+         a buffer that really is unaligned, which the checks above permit
+         only when unaligned_access is enabled.  */
+      if (src_aligned)
+        emit_insn (gen_zero_extendhisi2 (reg0, src));
+      else
+        emit_insn (gen_unaligned_loadhiu (reg0, src));
+
+      if (dst_aligned)
+        emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
+      else
+        emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
+
+      src = next_consecutive_mem (src);
+      dst = next_consecutive_mem (dst);
+      if (len == 2)
+        return true;
+    }
+
+  /* A single byte is left; byte accesses have no alignment
+     requirement.  */
+  dst = adjust_address (dst, QImode, 0);
+  src = adjust_address (src, QImode, 0);
+  reg0 = gen_reg_rtx (QImode);
+  emit_move_insn (reg0, src);
+  emit_move_insn (dst, reg0);
+  return true;
+}
+
/* Select a dominance comparison mode if possible for a test of the general
form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
COND_OR == DOM_CC_X_AND_Y => (X && Y)
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index d3bc760..4b45c98 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -4421,6 +4421,64 @@
(set_attr "predicable" "yes")
(set_attr "type" "store1")])
+;; Unaligned double-word load and store.
+;; Split after reload into two unaligned single-word accesses.
+;; It prevents lower_subreg from splitting some other aligned
+;; double-word accesses too early. Used for internal memcpy.
+
+;; unaligned_loaddi: load the DImode memory operand 1 into the DImode
+;; register operand 0.  The insn itself only outputs "#"; once reload has
+;; completed it is split into two SImode UNSPEC_UNALIGNED_LOAD sets, one
+;; for each half of the double-word.
+;; The first alternative ("l" registers) is tagged arch "t2" with length 4,
+;; the second ("r" registers) arch "any" with length 8.
+(define_insn_and_split "unaligned_loaddi"
+ [(set (match_operand:DI 0 "s_register_operand" "=l,r")
+ (unspec:DI [(match_operand:DI 1 "memory_operand" "o,o")]
+ UNSPEC_UNALIGNED_LOAD))]
+ "unaligned_access && TARGET_32BIT"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_UNALIGNED_LOAD))
+ (set (match_dup 2) (unspec:SI [(match_dup 3)] UNSPEC_UNALIGNED_LOAD))]
+ {
+ /* Break both operands into SImode halves: operands[0]/operands[1]
+ become the low parts of the register and of the memory, while
+ operands[2]/operands[3] receive the corresponding high parts.  The
+ high parts are taken first because the low-part assignments
+ overwrite operands[0] and operands[1].  */
+ operands[2] = gen_highpart (SImode, operands[0]);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[3] = gen_highpart (SImode, operands[1]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+
+ /* If the first destination register overlaps with the base address,
+ swap the order in which the loads are emitted. */
+ if (reg_overlap_mentioned_p (operands[0], operands[1]))
+ {
+ /* Exchange the two memory halves and the two register halves, so
+ that the high-part load is emitted first and the load that
+ overwrites the address register comes last.  */
+ rtx tmp = operands[1];
+ operands[1] = operands[3];
+ operands[3] = tmp;
+ tmp = operands[0];
+ operands[0] = operands[2];
+ operands[2] = tmp;
+ }
+ }
+ [(set_attr "arch" "t2,any")
+ (set_attr "length" "4,8")
+ (set_attr "predicable" "yes")
+ (set_attr "type" "load2")])
+
+;; unaligned_storedi: store the DImode register operand 1 into the DImode
+;; memory operand 0.  The insn itself only outputs "#"; once reload has
+;; completed it is split into two SImode UNSPEC_UNALIGNED_STORE sets, the
+;; low half first and the high half second.
+;; The first alternative ("l" registers) is tagged arch "t2" with length 4,
+;; the second ("r" registers) arch "any" with length 8.
+(define_insn_and_split "unaligned_storedi"
+ [(set (match_operand:DI 0 "memory_operand" "=o,o")
+ (unspec:DI [(match_operand:DI 1 "s_register_operand" "l,r")]
+ UNSPEC_UNALIGNED_STORE))]
+ "unaligned_access && TARGET_32BIT"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_UNALIGNED_STORE))
+ (set (match_dup 2) (unspec:SI [(match_dup 3)] UNSPEC_UNALIGNED_STORE))]
+ {
+ /* Break both operands into SImode halves: operands[0]/operands[1]
+ become the low parts of the memory and of the register, while
+ operands[2]/operands[3] receive the corresponding high parts.  The
+ high parts are taken first because the low-part assignments
+ overwrite operands[0] and operands[1].  The stores do not write
+ any register, so no reordering is done here.  */
+ operands[2] = gen_highpart (SImode, operands[0]);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[3] = gen_highpart (SImode, operands[1]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ }
+ [(set_attr "arch" "t2,any")
+ (set_attr "length" "4,8")
+ (set_attr "predicable" "yes")
+ (set_attr "type" "store2")])
+
+
(define_insn "*extv_reg"
[(set (match_operand:SI 0 "s_register_operand" "=r")
(sign_extract:SI (match_operand:SI 1 "s_register_operand" "r")
@@ -7374,10 +7432,18 @@
(match_operand:BLK 1 "general_operand" "")
(match_operand:SI 2 "const_int_operand" "")
(match_operand:SI 3 "const_int_operand" "")]
- "TARGET_EITHER"
+ ""
"
if (TARGET_32BIT)
{
+ if (TARGET_LDRD && current_tune->prefer_ldrd_strd
+ && !optimize_function_for_size_p (cfun))
+ {
+ if (gen_movmem_ldrd_strd (operands))
+ DONE;
+ FAIL;
+ }
+
if (arm_gen_movmemqi (operands))
DONE;
FAIL;
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 7bacbb5..879b9bc 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2013-05-16 Greta Yorsh <Greta.Yorsh@arm.com>
+
+ * gcc.target/arm/unaligned-memcpy-2.c: Adjust expected output.
+ * gcc.target/arm/unaligned-memcpy-3.c: Likewise.
+ * gcc.target/arm/unaligned-memcpy-4.c: Likewise.
+
2013-05-16 Nathan Sidwell <nathan@codesourcery.com>
* gcc.dg/visibility-21.c: New.
diff --git a/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c b/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c
index c7d24c9..f7bc2f4 100644
--- a/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c
+++ b/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c
@@ -14,7 +14,10 @@ void aligned_dest (char *src)
/* Expect a multi-word store for the main part of the copy, but subword
loads/stores for the remainder. */
-/* { dg-final { scan-assembler-times "stmia" 1 } } */
+/* { dg-final { scan-assembler-times "ldmia" 0 } } */
+/* { dg-final { scan-assembler-times "ldrd" 0 } } */
+/* { dg-final { scan-assembler-times "stmia" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */
+/* { dg-final { scan-assembler-times "strd" 1 { target { arm_prefer_ldrd_strd } } } } */
/* { dg-final { scan-assembler-times "ldrh" 1 } } */
/* { dg-final { scan-assembler-times "strh" 1 } } */
/* { dg-final { scan-assembler-times "ldrb" 1 } } */
diff --git a/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c b/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c
index 5f04137..9e2d164 100644
--- a/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c
+++ b/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c
@@ -14,8 +14,11 @@ void aligned_src (char *dest)
/* Expect a multi-word load for the main part of the copy, but subword
loads/stores for the remainder. */
-/* { dg-final { scan-assembler-times "ldmia" 1 } } */
-/* { dg-final { scan-assembler-times "ldrh" 1 } } */
+/* { dg-final { scan-assembler-times "ldmia" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */
+/* { dg-final { scan-assembler-times "ldrd" 1 { target { arm_prefer_ldrd_strd } } } } */
+/* { dg-final { scan-assembler-times "strd" 0 } } */
+/* { dg-final { scan-assembler-times "stm" 0 } } */
+/* { dg-final { scan-assembler-times "ldrh" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */
/* { dg-final { scan-assembler-times "strh" 1 } } */
-/* { dg-final { scan-assembler-times "ldrb" 1 } } */
+/* { dg-final { scan-assembler-times "ldrb" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */
/* { dg-final { scan-assembler-times "strb" 1 } } */
diff --git a/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c b/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c
index 9995708..4708c51 100644
--- a/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c
+++ b/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c
@@ -14,5 +14,9 @@ void aligned_both (void)
/* We know both src and dest to be aligned: expect multiword loads/stores. */
-/* { dg-final { scan-assembler-times "ldmia" 1 } } */
-/* { dg-final { scan-assembler-times "stmia" 1 } } */
+/* { dg-final { scan-assembler-times "ldmia" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */
+/* { dg-final { scan-assembler-times "stmia" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */
+/* { dg-final { scan-assembler "ldrd" { target { arm_prefer_ldrd_strd } } } } */
+/* { dg-final { scan-assembler-times "ldm" 0 { target { arm_prefer_ldrd_strd } } } } */
+/* { dg-final { scan-assembler "strd" { target { arm_prefer_ldrd_strd } } } } */
+/* { dg-final { scan-assembler-times "stm" 0 { target { arm_prefer_ldrd_strd } } } } */