author    Nick Clifton <nickc@cygnus.com>    1998-09-18 10:27:46 +0000
committer Nick Clifton <nickc@gcc.gnu.org>   1998-09-18 10:27:46 +0000
commit    d2a73f8ee2ddd2137fc4478936f541ae43615e69 (patch)
tree      2ba0be0551eaeb5bcbb8b3e569bb2b94b6312275 /gcc
parent    feaefdd522094b8e2b883ca5bbdd2118dc36d756 (diff)
Fix for PR1654 - implement "movstrsi" pattern to copy simple blocks of memory.
From-SVN: r22467
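
For orientation, the kind of source this change targets is a fixed-size,
word-aligned block copy such as a structure assignment. The snippet below is
purely illustrative and is not part of the patch (the type and function names
are invented): when the length and alignment are known at compile time, and we
are not optimising for size, the new "movstrsi" expander can copy the block
inline with pairs of loads and stores; otherwise the copy falls back to a
memcpy()/bcopy() library call, as implemented by block_move_call() in the
m32r.c changes below.

    /* Hypothetical example, not part of the patch.  */
    #include <string.h>

    struct record
    {
      int id;                   /* gives the structure word alignment */
      char text[28];            /* 32 bytes in total                  */
    };

    void
    copy_record (struct record *dst, const struct record *src)
    {
      *dst = *src;              /* fixed size, word aligned: a candidate for
                                   inline expansion through "movstrsi"      */
    }

    void
    copy_bytes (char *dst, const char *src, unsigned long n)
    {
      memcpy (dst, src, n);     /* length not a compile-time constant:
                                   ends up as a library call               */
    }
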
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/ChangeLog             19
-rw-r--r--  gcc/config/m32r/m32r.c   265
-rw-r--r--  gcc/config/m32r/m32r.h     5
-rw-r--r--  gcc/config/m32r/m32r.md   36
4 files changed, 324 insertions, 1 deletion
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 82f31f5..9e844b0 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,22 @@
+Fri Sep 18 09:44:55 1998 Nick Clifton <nickc@cygnus.com>
+
+ * config/m32r/m32r.h (m32r_block_immediate_operand): Add to
+ PREDICATE_CODES.
+
+ * config/m32r/m32r.md: Add "movstrsi" and "movstrsi_internal"
+ patterns.
+
+ * config/m32r/m32r.c (m32r_print_operand): Add 's' and 'p'
+ operators.
+ (block_move_call): New function: Call a library routine to copy a
+ block of memory.
+ (m32r_expand_block_move): New function: Expand a "movstrsi"
+ pattern into a sequence of insns.
+ (m32r_output_block_move): New function: Expand a
+ "movstrsi_internal" pattern into a sequence of assembler opcodes.
+ (m32r_block_immediate_operand): New function: Return true if the
+ RTL is an integer constant in the range 1 to MAX_MOVE_BYTES.
+
Thu Sep 17 16:42:16 EDT 1998 Andrew MacLeod <amacleod@cygnus.com>
* except.c (start_catch_handler): Issue 'fatal' instead of 'error' and
diff --git a/gcc/config/m32r/m32r.c b/gcc/config/m32r/m32r.c
index f88f35b..41f7ce3 100644
--- a/gcc/config/m32r/m32r.c
+++ b/gcc/config/m32r/m32r.c
@@ -1783,6 +1783,22 @@ m32r_print_operand (file, x, code)
switch (code)
{
+ /* The 's' and 'p' codes are used by output_block_move() to
+ indicate pre-increment 's'tores and 'p'ost-increment loads. */
+ case 's':
+ if (GET_CODE (x) == REG)
+ fprintf (file, "@+%s", reg_names [REGNO (x)]);
+ else
+ output_operand_lossage ("invalid operand to %s code");
+ return;
+
+ case 'p':
+ if (GET_CODE (x) == REG)
+ fprintf (file, "@%s+", reg_names [REGNO (x)]);
+ else
+ output_operand_lossage ("invalid operand to %p code");
+ return;
+
case 'R' :
/* Write second word of DImode or DFmode reference,
register or memory. */
@@ -1822,7 +1838,7 @@ m32r_print_operand (file, x, code)
rtx first, second;
split_double (x, &first, &second);
- fprintf (file, "0x%08lx",
+ fprintf (file, "0x%08x",
code == 'L' ? INTVAL (first) : INTVAL (second));
}
else
@@ -2209,3 +2225,250 @@ emit_cond_move (operands, insn)
return buffer;
}
+
+
+/* Use a library function to move some bytes. */
+static void
+block_move_call (dest_reg, src_reg, bytes_rtx)
+ rtx dest_reg;
+ rtx src_reg;
+ rtx bytes_rtx;
+{
+ /* We want to pass the size as Pmode, which will normally be SImode
+ but will be DImode if we are using 64 bit longs and pointers. */
+ if (GET_MODE (bytes_rtx) != VOIDmode
+ && GET_MODE (bytes_rtx) != Pmode)
+ bytes_rtx = convert_to_mode (Pmode, bytes_rtx, 1);
+
+#ifdef TARGET_MEM_FUNCTIONS
+ emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "memcpy"), 0,
+ VOIDmode, 3, dest_reg, Pmode, src_reg, Pmode,
+ convert_to_mode (TYPE_MODE (sizetype), bytes_rtx,
+ TREE_UNSIGNED (sizetype)),
+ TYPE_MODE (sizetype));
+#else
+ emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "bcopy"), 0,
+ VOIDmode, 3, src_reg, Pmode, dest_reg, Pmode,
+ convert_to_mode (TYPE_MODE (integer_type_node), bytes_rtx,
+ TREE_UNSIGNED (integer_type_node)),
+ TYPE_MODE (integer_type_node));
+#endif
+}
+
+/* The maximum number of bytes to copy using pairs of load/store instructions.
+ If a block is larger than this then a loop will be generated to copy it in
+ MAX_MOVE_BYTES sized chunks. The value of 32 is a semi-arbitrary choice.
+ A customer uses Dhrystone as their benchmark, and Dhrystone has a 31 byte
+ string copy in it. */
+#define MAX_MOVE_BYTES 32
+
+/* Expand string/block move operations.
+
+ operands[0] is the pointer to the destination.
+ operands[1] is the pointer to the source.
+ operands[2] is the number of bytes to move.
+ operands[3] is the alignment. */
+
+void
+m32r_expand_block_move (operands)
+ rtx operands[];
+{
+ rtx orig_dst = operands[0];
+ rtx orig_src = operands[1];
+ rtx bytes_rtx = operands[2];
+ rtx align_rtx = operands[3];
+ int constp = GET_CODE (bytes_rtx) == CONST_INT;
+ HOST_WIDE_INT bytes = constp ? INTVAL (bytes_rtx) : 0;
+ int align = INTVAL (align_rtx);
+ int leftover;
+ rtx src_reg;
+ rtx dst_reg;
+
+ if (constp && bytes <= 0)
+ return;
+
+ /* Move the addresses into scratch registers. */
+ dst_reg = copy_addr_to_reg (XEXP (orig_dst, 0));
+ src_reg = copy_addr_to_reg (XEXP (orig_src, 0));
+
+ if (align > UNITS_PER_WORD)
+ align = UNITS_PER_WORD;
+
+ /* If we prefer size over speed, always use a function call.
+ If we do not know the size, use a function call.
+ If the blocks are not word aligned, use a function call. */
+ if (optimize_size || ! constp || align != UNITS_PER_WORD)
+ {
+ block_move_call (dst_reg, src_reg, bytes_rtx);
+ return;
+ }
+
+ leftover = bytes % MAX_MOVE_BYTES;
+ bytes -= leftover;
+
+ /* If necessary, generate a loop to handle the bulk of the copy. */
+ if (bytes)
+ {
+ rtx label;
+ rtx final_src;
+
+ bytes_rtx = GEN_INT (MAX_MOVE_BYTES);
+
+ /* If we are going to have to perform this loop more than
+ once, then generate a label and compute the address the
+ source register will contain upon completion of the final
+ iteration. */
+ if (bytes > MAX_MOVE_BYTES)
+ {
+ final_src = gen_reg_rtx (Pmode);
+
+ if (INT16_P(bytes))
+ emit_insn (gen_addsi3 (final_src, src_reg, bytes_rtx));
+ else
+ {
+ emit_insn (gen_movsi (final_src, bytes_rtx));
+ emit_insn (gen_addsi3 (final_src, final_src, src_reg));
+ }
+
+ label = gen_label_rtx ();
+ emit_label (label);
+ }
+
+ /* It is known that output_block_move() will update src_reg to point
+ to the word after the end of the source block, and dst_reg to point
+ to the last word of the destination block, provided that the block
+ is MAX_MOVE_BYTES long. */
+ emit_insn (gen_movstrsi_internal (dst_reg, src_reg, bytes_rtx));
+ emit_insn (gen_addsi3 (dst_reg, dst_reg, GEN_INT (4)));
+
+ if (bytes > MAX_MOVE_BYTES)
+ {
+ emit_insn (gen_cmpsi (src_reg, final_src));
+ emit_jump_insn (gen_bne (label));
+ }
+ }
+
+ if (leftover)
+ emit_insn (gen_movstrsi_internal (dst_reg, src_reg, GEN_INT (leftover)));
+}
+
+
+/* Emit load/stores for a small constant word aligned block_move.
+
+ operands[0] is the memory address of the destination.
+ operands[1] is the memory address of the source.
+ operands[2] is the number of bytes to move.
+ operands[3] is a temp register.
+ operands[4] is a temp register. */
+
+char *
+m32r_output_block_move (insn, operands)
+ rtx insn;
+ rtx operands[];
+{
+ HOST_WIDE_INT bytes = INTVAL (operands[2]);
+ int first_time;
+ int got_extra = 0;
+
+ if (bytes < 1 || bytes > MAX_MOVE_BYTES)
+ abort ();
+
+ /* We do not have a post-increment store available, so the first store
+ is done without any increment, then the remaining ones can use the
+ pre-increment addressing mode.
+
+ Note: expand_block_move() also relies upon this behaviour when building
+ loops to copy large blocks. */
+ first_time = 1;
+
+ while (bytes > 0)
+ {
+ if (bytes >= 8)
+ {
+ if (first_time)
+ {
+ output_asm_insn ("ld\t%3, %p1", operands);
+ output_asm_insn ("ld\t%4, %p1", operands);
+ output_asm_insn ("st\t%3, @%0", operands);
+ output_asm_insn ("st\t%4, %s0", operands);
+ }
+ else
+ {
+ output_asm_insn ("ld\t%3, %p1", operands);
+ output_asm_insn ("ld\t%4, %p1", operands);
+ output_asm_insn ("st\t%3, %s0", operands);
+ output_asm_insn ("st\t%4, %s0", operands);
+ }
+
+ bytes -= 8;
+ }
+ else if (bytes >= 4)
+ {
+ if (bytes > 4)
+ got_extra = 1;
+
+ output_asm_insn ("ld\t%3, %p1", operands);
+
+ if (got_extra)
+ output_asm_insn ("ld\t%4, %p1", operands);
+
+ if (first_time)
+ output_asm_insn ("st\t%3, @%0", operands);
+ else
+ output_asm_insn ("st\t%3, %s0", operands);
+
+ bytes -= 4;
+ }
+ else
+ {
+ /* Get the entire next word, even though we do not want all of it.
+ This saves us from doing several smaller loads, and we assume that
+ we cannot cause a page fault when at least part of the word is in
+ valid memory. If got_extra is true then we have already loaded
+ the next word as part of loading and storing the previous word. */
+ if (! got_extra)
+ output_asm_insn ("ld\t%4, @%1", operands);
+
+ if (bytes >= 2)
+ {
+ bytes -= 2;
+
+ output_asm_insn ("sth\t%4, @%0", operands);
+
+ /* If there is a byte left to store then increment the
+ destination address and shift the contents of the source
+ register down by 16 bits. We cannot do the address
+ increment in the store halfword instruction itself, because
+ it has no auto-increment mode. */
+ if (bytes > 0) /* assert (bytes == 1) */
+ {
+ output_asm_insn ("srai\t%4, #16", operands);
+ output_asm_insn ("addi\t%0, #2", operands);
+ }
+ }
+
+ output_asm_insn ("stb\t%4, @%0", operands);
+
+ bytes = 0;
+ }
+
+ first_time = 0;
+ }
+
+ return "";
+}
+
+/* Return true if op is an integer constant greater than zero and
+ less than or equal to MAX_MOVE_BYTES. */
+int
+m32r_block_immediate_operand (op, mode)
+ rtx op;
+ int mode;
+{
+ if (GET_CODE (op) != CONST_INT
+ || INTVAL (op) > MAX_MOVE_BYTES
+ || INTVAL (op) <= 0)
+ return 0;
+
+ return 1;
+}
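
A quick way to see how the new m32r_expand_block_move() splits up a copy is to
mirror its size and alignment decision on the host. The sketch below is
illustrative only and is not part of the patch; plan_copy() and its parameters
are invented stand-ins for optimize_size, the block's alignment and whether its
length is a compile-time constant. It follows the same rules as the code above:
an unknown length, sub-word alignment or -Os means a library call, otherwise
the block is copied in MAX_MOVE_BYTES chunks (looped when there is more than
one) followed by a leftover copy of whatever remains.

    /* Illustrative host-side sketch, not part of the patch.  It mirrors the
       size/alignment decision made by m32r_expand_block_move() above.  */
    #include <stdio.h>

    #define MAX_MOVE_BYTES 32   /* as defined in m32r.c above */
    #define UNITS_PER_WORD 4    /* word size on the M32R      */

    static void
    plan_copy (long bytes, int align, int size_known, int optimize_size)
    {
      long leftover, chunks;

      if (size_known && bytes <= 0)
        return;

      if (align > UNITS_PER_WORD)
        align = UNITS_PER_WORD;

      /* Unknown length, sub-word alignment or optimising for size: punt to
         the memcpy()/bcopy() call made by block_move_call().  */
      if (optimize_size || ! size_known || align != UNITS_PER_WORD)
        {
          printf ("%ld bytes: library call\n", bytes);
          return;
        }

      leftover = bytes % MAX_MOVE_BYTES;
      chunks = (bytes - leftover) / MAX_MOVE_BYTES;

      printf ("%ld bytes: %ld chunk(s) of %d%s, leftover %ld\n",
              bytes, chunks, MAX_MOVE_BYTES,
              chunks > 1 ? " (looped)" : "", leftover);
    }

    int
    main (void)
    {
      plan_copy (31, 4, 1, 0);   /* Dhrystone case: one 31 byte leftover copy    */
      plan_copy (100, 4, 1, 0);  /* three looped 32 byte chunks + 4 left over    */
      plan_copy (100, 1, 1, 0);  /* byte aligned: library call                   */
      plan_copy (100, 4, 0, 0);  /* length unknown at compile time: library call */
      return 0;
    }
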
diff --git a/gcc/config/m32r/m32r.h b/gcc/config/m32r/m32r.h
index 3435a17..382f66e 100644
--- a/gcc/config/m32r/m32r.h
+++ b/gcc/config/m32r/m32r.h
@@ -1987,6 +1987,7 @@ enum m32r_function_type
{ "cmp_int16_operand", { CONST_INT }}, \
{ "call_address_operand", { SYMBOL_REF, LABEL_REF, CONST }}, \
{ "small_insn_p", { INSN, CALL_INSN, JUMP_INSN }}, \
+{ "m32r_block_immediate_operand",{ CONST_INT }}, \
{ "large_insn_p", { INSN, CALL_INSN, JUMP_INSN }},
/* Functions declared in m32r.c */
@@ -2078,3 +2079,7 @@ extern char *emit_cond_move PROTO((Rtx *, Rtx));
/* Needed by a peephole optimisation. */
#define PRESERVE_DEATH_INFO_REGNO_P(regno) (regno < FIRST_PSEUDO_REGISTER)
+
+extern char * m32r_output_block_move PROTO((Rtx, Rtx *));
+extern int m32r_block_immediate_operand PROTO((Rtx, int));
+extern void m32r_expand_block_move PROTO((Rtx *));
diff --git a/gcc/config/m32r/m32r.md b/gcc/config/m32r/m32r.md
index 5349ca8..e163890 100644
--- a/gcc/config/m32r/m32r.md
+++ b/gcc/config/m32r/m32r.md
@@ -1759,3 +1759,39 @@
(set_attr "length" "4")
]
)
+
+;; Block moves; see m32r.c for more details.
+;; Argument 0 is the destination
+;; Argument 1 is the source
+;; Argument 2 is the length
+;; Argument 3 is the alignment
+
+(define_expand "movstrsi"
+ [(parallel [(set (match_operand:BLK 0 "general_operand" "")
+ (match_operand:BLK 1 "general_operand" ""))
+ (use (match_operand:SI 2 "immediate_operand" ""))
+ (use (match_operand:SI 3 "immediate_operand" ""))])]
+ ""
+ "
+{
+ if (operands[0]) /* avoid unused code messages */
+ {
+ m32r_expand_block_move (operands);
+ DONE;
+ }
+}")
+
+;; Insn generated by block moves
+
+(define_insn "movstrsi_internal"
+ [(set (mem:BLK (match_operand:SI 0 "register_operand" "r")) ;; destination
+ (mem:BLK (match_operand:SI 1 "register_operand" "r"))) ;; source
+ (use (match_operand:SI 2 "m32r_block_immediate_operand" "J"));; # bytes to move
+ (clobber (match_scratch:SI 3 "=&r")) ;; temp 1
+ (clobber (match_scratch:SI 4 "=&r")) ;; temp 2
+ (clobber (match_dup 0))
+ (clobber (match_dup 1))]
+ ""
+ "* return m32r_output_block_move (insn, operands);"
+ [(set_attr "type" "store8")
+ (set_attr "length" "72")]) ;; Maximum
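
One invariant worth spelling out: the size operand of "movstrsi_internal" is
matched by the new m32r_block_immediate_operand predicate, so the insn only
ever sees a constant between 1 and MAX_MOVE_BYTES, and m32r_output_block_move()
aborts on anything else. m32r_expand_block_move() maintains this by emitting
either whole MAX_MOVE_BYTES chunks or a 1 to 31 byte leftover copy. The
host-side sketch below is illustrative only, not part of the patch; it simply
checks that property over a range of block sizes.

    /* Illustrative host-side check, not part of the patch: every size that
       m32r_expand_block_move() hands to "movstrsi_internal" satisfies the new
       m32r_block_immediate_operand predicate (1 .. MAX_MOVE_BYTES).  */
    #include <assert.h>

    #define MAX_MOVE_BYTES 32

    static int
    block_immediate_ok (long n)
    {
      /* Mirrors m32r_block_immediate_operand: reject anything that is not
         a positive constant of at most MAX_MOVE_BYTES.  */
      return n > 0 && n <= MAX_MOVE_BYTES;
    }

    int
    main (void)
    {
      long bytes;

      for (bytes = 1; bytes <= 4096; bytes++)
        {
          long leftover = bytes % MAX_MOVE_BYTES;

          /* The loop body copies MAX_MOVE_BYTES at a time ...  */
          if (bytes - leftover > 0)
            assert (block_immediate_ok (MAX_MOVE_BYTES));

          /* ... and the final copy, when present, handles the 1..31 byte
             leftover.  */
          if (leftover)
            assert (block_immediate_ok (leftover));
        }

      return 0;
    }
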