author     Nick Clifton <nickc@cygnus.com>   1998-09-18 10:27:46 +0000
committer  Nick Clifton <nickc@gcc.gnu.org>  1998-09-18 10:27:46 +0000
commit     d2a73f8ee2ddd2137fc4478936f541ae43615e69 (patch)
tree       2ba0be0551eaeb5bcbb8b3e569bb2b94b6312275 /gcc
parent     feaefdd522094b8e2b883ca5bbdd2118dc36d756 (diff)
download   gcc-d2a73f8ee2ddd2137fc4478936f541ae43615e69.zip
           gcc-d2a73f8ee2ddd2137fc4478936f541ae43615e69.tar.gz
           gcc-d2a73f8ee2ddd2137fc4478936f541ae43615e69.tar.bz2
Fix for PR1654 - implement "movstrsi" pattern to copy simple blocks of memory.
From-SVN: r22467
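The "movstrsi" named pattern is the hook GCC's middle end uses when expanding small block copies, such as struct assignments and constant-size memcpy calls. As a hypothetical illustration (the type and sizes below are invented, not taken from the PR), this is the kind of C source whose copy the new pattern lets the M32R back end inline:

```c
/* Hypothetical example: a small, word-aligned, constant-size copy.
   With this patch the m32r back end expands the assignment below
   through "movstrsi" into paired ld/st instructions instead of
   emitting a memcpy library call.  */
struct packet
{
  int header;
  int payload[6];   /* 28 bytes total, 4-byte aligned */
};

void
forward (struct packet *dst, const struct packet *src)
{
  *dst = *src;      /* block move routed through "movstrsi" */
}
```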
Diffstat (limited to 'gcc')

-rw-r--r--  gcc/ChangeLog            |  19
-rw-r--r--  gcc/config/m32r/m32r.c   | 265
-rw-r--r--  gcc/config/m32r/m32r.h   |   5
-rw-r--r--  gcc/config/m32r/m32r.md  |  36

4 files changed, 324 insertions, 1 deletion
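The expander in the diff below splits a constant-size, word-aligned copy into MAX_MOVE_BYTES chunks and emits a compare-and-branch loop only when more than one full chunk is needed. Here is a C model of that control flow, written against the constants in the patch; it is a sketch of the emitted insn sequence, not of the RTL-building code (each memcpy stands in for one movstrsi_internal insn):

```c
#include <string.h>

#define MAX_MOVE_BYTES 32   /* same value the patch defines in m32r.c */

/* Model of m32r_expand_block_move for a known, word-aligned size.
   Unknown or unaligned sizes instead fall back to a library call
   (memcpy, or bcopy when TARGET_MEM_FUNCTIONS is not defined).  */
static void
model_expand_block_move (char *dst, const char *src, unsigned long bytes)
{
  unsigned long leftover = bytes % MAX_MOVE_BYTES;
  const char *final_src = src + (bytes - leftover);

  /* The label/compare/branch loop is emitted only when more than one
     full chunk remains after splitting off the leftover.  */
  while (src < final_src)
    {
      memcpy (dst, src, MAX_MOVE_BYTES);   /* one movstrsi_internal insn */
      src += MAX_MOVE_BYTES;
      dst += MAX_MOVE_BYTES;
    }

  if (leftover)
    memcpy (dst, src, leftover);           /* final movstrsi_internal insn */
}
```

The real expander also keeps dst_reg pointing at the last stored word (a consequence of the pre-increment stores) and bumps it by 4 after each chunk; the model above abstracts that bookkeeping away.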
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 82f31f5..9e844b0 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,22 @@
+Fri Sep 18 09:44:55 1998  Nick Clifton  <nickc@cygnus.com>
+
+	* config/m32r/m32r.h (m32r_block_immediate_operand): Add to
+	PREDICATE_CODES.
+
+	* config/m32r/m32r.md: Add "movstrsi" and "movstrsi_internal"
+	patterns.
+
+	* config/m32r/m32r.c (m32r_print_operand): Add 's' and 'p'
+	operators.
+	(block_move_call): New function: Call a library routine to copy a
+	block of memory.
+	(m32r_expand_block_move): New function: Expand a "movstrsi"
+	pattern into a sequence of insns.
+	(m32r_output_block_move): New function: Expand a
+	"movstrsi_internal" pattern into a sequence of assembler opcodes.
+	(m32r_block_immediate_operand): New function: Return true if the
+	RTL is an integer constant, less than or equal to MAX_MOVE_BYTES.
+
 Thu Sep 17 16:42:16 EDT 1998  Andrew MacLeod  <amacleod@cygnus.com>
 
 	* except.c (start_catch_handler): Issue 'fatal' instead of 'error' and
diff --git a/gcc/config/m32r/m32r.c b/gcc/config/m32r/m32r.c
index f88f35b..41f7ce3 100644
--- a/gcc/config/m32r/m32r.c
+++ b/gcc/config/m32r/m32r.c
@@ -1783,6 +1783,22 @@ m32r_print_operand (file, x, code)
   switch (code)
     {
+      /* The 's' and 'p' codes are used by output_block_move() to
+	 indicate pre-increment 's'tores and 'p'ost-increment loads.  */
+    case 's':
+      if (GET_CODE (x) == REG)
+	fprintf (file, "@+%s", reg_names [REGNO (x)]);
+      else
+	output_operand_lossage ("invalid operand to %s code");
+      return;
+
+    case 'p':
+      if (GET_CODE (x) == REG)
+	fprintf (file, "@%s+", reg_names [REGNO (x)]);
+      else
+	output_operand_lossage ("invalid operand to %p code");
+      return;
+
     case 'R' :
       /* Write second word of DImode or DFmode reference,
	 register or memory.  */
@@ -1822,7 +1838,7 @@ m32r_print_operand (file, x, code)
 	  rtx first, second;
 
 	  split_double (x, &first, &second);
-	  fprintf (file, "0x%08lx",
+	  fprintf (file, "0x%08x",
 		   code == 'L' ? INTVAL (first) : INTVAL (second));
 	}
       else
@@ -2209,3 +2225,250 @@ emit_cond_move (operands, insn)
 
   return buffer;
 }
+
+
+/* Use a library function to move some bytes.  */
+static void
+block_move_call (dest_reg, src_reg, bytes_rtx)
+     rtx dest_reg;
+     rtx src_reg;
+     rtx bytes_rtx;
+{
+  /* We want to pass the size as Pmode, which will normally be SImode
+     but will be DImode if we are using 64 bit longs and pointers.  */
+  if (GET_MODE (bytes_rtx) != VOIDmode
+      && GET_MODE (bytes_rtx) != Pmode)
+    bytes_rtx = convert_to_mode (Pmode, bytes_rtx, 1);
+
+#ifdef TARGET_MEM_FUNCTIONS
+  emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "memcpy"), 0,
+		     VOIDmode, 3, dest_reg, Pmode, src_reg, Pmode,
+		     convert_to_mode (TYPE_MODE (sizetype), bytes_rtx,
+				      TREE_UNSIGNED (sizetype)),
+		     TYPE_MODE (sizetype));
+#else
+  emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "bcopy"), 0,
+		     VOIDmode, 3, src_reg, Pmode, dest_reg, Pmode,
+		     convert_to_mode (TYPE_MODE (integer_type_node),
+				      bytes_rtx,
+				      TREE_UNSIGNED (integer_type_node)),
+		     TYPE_MODE (integer_type_node));
+#endif
+}
+
+/* The maximum number of bytes to copy using pairs of load/store
+   instructions.  If a block is larger than this then a loop will be
+   generated to copy MAX_MOVE_BYTES chunks at a time.  The value of 32
+   is a semi-arbitrary choice.  A customer uses Dhrystone as their
+   benchmark, and Dhrystone has a 31 byte string copy in it.  */
+#define MAX_MOVE_BYTES 32
+
+/* Expand string/block move operations.
+
+   operands[0] is the pointer to the destination.
+   operands[1] is the pointer to the source.
+   operands[2] is the number of bytes to move.
+   operands[3] is the alignment.  */
+
+void
+m32r_expand_block_move (operands)
+     rtx operands[];
+{
+  rtx orig_dst = operands[0];
+  rtx orig_src = operands[1];
+  rtx bytes_rtx = operands[2];
+  rtx align_rtx = operands[3];
+  int constp = GET_CODE (bytes_rtx) == CONST_INT;
+  HOST_WIDE_INT bytes = constp ? INTVAL (bytes_rtx) : 0;
+  int align = INTVAL (align_rtx);
+  int leftover;
+  rtx src_reg;
+  rtx dst_reg;
+
+  if (constp && bytes <= 0)
+    return;
+
+  /* Move the addresses into scratch registers.  */
+  dst_reg = copy_addr_to_reg (XEXP (orig_dst, 0));
+  src_reg = copy_addr_to_reg (XEXP (orig_src, 0));
+
+  if (align > UNITS_PER_WORD)
+    align = UNITS_PER_WORD;
+
+  /* If we prefer size over speed, always use a function call.
+     If we do not know the size, use a function call.
+     If the blocks are not word aligned, use a function call.  */
+  if (optimize_size || ! constp || align != UNITS_PER_WORD)
+    {
+      block_move_call (dst_reg, src_reg, bytes_rtx);
+      return;
+    }
+
+  leftover = bytes % MAX_MOVE_BYTES;
+  bytes -= leftover;
+
+  /* If necessary, generate a loop to handle the bulk of the copy.  */
+  if (bytes)
+    {
+      rtx label;
+      rtx final_src;
+
+      bytes_rtx = GEN_INT (MAX_MOVE_BYTES);
+
+      /* If we are going to have to perform this loop more than
+	 once, then generate a label and compute the address the
+	 source register will contain upon completion of the final
+	 iteration.  */
+      if (bytes > MAX_MOVE_BYTES)
+	{
+	  final_src = gen_reg_rtx (Pmode);
+
+	  if (INT16_P(bytes))
+	    emit_insn (gen_addsi3 (final_src, src_reg, bytes_rtx));
+	  else
+	    {
+	      emit_insn (gen_movsi (final_src, bytes_rtx));
+	      emit_insn (gen_addsi3 (final_src, final_src, src_reg));
+	    }
+
+	  label = gen_label_rtx ();
+	  emit_label (label);
+	}
+
+      /* It is known that output_block_move() will update src_reg to
+	 point to the word after the end of the source block, and
+	 dst_reg to point to the last word of the destination block,
+	 provided that the block is MAX_MOVE_BYTES long.  */
+      emit_insn (gen_movstrsi_internal (dst_reg, src_reg, bytes_rtx));
+      emit_insn (gen_addsi3 (dst_reg, dst_reg, GEN_INT (4)));
+
+      if (bytes > MAX_MOVE_BYTES)
+	{
+	  emit_insn (gen_cmpsi (src_reg, final_src));
+	  emit_jump_insn (gen_bne (label));
+	}
+    }
+
+  if (leftover)
+    emit_insn (gen_movstrsi_internal (dst_reg, src_reg, GEN_INT (leftover)));
+}
+
+
+/* Emit load/stores for a small constant word-aligned block move.
+
+   operands[0] is the memory address of the destination.
+   operands[1] is the memory address of the source.
+   operands[2] is the number of bytes to move.
+   operands[3] is a temp register.
+   operands[4] is a temp register.  */
+
+char *
+m32r_output_block_move (insn, operands)
+     rtx insn;
+     rtx operands[];
+{
+  HOST_WIDE_INT bytes = INTVAL (operands[2]);
+  int first_time;
+  int got_extra = 0;
+
+  if (bytes < 1 || bytes > MAX_MOVE_BYTES)
+    abort ();
+
+  /* We do not have a post-increment store available, so the first set
+     of stores is done without any increment, then the remaining ones
+     can use the pre-increment addressing mode.
+
+     Note: expand_block_move() also relies upon this behaviour when
+     building loops to copy large blocks.  */
*/ + first_time = 1; + + while (bytes > 0) + { + if (bytes >= 8) + { + if (first_time) + { + output_asm_insn ("ld\t%3, %p1", operands); + output_asm_insn ("ld\t%4, %p1", operands); + output_asm_insn ("st\t%3, @%0", operands); + output_asm_insn ("st\t%4, %s0", operands); + } + else + { + output_asm_insn ("ld\t%3, %p1", operands); + output_asm_insn ("ld\t%4, %p1", operands); + output_asm_insn ("st\t%3, %s0", operands); + output_asm_insn ("st\t%4, %s0", operands); + } + + bytes -= 8; + } + else if (bytes >= 4) + { + if (bytes > 4) + got_extra = 1; + + output_asm_insn ("ld\t%3, %p1", operands); + + if (got_extra) + output_asm_insn ("ld\t%4, %p1", operands); + + if (first_time) + output_asm_insn ("st\t%3, @%0", operands); + else + output_asm_insn ("st\t%3, %s0", operands); + + bytes -= 4; + } + else + { + /* Get the entire next word, even though we do not want all of it. + The saves us from doing several smaller loads, and we assume that + we cannot cause a page fault when at least part of the word is in + valid memory. If got_extra is true then we have already loaded + the next word as part of loading and storing the previous word. */ + if (! got_extra) + output_asm_insn ("ld\t%4, @%1", operands); + + if (bytes >= 2) + { + bytes -= 2; + + output_asm_insn ("sth\t%4, @%0", operands); + + /* If there is a byte left to store then increment the + destination address and shift the contents of the source + register down by 16 bits. We could not do the address + increment in the store half word instruction, because it does + not have an auto increment mode. */ + if (bytes > 0) /* assert (bytes == 1) */ + { + output_asm_insn ("srai\t%4, #16", operands); + output_asm_insn ("addi\t%0, #2", operands); + } + } + + output_asm_insn ("stb\t%4, @%0", operands); + + bytes = 0; + } + + first_time = 0; + } + + return ""; +} + +/* Return true if op is an integer constant, less than or equal to + MAX_MOVE_BYTES. */ +int +m32r_block_immediate_operand (op, mode) + rtx op; + int mode; +{ + if (GET_CODE (op) != CONST_INT + || INTVAL (op) > MAX_MOVE_BYTES + || INTVAL (op) <= 0) + return 0; + + return 1; +} diff --git a/gcc/config/m32r/m32r.h b/gcc/config/m32r/m32r.h index 3435a17..382f66e 100644 --- a/gcc/config/m32r/m32r.h +++ b/gcc/config/m32r/m32r.h @@ -1987,6 +1987,7 @@ enum m32r_function_type { "cmp_int16_operand", { CONST_INT }}, \ { "call_address_operand", { SYMBOL_REF, LABEL_REF, CONST }}, \ { "small_insn_p", { INSN, CALL_INSN, JUMP_INSN }}, \ +{ "m32r_block_immediate_operand",{ CONST_INT }}, \ { "large_insn_p", { INSN, CALL_INSN, JUMP_INSN }}, /* Functions declared in m32r.c */ @@ -2078,3 +2079,7 @@ extern char *emit_cond_move PROTO((Rtx *, Rtx)); /* Needed by a peephole optimisation. */ #define PRESERVE_DEATH_INFO_REGNO_P(regno) (regno < FIRST_PSEUDO_REGISTER) + +extern char * m32r_output_block_move PROTO((Rtx, Rtx *)); +extern int m32r_block_immediate_operand PROTO((Rtx, int)); +extern void m32r_expand_block_move PROTO((Rtx *)); diff --git a/gcc/config/m32r/m32r.md b/gcc/config/m32r/m32r.md index 5349ca8..e163890 100644 --- a/gcc/config/m32r/m32r.md +++ b/gcc/config/m32r/m32r.md @@ -1759,3 +1759,39 @@ (set_attr "length" "4") ] ) + +;; Block moves, see m32r.c for more details. 
+;; Argument 0 is the destination
+;; Argument 1 is the source
+;; Argument 2 is the length
+;; Argument 3 is the alignment
+
+(define_expand "movstrsi"
+  [(parallel [(set (match_operand:BLK 0 "general_operand" "")
+		   (match_operand:BLK 1 "general_operand" ""))
+	      (use (match_operand:SI 2 "immediate_operand" ""))
+	      (use (match_operand:SI 3 "immediate_operand" ""))])]
+  ""
+  "
+{
+  if (operands[0])		/* avoid unused code messages */
+    {
+      m32r_expand_block_move (operands);
+      DONE;
+    }
+}")
+
+;; Insn generated by block moves
+
+(define_insn "movstrsi_internal"
+  [(set (mem:BLK (match_operand:SI 0 "register_operand" "r"))	;; destination
+	(mem:BLK (match_operand:SI 1 "register_operand" "r")))	;; source
+   (use (match_operand:SI 2 "m32r_block_immediate_operand" "J"));; # bytes to move
+   (clobber (match_scratch:SI 3 "=&r"))	;; temp 1
+   (clobber (match_scratch:SI 4 "=&r"))	;; temp 2
+   (clobber (match_dup 0))
+   (clobber (match_dup 1))]
+  ""
+  "* return m32r_output_block_move (insn, operands);"
+  [(set_attr "type" "store8")
+   (set_attr "length" "72")])	;; Maximum
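
To close, a C model of the per-chunk schedule m32r_output_block_move prints: word pairs first, then a single word, then a deliberately over-wide load for any 1-3 byte tail. This is a sketch of the data movement only; the real function emits m32r assembly, where the very first store must use plain @dst addressing because the chip has no post-increment store, and every later store uses pre-increment @+dst:

```c
#include <stdint.h>
#include <string.h>

/* Model of the schedule for one chunk of 1..32 bytes.  Word-sized
   accesses model the ld/st pairs; the trailing memcpy models the
   sth/stb tail, which in the generated code reuses a whole word
   loaded past the end of the block (assumed not to fault because
   part of that word is known to be valid).  */
static void
model_output_block_move (uint32_t *dst, const uint32_t *src, long bytes)
{
  while (bytes >= 8)        /* ld/ld followed by st/st */
    {
      dst[0] = src[0];
      dst[1] = src[1];
      dst += 2;
      src += 2;
      bytes -= 8;
    }

  if (bytes >= 4)           /* one remaining full word */
    {
      *dst++ = *src++;
      bytes -= 4;
    }

  if (bytes > 0)            /* 1-3 byte tail stored via sth/stb */
    memcpy (dst, src, bytes);
}
```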