author     John David Anglin <dave.anglin@nrc-cnrc.gc.ca>  2003-11-08 20:36:30 +0000
committer  John David Anglin <danglin@gcc.gnu.org>          2003-11-08 20:36:30 +0000
commit     cdc9103cba6f914981e3ba352d22db578c4aa209 (patch)
tree       63aafd1067bf87ae1d6116cc2f87f72d9b889f0f
parent     ac47cc13ab45226d800f5cbc98ad50ed99812b4b (diff)
re PR rtl-optimization/12630 (Various unrecognizable insns and ICEs at -O3)
PR optimization/12630
* pa.c (compute_movstrsi_length): Rename to compute_movstr_length.
Handle length computation 64-bit moves.
(compute_clrstr_length, output_block_clear): Implement block clear.
(output_block_move): Handle 64-bit moves.
(pa_adjust_insn_length): Use compute_movstr_length and
compute_clrstr_length.
* pa.md (movstrsi): Revise operand order and comments.  Don't use
match_scratch.
(movstrsi_internal): Delete.
(movstrsi_prereload, movstrsi_postreload): New insns.  Define splitter
and peephole2 patterns to transform prereload to postreload form.
(movstrdi, movstrdi_prereload, movstrdi_postreload, clrstrsi,
clrstrsi_prereload, clrstrsi_postreload, clrstrdi, clrstrdi_prereload,
clrstrdi_postreload): New patterns for 64-bit block move, and block
clear.
* pa-protos.h (output_block_clear): New prototype.

From-SVN: r73375
-rw-r--r--  gcc/ChangeLog             |  20
-rw-r--r--  gcc/config/pa/pa-protos.h |   1
-rw-r--r--  gcc/config/pa/pa.c        | 197
-rw-r--r--  gcc/config/pa/pa.md       | 479
4 files changed, 669 insertions, 28 deletions
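
For orientation, here is a hypothetical caller (not part of the patch; the struct and function names are illustrative) of the kind of code these patterns target: a copy or clear of a small, constant-size, well-aligned block, which GCC may expand inline through the movstr*/clrstr* patterns instead of calling the library, subject to the size/alignment heuristics in the expanders below.

#include <string.h>

/* Illustrative only.  A 64-byte, doubleword-aligned block; on a
   TARGET_64BIT HP-PA these calls are candidates for the new movstrdi
   and clrstrdi patterns when the heuristics allow inlining.  */
struct block { long data[8]; };

void
copy_block (struct block *dst, const struct block *src)
{
  memcpy (dst, src, sizeof *dst);   /* block move, constant size */
}

void
clear_block (struct block *dst)
{
  memset (dst, 0, sizeof *dst);     /* block clear, constant size */
}
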
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 7835708..60d7921 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,23 @@
+2003-11-08 John David Anglin <dave.anglin@nrc-cnrc.gc.ca>
+
+ PR optimization/12630
+ * pa.c (compute_movstrsi_length): Rename to compute_movstr_length.
+ Handle length computation 64-bit moves.
+ (compute_clrstr_length, output_block_clear): Implement block clear.
+ (output_block_move): Handle 64-bit moves.
+ (pa_adjust_insn_length): Use compute_movstr_length and
+ compute_clrstr_length.
+ * pa.md (movstrsi): Revise operand order and comments. Don't use
+ match_scratch.
+ (movstrsi_internal): Delete.
+ (movstrsi_prereload, movstrsi_postreload): New insns. Define splitter
+ and peephole2 patterns to transform prereload to postreload form.
+ (movstrdi, movstrdi_prereload, movstrdi_postreload, clrstrsi,
+ clrstrsi_prereload, clrstrsi_postreload, clrstrdi, clrstrdi_prereload,
+ clrstrdi_postreload): New patterns for 64-bit block move, and block
+ clear.
+ * pa-protos.h (output_block_clear): New prototype.
+
2003-11-08 Andreas Schwab <schwab@suse.de>
* dbxout.c (current_file): Also wrap inside DBX_DEBUGGING_INFO ||
diff --git a/gcc/config/pa/pa-protos.h b/gcc/config/pa/pa-protos.h
index f5f15cb..4d5ce69 100644
--- a/gcc/config/pa/pa-protos.h
+++ b/gcc/config/pa/pa-protos.h
@@ -40,6 +40,7 @@ extern const char *output_ior (rtx *);
extern const char *output_move_double (rtx *);
extern const char *output_fp_move_double (rtx *);
extern const char *output_block_move (rtx *, int);
+extern const char *output_block_clear (rtx *, int);
extern const char *output_cbranch (rtx *, int, int, int, rtx);
extern const char *output_lbranch (rtx, rtx);
extern const char *output_bb (rtx *, int, int, int, rtx, int);
diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c
index 856e301..8c8aebb 100644
--- a/gcc/config/pa/pa.c
+++ b/gcc/config/pa/pa.c
@@ -103,7 +103,8 @@ static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
static int forward_branch_p (rtx);
static int shadd_constant_p (int);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
-static int compute_movstrsi_length (rtx);
+static int compute_movstr_length (rtx);
+static int compute_clrstr_length (rtx);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, int, int);
@@ -2431,8 +2432,8 @@ find_addr_reg (rtx addr)
OPERANDS[0] is the destination pointer as a REG, clobbered.
OPERANDS[1] is the source pointer as a REG, clobbered.
OPERANDS[2] is a register for temporary storage.
- OPERANDS[4] is the size as a CONST_INT
OPERANDS[3] is a register for temporary storage.
+ OPERANDS[4] is the size as a CONST_INT
OPERANDS[5] is the alignment safe to use, as a CONST_INT.
OPERANDS[6] is another temporary register. */
@@ -2442,15 +2443,43 @@ output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
int align = INTVAL (operands[5]);
unsigned long n_bytes = INTVAL (operands[4]);
- /* We can't move more than four bytes at a time because the PA
+ /* We can't move more than a word at a time because the PA
has no longer integer move insns. (Could use fp mem ops?) */
- if (align > 4)
- align = 4;
+ if (align > (TARGET_64BIT ? 8 : 4))
+ align = (TARGET_64BIT ? 8 : 4);
/* Note that we know each loop below will execute at least twice
(else we would have open-coded the copy). */
switch (align)
{
+ case 8:
+ /* Pre-adjust the loop counter. */
+ operands[4] = GEN_INT (n_bytes - 16);
+ output_asm_insn ("ldi %4,%2", operands);
+
+ /* Copying loop. */
+ output_asm_insn ("ldd,ma 8(%1),%3", operands);
+ output_asm_insn ("ldd,ma 8(%1),%6", operands);
+ output_asm_insn ("std,ma %3,8(%0)", operands);
+ output_asm_insn ("addib,>= -16,%2,.-12", operands);
+ output_asm_insn ("std,ma %6,8(%0)", operands);
+
+ /* Handle the residual. There could be up to 7 bytes of
+ residual to copy! */
+ if (n_bytes % 16 != 0)
+ {
+ operands[4] = GEN_INT (n_bytes % 8);
+ if (n_bytes % 16 >= 8)
+ output_asm_insn ("ldd,ma 8(%1),%3", operands);
+ if (n_bytes % 8 != 0)
+ output_asm_insn ("ldd 0(%1),%6", operands);
+ if (n_bytes % 16 >= 8)
+ output_asm_insn ("std,ma %3,8(%0)", operands);
+ if (n_bytes % 8 != 0)
+ output_asm_insn ("stdby,e %6,%4(%0)", operands);
+ }
+ return "";
+
case 4:
/* Pre-adjust the loop counter. */
operands[4] = GEN_INT (n_bytes - 8);
@@ -2536,7 +2565,7 @@ output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
count insns rather than emit them. */
static int
-compute_movstrsi_length (rtx insn)
+compute_movstr_length (rtx insn)
{
rtx pat = PATTERN (insn);
unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
@@ -2545,8 +2574,8 @@ compute_movstrsi_length (rtx insn)
/* We can't move more than four bytes at a time because the PA
has no longer integer move insns. (Could use fp mem ops?) */
- if (align > 4)
- align = 4;
+ if (align > (TARGET_64BIT ? 8 : 4))
+ align = (TARGET_64BIT ? 8 : 4);
/* The basic copying loop. */
n_insns = 6;
@@ -2564,6 +2593,148 @@ compute_movstrsi_length (rtx insn)
/* Lengths are expressed in bytes now; each insn is 4 bytes. */
return n_insns * 4;
}
+
+/* Emit code to perform a block clear.
+
+ OPERANDS[0] is the destination pointer as a REG, clobbered.
+ OPERANDS[1] is a register for temporary storage.
+ OPERANDS[2] is the size as a CONST_INT
+ OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
+
+const char *
+output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
+{
+ int align = INTVAL (operands[3]);
+ unsigned long n_bytes = INTVAL (operands[2]);
+
+ /* We can't clear more than a word at a time because the PA
+ has no longer integer move insns. */
+ if (align > (TARGET_64BIT ? 8 : 4))
+ align = (TARGET_64BIT ? 8 : 4);
+
+ /* Note that we know each loop below will execute at least twice
+ (else we would have open-coded the copy). */
+ switch (align)
+ {
+ case 8:
+ /* Pre-adjust the loop counter. */
+ operands[2] = GEN_INT (n_bytes - 16);
+ output_asm_insn ("ldi %2,%1", operands);
+
+ /* Loop. */
+ output_asm_insn ("std,ma %%r0,8(%0)", operands);
+ output_asm_insn ("addib,>= -16,%1,.-4", operands);
+ output_asm_insn ("std,ma %%r0,8(%0)", operands);
+
+ /* Handle the residual. There could be up to 7 bytes of
+ residual to copy! */
+ if (n_bytes % 16 != 0)
+ {
+ operands[2] = GEN_INT (n_bytes % 8);
+ if (n_bytes % 16 >= 8)
+ output_asm_insn ("std,ma %%r0,8(%0)", operands);
+ if (n_bytes % 8 != 0)
+ output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
+ }
+ return "";
+
+ case 4:
+ /* Pre-adjust the loop counter. */
+ operands[2] = GEN_INT (n_bytes - 8);
+ output_asm_insn ("ldi %2,%1", operands);
+
+ /* Loop. */
+ output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
+ output_asm_insn ("addib,>= -8,%1,.-4", operands);
+ output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
+
+ /* Handle the residual. There could be up to 7 bytes of
+ residual to copy! */
+ if (n_bytes % 8 != 0)
+ {
+ operands[2] = GEN_INT (n_bytes % 4);
+ if (n_bytes % 8 >= 4)
+ output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
+ if (n_bytes % 4 != 0)
+ output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
+ }
+ return "";
+
+ case 2:
+ /* Pre-adjust the loop counter. */
+ operands[2] = GEN_INT (n_bytes - 4);
+ output_asm_insn ("ldi %2,%1", operands);
+
+ /* Loop. */
+ output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
+ output_asm_insn ("addib,>= -4,%1,.-4", operands);
+ output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
+
+ /* Handle the residual. */
+ if (n_bytes % 4 != 0)
+ {
+ if (n_bytes % 4 >= 2)
+ output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
+ if (n_bytes % 2 != 0)
+ output_asm_insn ("stb %%r0,0(%0)", operands);
+ }
+ return "";
+
+ case 1:
+ /* Pre-adjust the loop counter. */
+ operands[2] = GEN_INT (n_bytes - 2);
+ output_asm_insn ("ldi %2,%1", operands);
+
+ /* Loop. */
+ output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
+ output_asm_insn ("addib,>= -2,%1,.-4", operands);
+ output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
+
+ /* Handle the residual. */
+ if (n_bytes % 2 != 0)
+ output_asm_insn ("stb %%r0,0(%0)", operands);
+
+ return "";
+
+ default:
+ abort ();
+ }
+}
+
+/* Count the number of insns necessary to handle this block move.
+
+ Basic structure is the same as emit_block_move, except that we
+ count insns rather than emit them. */
+
+static int
+compute_clrstr_length (rtx insn)
+{
+ rtx pat = PATTERN (insn);
+ unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
+ unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
+ unsigned int n_insns = 0;
+
+ /* We can't clear more than a word at a time because the PA
+ has no longer integer move insns. */
+ if (align > (TARGET_64BIT ? 8 : 4))
+ align = (TARGET_64BIT ? 8 : 4);
+
+ /* The basic loop. */
+ n_insns = 4;
+
+ /* Residuals. */
+ if (n_bytes % (2 * align) != 0)
+ {
+ if ((n_bytes % (2 * align)) >= align)
+ n_insns++;
+
+ if ((n_bytes % align) != 0)
+ n_insns++;
+ }
+
+ /* Lengths are expressed in bytes now; each insn is 4 bytes. */
+ return n_insns * 4;
+}
const char *
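
As a cross-check on the length computation above, here is a minimal standalone C sketch (not from the patch) of the arithmetic compute_clrstr_length performs: a fixed four-insn clearing loop plus up to two residual stores, with the result returned in bytes since each PA insn is 4 bytes long.

/* Sketch: `align' is assumed already capped at the word size (4 or 8)
   and `n_bytes' is the constant byte count, as in the function above.  */
int
clrstr_length_sketch (unsigned long n_bytes, unsigned int align)
{
  unsigned int n_insns = 4;              /* the basic clearing loop */

  if (n_bytes % (2 * align) != 0)
    {
      if (n_bytes % (2 * align) >= align)
        n_insns++;                       /* one full-word residual store */
      if (n_bytes % align != 0)
        n_insns++;                       /* partial-word residual store */
    }

  return n_insns * 4;                    /* lengths are in bytes */
}
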
@@ -4337,7 +4508,15 @@ pa_adjust_insn_length (rtx insn, int length)
&& GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
&& GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
&& GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
- return compute_movstrsi_length (insn) - 4;
+ return compute_movstr_length (insn) - 4;
+ /* Block clear pattern. */
+ else if (GET_CODE (insn) == INSN
+ && GET_CODE (pat) == PARALLEL
+ && GET_CODE (XVECEXP (pat, 0, 0)) == SET
+ && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
+ && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
+ && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
+ return compute_clrstr_length (insn) - 4;
/* Conditional branch with an unfilled delay slot. */
else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
{
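
Before the machine-description changes, a brief C model (an approximation, not code from the patch) of what the new 8-byte-aligned sequence in output_block_move does at run time: copy sixteen bytes per loop iteration with paired ldd,ma/std,ma, then move one residual doubleword if eight or more bytes remain, and finish the last one to seven bytes with stdby,e.

#include <string.h>

/* C model of the case 8 copying sequence; assumes n_bytes >= 32 so the
   loop executes at least twice, as the expander guarantees.  */
void
block_move_8_sketch (char *dst, const char *src, unsigned long n_bytes)
{
  unsigned long i;

  for (i = 0; i + 16 <= n_bytes; i += 16)   /* the ldd,ma/std,ma loop */
    memcpy (dst + i, src + i, 16);

  if (n_bytes % 16 >= 8)                    /* residual doubleword */
    {
      memcpy (dst + i, src + i, 8);
      i += 8;
    }
  if (n_bytes % 8 != 0)                     /* final 1-7 bytes (stdby,e) */
    memcpy (dst + i, src + i, n_bytes % 8);
}
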
diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md
index 79f2e26..aa0bfcb 100644
--- a/gcc/config/pa/pa.md
+++ b/gcc/config/pa/pa.md
@@ -2955,20 +2955,20 @@
(set_attr "length" "4")])
;; The definition of this insn does not really explain what it does,
-;; but it should suffice
-;; that anything generated as this insn will be recognized as one
-;; and that it will not successfully combine with anything.
+;; but it should suffice that anything generated as this insn will be
+;; recognized as a movstrsi operation, and that it will not successfully
+;; combine with anything.
(define_expand "movstrsi"
[(parallel [(set (match_operand:BLK 0 "" "")
(match_operand:BLK 1 "" ""))
- (clobber (match_scratch:SI 7 ""))
- (clobber (match_scratch:SI 8 ""))
(clobber (match_dup 4))
(clobber (match_dup 5))
(clobber (match_dup 6))
+ (clobber (match_dup 7))
+ (clobber (match_dup 8))
(use (match_operand:SI 2 "arith_operand" ""))
(use (match_operand:SI 3 "const_int_operand" ""))])]
- "!TARGET_64BIT"
+ "!TARGET_64BIT && optimize > 0"
"
{
int size, align;
@@ -2990,7 +2990,7 @@
If the size is large in respect to the known alignment, then use
the library routines.
- If the size is small in repsect to the known alignment, then open
+ If the size is small in respect to the known alignment, then open
code the copy (since that will lead to better scheduling).
Else use the block move pattern. */
@@ -3003,8 +3003,7 @@
align = INTVAL (operands[3]);
align = align > 4 ? 4 : align;
- /* If size/alignment > 8 (eg size is large in respect to alignment),
- then use the library routines. */
+ /* If size/alignment is large, then use the library routines. */
if (size / align > 16)
FAIL;
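
The inlining heuristic used by the movstr and clrstr expanders, restated as a standalone C predicate (a sketch under the assumptions noted in the comment, not code from the patch): fail to the library when the size is unknown or large relative to the alignment, fail so that the copy is open-coded when it is small relative to MOVE_RATIO, and otherwise use the block pattern.

/* Sketch of the expander decision.  `size' is the constant byte count,
   or -1 when it is not known at compile time; `align' is the alignment
   already capped at the word size; `move_ratio' stands in for MOVE_RATIO.
   Returns nonzero when the inline block pattern should be used.  */
int
use_block_pattern (long size, int align, int move_ratio)
{
  if (size < 0)                    /* run-time size: library routine */
    return 0;
  if (size / align > 16)           /* large w.r.t. alignment: library */
    return 0;
  if (size / align < move_ratio)   /* small: open-coded copy instead */
    return 0;
  return 1;                        /* otherwise, use the block pattern */
}
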
@@ -3022,28 +3021,470 @@
operands[4] = gen_reg_rtx (SImode);
operands[5] = gen_reg_rtx (SImode);
operands[6] = gen_reg_rtx (SImode);
- operands[7] = XEXP (operands[0], 0);
- operands[8] = XEXP (operands[1], 0);
+ operands[7] = gen_reg_rtx (SImode);
+ operands[8] = gen_reg_rtx (SImode);
}")
;; The operand constraints are written like this to support both compile-time
-;; and run-time determined byte count. If the count is run-time determined,
-;; the register with the byte count is clobbered by the copying code, and
-;; therefore it is forced to operand 2. If the count is compile-time
-;; determined, we need two scratch registers for the unrolled code.
-(define_insn "movstrsi_internal"
+;; and run-time determined byte counts. The expander and output_block_move
+;; only support compile-time determined counts at this time.
+;;
+;; If the count is run-time determined, the register with the byte count
+;; is clobbered by the copying code, and therefore it is forced to operand 2.
+;;
+;; We used to clobber operands 0 and 1. However, a change to regrename.c
+;; broke this semantic for pseudo registers. We can't use match_scratch
+;; as this requires two registers in the class R1_REGS when the MEMs for
+;; operands 0 and 1 are both equivalent to symbolic MEMs. Thus, we are
+;; forced to internally copy operands 0 and 1 to operands 7 and 8,
+;; respectively. We then split or peephole optimize after reload.
+(define_insn "movstrsi_prereload"
[(set (mem:BLK (match_operand:SI 0 "register_operand" "r,r"))
(mem:BLK (match_operand:SI 1 "register_operand" "r,r")))
- (clobber (match_scratch:SI 7 "=0,0"))
- (clobber (match_scratch:SI 8 "=1,1"))
(clobber (match_operand:SI 2 "register_operand" "=r,r")) ;loop cnt/tmp
- (clobber (match_operand:SI 3 "register_operand" "=&r,&r")) ;item tmp
+ (clobber (match_operand:SI 3 "register_operand" "=&r,&r")) ;item tmp1
(clobber (match_operand:SI 6 "register_operand" "=&r,&r")) ;item tmp2
+ (clobber (match_operand:SI 7 "register_operand" "=&r,&r")) ;item tmp3
+ (clobber (match_operand:SI 8 "register_operand" "=&r,&r")) ;item tmp4
(use (match_operand:SI 4 "arith_operand" "J,2")) ;byte count
(use (match_operand:SI 5 "const_int_operand" "n,n"))] ;alignment
"!TARGET_64BIT"
+ "#"
+ [(set_attr "type" "multi,multi")])
+
+(define_split
+ [(parallel [(set (mem:BLK (match_operand:SI 0 "register_operand" ""))
+ (mem:BLK (match_operand:SI 1 "register_operand" "")))
+ (clobber (match_operand:SI 2 "register_operand" ""))
+ (clobber (match_operand:SI 3 "register_operand" ""))
+ (clobber (match_operand:SI 6 "register_operand" ""))
+ (clobber (match_operand:SI 7 "register_operand" ""))
+ (clobber (match_operand:SI 8 "register_operand" ""))
+ (use (match_operand:SI 4 "arith_operand" ""))
+ (use (match_operand:SI 5 "const_int_operand" ""))])]
+ "!TARGET_64BIT && reload_completed && !flag_peephole2"
+ [(set (match_dup 7) (match_dup 0))
+ (set (match_dup 8) (match_dup 1))
+ (parallel [(set (mem:BLK (match_dup 7)) (mem:BLK (match_dup 8)))
+ (clobber (match_dup 2))
+ (clobber (match_dup 3))
+ (clobber (match_dup 6))
+ (clobber (match_dup 7))
+ (clobber (match_dup 8))
+ (use (match_dup 4))
+ (use (match_dup 5))
+ (const_int 0)])]
+ "")
+
+(define_peephole2
+ [(parallel [(set (mem:BLK (match_operand:SI 0 "register_operand" ""))
+ (mem:BLK (match_operand:SI 1 "register_operand" "")))
+ (clobber (match_operand:SI 2 "register_operand" ""))
+ (clobber (match_operand:SI 3 "register_operand" ""))
+ (clobber (match_operand:SI 6 "register_operand" ""))
+ (clobber (match_operand:SI 7 "register_operand" ""))
+ (clobber (match_operand:SI 8 "register_operand" ""))
+ (use (match_operand:SI 4 "arith_operand" ""))
+ (use (match_operand:SI 5 "const_int_operand" ""))])]
+ "!TARGET_64BIT"
+ [(parallel [(set (mem:BLK (match_dup 7)) (mem:BLK (match_dup 8)))
+ (clobber (match_dup 2))
+ (clobber (match_dup 3))
+ (clobber (match_dup 6))
+ (clobber (match_dup 7))
+ (clobber (match_dup 8))
+ (use (match_dup 4))
+ (use (match_dup 5))
+ (const_int 0)])]
+ "
+{
+ if (dead_or_set_p (curr_insn, operands[0]))
+ operands[7] = operands[0];
+ else
+ emit_insn (gen_rtx_SET (VOIDmode, operands[7], operands[0]));
+
+ if (dead_or_set_p (curr_insn, operands[1]))
+ operands[8] = operands[1];
+ else
+ emit_insn (gen_rtx_SET (VOIDmode, operands[8], operands[1]));
+}")
+
+(define_insn "movstrsi_postreload"
+ [(set (mem:BLK (match_operand:SI 0 "register_operand" "r,r"))
+ (mem:BLK (match_operand:SI 1 "register_operand" "r,r")))
+ (clobber (match_operand:SI 2 "register_operand" "=r,r")) ;loop cnt/tmp
+ (clobber (match_operand:SI 3 "register_operand" "=&r,&r")) ;item tmp1
+ (clobber (match_operand:SI 6 "register_operand" "=&r,&r")) ;item tmp2
+ (clobber (match_dup 0))
+ (clobber (match_dup 1))
+ (use (match_operand:SI 4 "arith_operand" "J,2")) ;byte count
+ (use (match_operand:SI 5 "const_int_operand" "n,n")) ;alignment
+ (const_int 0)]
+ "!TARGET_64BIT && reload_completed"
"* return output_block_move (operands, !which_alternative);"
[(set_attr "type" "multi,multi")])
+
+(define_expand "movstrdi"
+ [(parallel [(set (match_operand:BLK 0 "" "")
+ (match_operand:BLK 1 "" ""))
+ (clobber (match_dup 4))
+ (clobber (match_dup 5))
+ (clobber (match_dup 6))
+ (clobber (match_dup 7))
+ (clobber (match_dup 8))
+ (use (match_operand:DI 2 "arith_operand" ""))
+ (use (match_operand:DI 3 "const_int_operand" ""))])]
+ "TARGET_64BIT && optimize > 0"
+ "
+{
+ int size, align;
+
+ /* HP provides very fast block move library routine for the PA;
+ this routine includes:
+
+ 4x4 byte at a time block moves,
+ 1x4 byte at a time with alignment checked at runtime with
+ attempts to align the source and destination as needed
+ 1x1 byte loop
+
+ With that in mind, here's the heuristics to try and guess when
+ the inlined block move will be better than the library block
+ move:
+
+ If the size isn't constant, then always use the library routines.
+
+ If the size is large in respect to the known alignment, then use
+ the library routines.
+
+ If the size is small in respect to the known alignment, then open
+ code the copy (since that will lead to better scheduling).
+
+ Else use the block move pattern. */
+
+ /* Undetermined size, use the library routine. */
+ if (GET_CODE (operands[2]) != CONST_INT)
+ FAIL;
+
+ size = INTVAL (operands[2]);
+ align = INTVAL (operands[3]);
+ align = align > 8 ? 8 : align;
+
+ /* If size/alignment is large, then use the library routines. */
+ if (size / align > 16)
+ FAIL;
+
+ /* This does happen, but not often enough to worry much about. */
+ if (size / align < MOVE_RATIO)
+ FAIL;
+
+ /* Fall through means we're going to use our block move pattern. */
+ operands[0]
+ = replace_equiv_address (operands[0],
+ copy_to_mode_reg (DImode, XEXP (operands[0], 0)));
+ operands[1]
+ = replace_equiv_address (operands[1],
+ copy_to_mode_reg (DImode, XEXP (operands[1], 0)));
+ operands[4] = gen_reg_rtx (DImode);
+ operands[5] = gen_reg_rtx (DImode);
+ operands[6] = gen_reg_rtx (DImode);
+ operands[7] = gen_reg_rtx (DImode);
+ operands[8] = gen_reg_rtx (DImode);
+}")
+
+;; The operand constraints are written like this to support both compile-time
+;; and run-time determined byte counts. The expander and output_block_move
+;; only support compile-time determined counts at this time.
+;;
+;; If the count is run-time determined, the register with the byte count
+;; is clobbered by the copying code, and therefore it is forced to operand 2.
+;;
+;; We used to clobber operands 0 and 1. However, a change to regrename.c
+;; broke this semantic for pseudo registers. We can't use match_scratch
+;; as this requires two registers in the class R1_REGS when the MEMs for
+;; operands 0 and 1 are both equivalent to symbolic MEMs. Thus, we are
+;; forced to internally copy operands 0 and 1 to operands 7 and 8,
+;; respectively. We then split or peephole optimize after reload.
+(define_insn "movstrdi_prereload"
+ [(set (mem:BLK (match_operand:DI 0 "register_operand" "r,r"))
+ (mem:BLK (match_operand:DI 1 "register_operand" "r,r")))
+ (clobber (match_operand:DI 2 "register_operand" "=r,r")) ;loop cnt/tmp
+ (clobber (match_operand:DI 3 "register_operand" "=&r,&r")) ;item tmp1
+ (clobber (match_operand:DI 6 "register_operand" "=&r,&r")) ;item tmp2
+ (clobber (match_operand:DI 7 "register_operand" "=&r,&r")) ;item tmp3
+ (clobber (match_operand:DI 8 "register_operand" "=&r,&r")) ;item tmp4
+ (use (match_operand:DI 4 "arith_operand" "J,2")) ;byte count
+ (use (match_operand:DI 5 "const_int_operand" "n,n"))] ;alignment
+ "TARGET_64BIT"
+ "#"
+ [(set_attr "type" "multi,multi")])
+
+(define_split
+ [(parallel [(set (mem:BLK (match_operand:DI 0 "register_operand" ""))
+ (mem:BLK (match_operand:DI 1 "register_operand" "")))
+ (clobber (match_operand:DI 2 "register_operand" ""))
+ (clobber (match_operand:DI 3 "register_operand" ""))
+ (clobber (match_operand:DI 6 "register_operand" ""))
+ (clobber (match_operand:DI 7 "register_operand" ""))
+ (clobber (match_operand:DI 8 "register_operand" ""))
+ (use (match_operand:DI 4 "arith_operand" ""))
+ (use (match_operand:DI 5 "const_int_operand" ""))])]
+ "TARGET_64BIT && reload_completed && !flag_peephole2"
+ [(set (match_dup 7) (match_dup 0))
+ (set (match_dup 8) (match_dup 1))
+ (parallel [(set (mem:BLK (match_dup 7)) (mem:BLK (match_dup 8)))
+ (clobber (match_dup 2))
+ (clobber (match_dup 3))
+ (clobber (match_dup 6))
+ (clobber (match_dup 7))
+ (clobber (match_dup 8))
+ (use (match_dup 4))
+ (use (match_dup 5))
+ (const_int 0)])]
+ "")
+
+(define_peephole2
+ [(parallel [(set (mem:BLK (match_operand:DI 0 "register_operand" ""))
+ (mem:BLK (match_operand:DI 1 "register_operand" "")))
+ (clobber (match_operand:DI 2 "register_operand" ""))
+ (clobber (match_operand:DI 3 "register_operand" ""))
+ (clobber (match_operand:DI 6 "register_operand" ""))
+ (clobber (match_operand:DI 7 "register_operand" ""))
+ (clobber (match_operand:DI 8 "register_operand" ""))
+ (use (match_operand:DI 4 "arith_operand" ""))
+ (use (match_operand:DI 5 "const_int_operand" ""))])]
+ "TARGET_64BIT"
+ [(parallel [(set (mem:BLK (match_dup 7)) (mem:BLK (match_dup 8)))
+ (clobber (match_dup 2))
+ (clobber (match_dup 3))
+ (clobber (match_dup 6))
+ (clobber (match_dup 7))
+ (clobber (match_dup 8))
+ (use (match_dup 4))
+ (use (match_dup 5))
+ (const_int 0)])]
+ "
+{
+ if (dead_or_set_p (curr_insn, operands[0]))
+ operands[7] = operands[0];
+ else
+ emit_insn (gen_rtx_SET (VOIDmode, operands[7], operands[0]));
+
+ if (dead_or_set_p (curr_insn, operands[1]))
+ operands[8] = operands[1];
+ else
+ emit_insn (gen_rtx_SET (VOIDmode, operands[8], operands[1]));
+}")
+
+(define_insn "movstrdi_postreload"
+ [(set (mem:BLK (match_operand:DI 0 "register_operand" "r,r"))
+ (mem:BLK (match_operand:DI 1 "register_operand" "r,r")))
+ (clobber (match_operand:DI 2 "register_operand" "=r,r")) ;loop cnt/tmp
+ (clobber (match_operand:DI 3 "register_operand" "=&r,&r")) ;item tmp1
+ (clobber (match_operand:DI 6 "register_operand" "=&r,&r")) ;item tmp2
+ (clobber (match_dup 0))
+ (clobber (match_dup 1))
+ (use (match_operand:DI 4 "arith_operand" "J,2")) ;byte count
+ (use (match_operand:DI 5 "const_int_operand" "n,n")) ;alignment
+ (const_int 0)]
+ "TARGET_64BIT && reload_completed"
+ "* return output_block_move (operands, !which_alternative);"
+ [(set_attr "type" "multi,multi")])
+
+(define_expand "clrstrsi"
+ [(parallel [(set (match_operand:BLK 0 "" "")
+ (const_int 0))
+ (clobber (match_dup 3))
+ (clobber (match_dup 4))
+ (use (match_operand:SI 1 "arith_operand" ""))
+ (use (match_operand:SI 2 "const_int_operand" ""))])]
+ "!TARGET_64BIT && optimize > 0"
+ "
+{
+ int size, align;
+
+ /* Undetermined size, use the library routine. */
+ if (GET_CODE (operands[1]) != CONST_INT)
+ FAIL;
+
+ size = INTVAL (operands[1]);
+ align = INTVAL (operands[2]);
+ align = align > 4 ? 4 : align;
+
+ /* If size/alignment is large, then use the library routines. */
+ if (size / align > 16)
+ FAIL;
+
+ /* This does happen, but not often enough to worry much about. */
+ if (size / align < MOVE_RATIO)
+ FAIL;
+
+ /* Fall through means we're going to use our block clear pattern. */
+ operands[0]
+ = replace_equiv_address (operands[0],
+ copy_to_mode_reg (SImode, XEXP (operands[0], 0)));
+ operands[3] = gen_reg_rtx (SImode);
+ operands[4] = gen_reg_rtx (SImode);
+}")
+
+(define_insn "clrstrsi_prereload"
+ [(set (mem:BLK (match_operand:SI 0 "register_operand" "r,r"))
+ (const_int 0))
+ (clobber (match_operand:SI 1 "register_operand" "=r,r")) ;loop cnt/tmp
+ (clobber (match_operand:SI 4 "register_operand" "=&r,&r")) ;tmp1
+ (use (match_operand:SI 2 "arith_operand" "J,1")) ;byte count
+ (use (match_operand:SI 3 "const_int_operand" "n,n"))] ;alignment
+ "!TARGET_64BIT"
+ "#"
+ [(set_attr "type" "multi,multi")])
+
+(define_split
+ [(parallel [(set (mem:BLK (match_operand:SI 0 "register_operand" ""))
+ (const_int 0))
+ (clobber (match_operand:SI 1 "register_operand" ""))
+ (clobber (match_operand:SI 4 "register_operand" ""))
+ (use (match_operand:SI 2 "arith_operand" ""))
+ (use (match_operand:SI 3 "const_int_operand" ""))])]
+ "!TARGET_64BIT && reload_completed && !flag_peephole2"
+ [(set (match_dup 4) (match_dup 0))
+ (parallel [(set (mem:BLK (match_dup 4)) (const_int 0))
+ (clobber (match_dup 1))
+ (clobber (match_dup 4))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (const_int 0)])]
+ "")
+
+(define_peephole2
+ [(parallel [(set (mem:BLK (match_operand:SI 0 "register_operand" ""))
+ (const_int 0))
+ (clobber (match_operand:SI 1 "register_operand" ""))
+ (clobber (match_operand:SI 4 "register_operand" ""))
+ (use (match_operand:SI 2 "arith_operand" ""))
+ (use (match_operand:SI 3 "const_int_operand" ""))])]
+ "!TARGET_64BIT"
+ [(parallel [(set (mem:BLK (match_dup 4)) (const_int 0))
+ (clobber (match_dup 1))
+ (clobber (match_dup 4))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (const_int 0)])]
+ "
+{
+ if (dead_or_set_p (curr_insn, operands[0]))
+ operands[4] = operands[0];
+ else
+ emit_insn (gen_rtx_SET (VOIDmode, operands[4], operands[0]));
+}")
+
+(define_insn "clrstrsi_postreload"
+ [(set (mem:BLK (match_operand:SI 0 "register_operand" "r,r"))
+ (const_int 0))
+ (clobber (match_operand:SI 1 "register_operand" "=r,r")) ;loop cnt/tmp
+ (clobber (match_dup 0))
+ (use (match_operand:SI 2 "arith_operand" "J,1")) ;byte count
+ (use (match_operand:SI 3 "const_int_operand" "n,n")) ;alignment
+ (const_int 0)]
+ "!TARGET_64BIT && reload_completed"
+ "* return output_block_clear (operands, !which_alternative);"
+ [(set_attr "type" "multi,multi")])
+
+(define_expand "clrstrdi"
+ [(parallel [(set (match_operand:BLK 0 "" "")
+ (const_int 0))
+ (clobber (match_dup 3))
+ (clobber (match_dup 4))
+ (use (match_operand:DI 1 "arith_operand" ""))
+ (use (match_operand:DI 2 "const_int_operand" ""))])]
+ "TARGET_64BIT && optimize > 0"
+ "
+{
+ int size, align;
+
+ /* Undetermined size, use the library routine. */
+ if (GET_CODE (operands[1]) != CONST_INT)
+ FAIL;
+
+ size = INTVAL (operands[1]);
+ align = INTVAL (operands[2]);
+ align = align > 8 ? 8 : align;
+
+ /* If size/alignment is large, then use the library routines. */
+ if (size / align > 16)
+ FAIL;
+
+ /* This does happen, but not often enough to worry much about. */
+ if (size / align < MOVE_RATIO)
+ FAIL;
+
+ /* Fall through means we're going to use our block clear pattern. */
+ operands[0]
+ = replace_equiv_address (operands[0],
+ copy_to_mode_reg (DImode, XEXP (operands[0], 0)));
+ operands[3] = gen_reg_rtx (DImode);
+ operands[4] = gen_reg_rtx (DImode);
+}")
+
+(define_insn "clrstrdi_prereload"
+ [(set (mem:BLK (match_operand:DI 0 "register_operand" "r,r"))
+ (const_int 0))
+ (clobber (match_operand:DI 1 "register_operand" "=r,r")) ;loop cnt/tmp
+ (clobber (match_operand:DI 4 "register_operand" "=&r,&r")) ;item tmp1
+ (use (match_operand:DI 2 "arith_operand" "J,1")) ;byte count
+ (use (match_operand:DI 3 "const_int_operand" "n,n"))] ;alignment
+ "TARGET_64BIT"
+ "#"
+ [(set_attr "type" "multi,multi")])
+
+(define_split
+ [(parallel [(set (mem:BLK (match_operand:DI 0 "register_operand" ""))
+ (const_int 0))
+ (clobber (match_operand:DI 1 "register_operand" ""))
+ (clobber (match_operand:DI 4 "register_operand" ""))
+ (use (match_operand:DI 2 "arith_operand" ""))
+ (use (match_operand:DI 3 "const_int_operand" ""))])]
+ "TARGET_64BIT && reload_completed && !flag_peephole2"
+ [(set (match_dup 4) (match_dup 0))
+ (parallel [(set (mem:BLK (match_dup 4)) (const_int 0))
+ (clobber (match_dup 1))
+ (clobber (match_dup 4))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (const_int 0)])]
+ "")
+
+(define_peephole2
+ [(parallel [(set (mem:BLK (match_operand:DI 0 "register_operand" ""))
+ (const_int 0))
+ (clobber (match_operand:DI 1 "register_operand" ""))
+ (clobber (match_operand:DI 4 "register_operand" ""))
+ (use (match_operand:DI 2 "arith_operand" ""))
+ (use (match_operand:DI 3 "const_int_operand" ""))])]
+ "TARGET_64BIT"
+ [(parallel [(set (mem:BLK (match_dup 4)) (const_int 0))
+ (clobber (match_dup 1))
+ (clobber (match_dup 4))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (const_int 0)])]
+ "
+{
+ if (dead_or_set_p (curr_insn, operands[0]))
+ operands[4] = operands[0];
+ else
+ emit_insn (gen_rtx_SET (VOIDmode, operands[4], operands[0]));
+}")
+
+(define_insn "clrstrdi_postreload"
+ [(set (mem:BLK (match_operand:DI 0 "register_operand" "r,r"))
+ (const_int 0))
+ (clobber (match_operand:DI 1 "register_operand" "=r,r")) ;loop cnt/tmp
+ (clobber (match_dup 0))
+ (use (match_operand:DI 2 "arith_operand" "J,1")) ;byte count
+ (use (match_operand:DI 3 "const_int_operand" "n,n")) ;alignment
+ (const_int 0)]
+ "TARGET_64BIT && reload_completed"
+ "* return output_block_clear (operands, !which_alternative);"
+ [(set_attr "type" "multi,multi")])
;; Floating point move insns