aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/sparc
diff options
context:
space:
mode:
authorJohn Carr <jfc@mit.edu>1998-07-13 16:53:32 +0000
committerJohn Carr <jfc@gcc.gnu.org>1998-07-13 16:53:32 +0000
commitc6b0465b283d2acba778754d165025fe74410d8e (patch)
tree42512669ff0ee8332e567f8e852ef50163e5c353 /gcc/config/sparc
parente69fd6e52f9a342c40cc065c93a86b0f292a3005 (diff)
downloadgcc-c6b0465b283d2acba778754d165025fe74410d8e.zip
gcc-c6b0465b283d2acba778754d165025fe74410d8e.tar.gz
gcc-c6b0465b283d2acba778754d165025fe74410d8e.tar.bz2
sparc.c, [...]: New trampoline code.
Mon Jul 13 16:15:10 1998 John Carr <jfc@mit.edu> * sparc.c, sparc.h, sparc.md: New trampoline code. Allow integer operand 1 to V8+ DImode shift instructions. Fix bugs in V8+ wide multiply patterns. In 32 bit mode, split DImode register moves and logical instructions. Write V9 branch prediction flag. Use V9 conditional move more often for scc. * print-rtl.c (print_rtx): Use HOST_PTR_PRINTF to print pointer. From-SVN: r21108
Diffstat (limited to 'gcc/config/sparc')
-rw-r--r--gcc/config/sparc/sparc.c191
-rw-r--r--gcc/config/sparc/sparc.h53
-rw-r--r--gcc/config/sparc/sparc.md137
3 files changed, 206 insertions, 175 deletions
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index 62be726..d45295f 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -2480,7 +2480,7 @@ output_move_quad (operands)
int fix = offset - i * 4;
/* Back up to the appropriate place. */
- temp[1] = gen_rtx_CONST_INT (VOIDmode, -fix);
+ temp[1] = GEN_INT (-fix);
if (addreg0)
{
temp[0] = addreg0;
@@ -2495,7 +2495,7 @@ output_move_quad (operands)
wordpart[i]);
/* Don't modify the register that's the destination of the
move. */
- temp[0] = gen_rtx_CONST_INT (VOIDmode, -(offset - fix));
+ temp[0] = GEN_INT (-(offset - fix));
if (addreg0 && REGNO (addreg0) != REGNO (wordpart[i][0]))
{
temp[1] = addreg0;
@@ -2512,7 +2512,7 @@ output_move_quad (operands)
}
if (offset)
{
- temp[1] = gen_rtx_CONST_INT (VOIDmode, -offset);
+ temp[1] = GEN_INT (-offset);
/* Undo the adds we just did. */
if (addreg0)
{
@@ -2712,6 +2712,17 @@ output_scc_insn (operands, insn)
rtx label = 0, next = insn;
int need_label = 0;
+ /* This code used to be called with final_sequence nonzero (for fpcc
+ delay slots), but that is no longer allowed. */
+ if (final_sequence)
+ abort ();
+
+ /* On UltraSPARC a conditional move blocks until 3 cycles after prior loads
+ complete. It might be beneficial here to use branches if any recent
+ instructions were loads. */
+ if (TARGET_V9 && REGNO (operands[1]) == SPARC_ICC_REG)
+ return "mov 0,%0\n\tmov%C2 %x1,1,%0";
+
/* Try doing a jump optimization which jump.c can't do for us
because we did not expose that setcc works by using branches.
@@ -2724,21 +2735,12 @@ output_scc_insn (operands, insn)
if (GET_CODE (next) == CODE_LABEL)
label = next;
next = NEXT_INSN (next);
- if (next == 0)
- break;
}
- while (GET_CODE (next) == NOTE || GET_CODE (next) == CODE_LABEL);
+ while (next && (GET_CODE (next) == NOTE || GET_CODE (next) == CODE_LABEL));
- /* If we are in a sequence, and the following insn is a sequence also,
- then just following the current insn's next field will take us to the
- first insn of the next sequence, which is the wrong place. We don't
- want to optimize with a branch that has had its delay slot filled.
- Avoid this by verifying that NEXT_INSN (PREV_INSN (next)) == next
- which fails only if NEXT is such a branch. */
-
- if (next && GET_CODE (next) == JUMP_INSN && simplejump_p (next)
- && (! final_sequence || NEXT_INSN (PREV_INSN (next)) == next))
+ if (next && GET_CODE (next) == JUMP_INSN && simplejump_p (next))
label = JUMP_LABEL (next);
+
/* If not optimizing, jump label fields are not set. To be safe, always
check here to whether label is still zero. */
if (label == 0)
@@ -2752,35 +2754,8 @@ output_scc_insn (operands, insn)
/* operands[3] is an unused slot. */
operands[3] = label;
- /* If we are in a delay slot, assume it is the delay slot of an fpcc
- insn since our type isn't allowed anywhere else. */
-
- /* ??? Fpcc instructions no longer have delay slots, so this code is
- probably obsolete. */
-
- /* The fastest way to emit code for this is an annulled branch followed
- by two move insns. This will take two cycles if the branch is taken,
- and three cycles if the branch is not taken.
-
- However, if we are in the delay slot of another branch, this won't work,
- because we can't put a branch in the delay slot of another branch.
- The above sequence would effectively take 3 or 4 cycles respectively
- since a no op would have be inserted between the two branches.
- In this case, we want to emit a move, annulled branch, and then the
- second move. This sequence always takes 3 cycles, and hence is faster
- when we are in a branch delay slot. */
-
- if (final_sequence)
- {
- strcpy (string, "mov 0,%0\n\t");
- strcat (string, output_cbranch (operands[2], 3, 0, 1, 0));
- strcat (string, "\n\tmov 1,%0");
- }
- else
- {
- strcpy (string, output_cbranch (operands[2], 3, 0, 1, 0));
- strcat (string, "\n\tmov 1,%0\n\tmov 0,%0");
- }
+ strcpy (string, output_cbranch (operands[2], 3, 0, 1, 0));
+ strcat (string, "\n\tmov 1,%0\n\tmov 0,%0");
if (need_label)
strcat (string, "\n%l3:");
@@ -4380,13 +4355,16 @@ sparc_builtin_saveregs (arglist)
ANNUL is non-zero if we should generate an annulling branch.
- NOOP is non-zero if we have to follow this branch by a noop. */
+ NOOP is non-zero if we have to follow this branch by a noop.
+
+ INSN, if non-null, is the branch insn; on V9 its REG_BR_PRED note, if any, selects the ",pt"/",pn" suffix. */
char *
-output_cbranch (op, label, reversed, annul, noop)
+output_cbranch (op, label, reversed, annul, noop, insn)
rtx op;
int label;
int reversed, annul, noop;
+ rtx insn;
{
static char string[20];
enum rtx_code code = GET_CODE (op);
@@ -4506,8 +4484,6 @@ output_cbranch (op, label, reversed, annul, noop)
if (annul)
strcat (string, ",a");
- /* ??? If v9, optional prediction bit ",pt" or ",pf" goes here. */
-
if (! TARGET_V9)
{
labeloff = 3;
@@ -4515,6 +4491,11 @@ output_cbranch (op, label, reversed, annul, noop)
}
else
{
+ rtx note;
+
+ if (insn && (note = find_reg_note (insn, REG_BR_PRED, NULL_RTX)))
+ strcat (string, INTVAL (XEXP (note, 0)) & ATTR_FLAG_likely ? ",pt" : ",pn");
+
labeloff = 9;
if (mode == CCFPmode || mode == CCFPEmode)
{
@@ -5372,34 +5353,49 @@ void
sparc_initialize_trampoline (tramp, fnaddr, cxt)
rtx tramp, fnaddr, cxt;
{
- rtx high_cxt = expand_shift (RSHIFT_EXPR, SImode, cxt,
- size_int (10), 0, 1);
- rtx high_fn = expand_shift (RSHIFT_EXPR, SImode, fnaddr,
- size_int (10), 0, 1);
- rtx low_cxt = expand_and (cxt, GEN_INT (0x3ff), 0);
- rtx low_fn = expand_and (fnaddr, GEN_INT (0x3ff), 0);
- rtx g1_sethi = gen_rtx_HIGH (SImode, GEN_INT (0x03000000));
- rtx g2_sethi = gen_rtx_HIGH (SImode, GEN_INT (0x05000000));
- rtx g1_ori = gen_rtx_HIGH (SImode, GEN_INT (0x82106000));
- rtx g2_ori = gen_rtx_HIGH (SImode, GEN_INT (0x8410A000));
- rtx tem = gen_reg_rtx (SImode);
- emit_move_insn (tem, g1_sethi);
- emit_insn (gen_iorsi3 (high_fn, high_fn, tem));
- emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 0)), high_fn);
- emit_move_insn (tem, g1_ori);
- emit_insn (gen_iorsi3 (low_fn, low_fn, tem));
- emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), low_fn);
- emit_move_insn (tem, g2_sethi);
- emit_insn (gen_iorsi3 (high_cxt, high_cxt, tem));
- emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), high_cxt);
- emit_move_insn (tem, g2_ori);
- emit_insn (gen_iorsi3 (low_cxt, low_cxt, tem));
- emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)), low_cxt);
+ /* SPARC 32 bit trampoline; each word stored below is an opcode template ORed with address bits:
+
+ sethi %hi(fn),%g1
+ sethi %hi(static),%g2
+ jmp %g1+%lo(fn)
+ or %g2,%lo(static),%g2
+
+ SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
+ JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
+ */
+
+ emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 0)),
+ expand_binop (SImode, ior_optab,
+ expand_shift (RSHIFT_EXPR, SImode, fnaddr,
+ size_int (10), 0, 1),
+ GEN_INT (0x03000000),
+ NULL_RTX, 1, OPTAB_DIRECT));
+
+ emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
+ expand_binop (SImode, ior_optab,
+ expand_shift (RSHIFT_EXPR, SImode, cxt,
+ size_int (10), 0, 1),
+ GEN_INT (0x05000000),
+ NULL_RTX, 1, OPTAB_DIRECT));
+
+ emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
+ expand_binop (SImode, ior_optab,
+ expand_and (fnaddr, GEN_INT (0x3ff), NULL_RTX),
+ GEN_INT (0x81c06000),
+ NULL_RTX, 1, OPTAB_DIRECT));
+
+ emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
+ expand_binop (SImode, ior_optab,
+ expand_and (cxt, GEN_INT (0x3ff), NULL_RTX),
+ GEN_INT (0x8410a000),
+ NULL_RTX, 1, OPTAB_DIRECT));
+
emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode, tramp))));
- emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode,
- plus_constant (tramp, 8)))));
- emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode,
- plus_constant (tramp, 16)))));
+ /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
+ aligned on a 16 byte boundary so one flush clears it all; other CPUs also flush offset 8. */
+ if (sparc_cpu != PROCESSOR_ULTRASPARC)
+ emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode,
+ plus_constant (tramp, 8)))));
}
/* The 64 bit version is simpler because it makes more sense to load the
@@ -5410,17 +5406,27 @@ void
sparc64_initialize_trampoline (tramp, fnaddr, cxt)
rtx tramp, fnaddr, cxt;
{
- emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 24)), cxt);
- emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 32)), fnaddr);
+ /* 64 bit trampoline: four instruction words followed by two DImode data words.
+ rd %pc,%g1
+ ldx [%g1+24],%g5
+ jmp %g5
+ ldx [%g1+16],%g5
+ +16 bytes data: cxt at offset 16, fnaddr at offset 24
+ */
+
+ emit_move_insn (gen_rtx_MEM (SImode, tramp),
+ GEN_INT (0x83414000));
+ emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
+ GEN_INT (0xca586018));
+ emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
+ GEN_INT (0x81c14000)); /* jmp %g5; 0x81c04000 would encode jmp %g1, i.e. back into the trampoline. */
+ emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
+ GEN_INT (0xca586010));
+ emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), cxt);
+ emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 24)), fnaddr); /* Must match ldx [%g1+24] above and not overlap cxt. */
emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode, tramp))));
- emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode,
- plus_constant (tramp, 8)))));
- emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode,
- plus_constant (tramp, 16)))));
- emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode,
- plus_constant (tramp, 24)))));
- emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode,
- plus_constant (tramp, 32)))));
+ if (sparc_cpu != PROCESSOR_ULTRASPARC)
+ emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode, plus_constant (tramp, 8)))));
}
/* Subroutines to support a flat (single) register window calling
@@ -6438,10 +6444,17 @@ sparc_v8plus_shift (operands, insn, opcode)
if (GET_CODE (operands[3]) == SCRATCH)
operands[3] = operands[0];
- output_asm_insn ("sllx %H1,32,%3", operands);
- if (sparc_check_64 (operands[1], insn) <= 0)
- output_asm_insn ("srl %L1,0,%L1", operands);
- output_asm_insn ("or %L1,%3,%3", operands);
+ if (GET_CODE (operands[1]) == CONST_INT)
+ {
+ output_asm_insn ("mov %1,%3", operands);
+ }
+ else
+ {
+ output_asm_insn ("sllx %H1,32,%3", operands);
+ if (sparc_check_64 (operands[1], insn) <= 0)
+ output_asm_insn ("srl %L1,0,%L1", operands);
+ output_asm_insn ("or %L1,%3,%3", operands);
+ }
strcpy(asm_code, opcode);
if (which_alternative != 2)
diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h
index 1e0c644..1f21659 100644
--- a/gcc/config/sparc/sparc.h
+++ b/gcc/config/sparc/sparc.h
@@ -2099,54 +2099,11 @@ extern union tree_node *current_function_decl;
#define EPILOGUE_USES(REGNO) \
(!TARGET_FLAT && REGNO == 31)
-/* Output assembler code for a block containing the constant parts
- of a trampoline, leaving space for the variable parts. */
-
-/* On 32 bit sparcs, the trampoline contains five instructions:
- sethi #TOP_OF_FUNCTION,%g1
- or #BOTTOM_OF_FUNCTION,%g1,%g1
- sethi #TOP_OF_STATIC,%g2
- jmp g1
- or #BOTTOM_OF_STATIC,%g2,%g2
-
- On 64 bit sparcs, the trampoline contains 4 insns and two pseudo-immediate
- constants (plus some padding):
- rd %pc,%g1
- ldx[%g1+20],%g5
- ldx[%g1+28],%g1
- jmp %g1
- nop
- nop
- .xword context
- .xword function */
-/* ??? Stack is execute-protected in v9. */
-
-#define TRAMPOLINE_TEMPLATE(FILE) \
-do { \
- if (TARGET_ARCH64) \
- { \
- fprintf (FILE, "\trd %%pc,%%g1\n"); \
- fprintf (FILE, "\tldx [%%g1+24],%%g5\n"); \
- fprintf (FILE, "\tldx [%%g1+32],%%g1\n"); \
- fprintf (FILE, "\tjmp %%g1\n"); \
- fprintf (FILE, "\tnop\n"); \
- fprintf (FILE, "\tnop\n"); \
- /* -mmedlow shouldn't generate .xwords, so don't use them at all */ \
- fprintf (FILE, "\t.word 0,0,0,0\n"); \
- } \
- else \
- { \
- ASM_OUTPUT_INT (FILE, const0_rtx); \
- ASM_OUTPUT_INT (FILE, const0_rtx); \
- ASM_OUTPUT_INT (FILE, const0_rtx); \
- ASM_OUTPUT_INT (FILE, GEN_INT (0x81C04000)); \
- ASM_OUTPUT_INT (FILE, const0_rtx); \
- } \
-} while (0)
-
/* Length in units of the trampoline for entering a nested function. */
-#define TRAMPOLINE_SIZE (TARGET_ARCH64 ? 40 : 20)
+#define TRAMPOLINE_SIZE (TARGET_ARCH64 ? 32 : 16)
+
+#define TRAMPOLINE_ALIGNMENT 128 /* 16 bytes */
/* Emit RTL insns to initialize the variable parts of a trampoline.
FNADDR is an RTX for the address of the function's pure code.
@@ -2155,12 +2112,10 @@ do { \
void sparc_initialize_trampoline ();
void sparc64_initialize_trampoline ();
#define INITIALIZE_TRAMPOLINE(TRAMP, FNADDR, CXT) \
- do { \
if (TARGET_ARCH64) \
sparc64_initialize_trampoline (TRAMP, FNADDR, CXT); \
else \
- sparc_initialize_trampoline (TRAMP, FNADDR, CXT); \
- } while (0)
+ sparc_initialize_trampoline (TRAMP, FNADDR, CXT)
/* Generate necessary RTL for __builtin_saveregs().
ARGLIST is the argument list; see expr.c. */
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index ca8de61..156c086 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -1308,6 +1308,22 @@
[(set_attr "type" "multi")
(set_attr "length" "3")])
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 2 "noov_compare_op"
+ [(match_operand 1 "icc_or_fcc_reg_operand" "")
+ (const_int 0)]))]
+ ;; 32 bit LTU/GEU are better implemented using addx/subx
+ "TARGET_V9 && REGNO (operands[1]) == SPARC_ICC_REG
+ && (GET_MODE (operands[1]) == CCXmode
+ || (GET_CODE (operands[2]) != LTU && GET_CODE (operands[2]) != GEU))"
+ [(set (match_dup 0) (const_int 0))
+ (set (match_dup 0)
+ (if_then_else:SI (match_op_dup:SI 2 [(match_dup 1) (const_int 0)])
+ (const_int 1)
+ (match_dup 0)))]
+ "")
+
(define_insn "*scc_di"
[(set (match_operand:DI 0 "register_operand" "=r")
(match_operator:DI 2 "noov_compare_op"
@@ -1521,7 +1537,7 @@
{
return output_cbranch (operands[0], 1, 0,
final_sequence && INSN_ANNULLED_BRANCH_P (insn),
- ! final_sequence);
+ ! final_sequence, insn);
}"
[(set_attr "type" "branch")])
@@ -1536,7 +1552,7 @@
{
return output_cbranch (operands[0], 1, 1,
final_sequence && INSN_ANNULLED_BRANCH_P (insn),
- ! final_sequence);
+ ! final_sequence, insn);
}"
[(set_attr "type" "branch")])
@@ -1552,7 +1568,7 @@
{
return output_cbranch (operands[1], 2, 0,
final_sequence && INSN_ANNULLED_BRANCH_P (insn),
- ! final_sequence);
+ ! final_sequence, insn);
}"
[(set_attr "type" "branch")])
@@ -1568,7 +1584,7 @@
{
return output_cbranch (operands[1], 2, 1,
final_sequence && INSN_ANNULLED_BRANCH_P (insn),
- ! final_sequence);
+ ! final_sequence, insn);
}"
[(set_attr "type" "branch")])
@@ -1584,7 +1600,7 @@
{
return output_cbranch (operands[1], 2, 0,
final_sequence && INSN_ANNULLED_BRANCH_P (insn),
- ! final_sequence);
+ ! final_sequence, insn);
}"
[(set_attr "type" "branch")])
@@ -1600,7 +1616,7 @@
{
return output_cbranch (operands[1], 2, 1,
final_sequence && INSN_ANNULLED_BRANCH_P (insn),
- ! final_sequence);
+ ! final_sequence, insn);
}"
[(set_attr "type" "branch")])
@@ -2240,6 +2256,20 @@
operands[2] = copy_rtx (operands[0]);
PUT_MODE (operands[2], SImode);")
+;; Split register to register moves.
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (match_operand:DI 1 "arith_double_operand" "rIN"))]
+ "! TARGET_ARCH64 && GET_CODE (operands[1]) == REG && REGNO (operands[0]) < 32
+ && (GET_CODE (operands[1]) != REG || REGNO (operands[1]) < 32)
+ && ! reg_overlap_mentioned_p (operands[0], operands[1])"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 5))]
+ "operands[2] = gen_highpart (SImode, operands[0]);
+ operands[3] = gen_lowpart (SImode, operands[0]);
+ operands[4] = gen_highpart (SImode, operands[1]);
+ operands[5] = gen_lowpart (SImode, operands[1]);")
+
(define_insn "*movdi_sp32"
[(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "=r,T,U,Q,r,r,?f,?f,?Q")
(match_operand:DI 1 "general_operand" "r,U,T,r,Q,i,f,Q,f"))]
@@ -3217,7 +3247,7 @@
if (which_alternative == 1)
return \"st %1,%0\";
if (sparc_check_64 (operands[1], insn) > 0)
- return \"nop\";
+ return final_sequence ? \"nop\" : \"\";
return \"srl %1,0,%0\";
"
[(set_attr "type" "shift,store")])
@@ -3639,7 +3669,7 @@ return \"srl %1,0,%0\";
gen_rtx_PLUS (DImode, operands[1],
operands[2])),
gen_rtx_CLOBBER (VOIDmode,
- gen_rtx_REG (SImode, SPARC_ICC_REG)))));
+ gen_rtx_REG (CCmode, SPARC_ICC_REG)))));
DONE;
}
}")
@@ -3648,7 +3678,7 @@ return \"srl %1,0,%0\";
[(set (match_operand:DI 0 "register_operand" "=r")
(plus:DI (match_operand:DI 1 "arith_double_operand" "%r")
(match_operand:DI 2 "arith_double_operand" "rHI")))
- (clobber (reg:SI 100))]
+ (clobber (reg:CC 100))]
"! TARGET_ARCH64"
"*
{
@@ -3681,7 +3711,7 @@ return \"srl %1,0,%0\";
[(set (match_operand:DI 0 "register_operand" "=r")
(plus:DI (match_operand:DI 1 "arith_double_operand" "%r")
(match_operand:DI 2 "arith_double_operand" "rHI")))
- (clobber (reg:SI 100))]
+ (clobber (reg:CC 100))]
"! TARGET_ARCH64 && reload_completed"
[(parallel [(set (reg:CC_NOOV 100)
(compare:CC_NOOV (plus:SI (match_dup 4)
@@ -3698,16 +3728,13 @@ return \"srl %1,0,%0\";
operands[5] = gen_lowpart (SImode, operands[2]);
operands[6] = gen_highpart (SImode, operands[0]);
operands[7] = gen_highpart (SImode, operands[1]);
- if (GET_CODE (operands[2]) == CONST_INT)
- operands[8] = INTVAL (operands[2]) < 0 ? constm1_rtx : const0_rtx;
- else
- operands[8] = gen_highpart (SImode, operands[2]);")
+ operands[8] = gen_highpart (SImode, operands[2]);")
(define_split
[(set (match_operand:DI 0 "register_operand" "=r")
(minus:DI (match_operand:DI 1 "arith_double_operand" "r")
(match_operand:DI 2 "arith_double_operand" "rHI")))
- (clobber (reg:SI 100))]
+ (clobber (reg:CC 100))]
"! TARGET_ARCH64 && reload_completed"
[(parallel [(set (reg:CC_NOOV 100)
(compare:CC_NOOV (minus:SI (match_dup 4)
@@ -3749,7 +3776,7 @@ return \"srl %1,0,%0\";
[(set (match_operand:DI 0 "register_operand" "=r")
(plus:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
(match_operand:DI 2 "register_operand" "r")))
- (clobber (reg:SI 100))]
+ (clobber (reg:CC 100))]
"! TARGET_ARCH64"
"addcc %L2,%1,%L0\;addx %H2,0,%H0"
[(set_attr "type" "multi")])
@@ -3823,7 +3850,7 @@ return \"srl %1,0,%0\";
gen_rtx_MINUS (DImode, operands[1],
operands[2])),
gen_rtx_CLOBBER (VOIDmode,
- gen_rtx_REG (SImode, SPARC_ICC_REG)))));
+ gen_rtx_REG (CCmode, SPARC_ICC_REG)))));
DONE;
}
}")
@@ -3832,7 +3859,7 @@ return \"srl %1,0,%0\";
[(set (match_operand:DI 0 "register_operand" "=r")
(minus:DI (match_operand:DI 1 "register_operand" "r")
(match_operand:DI 2 "arith_double_operand" "rHI")))
- (clobber (reg:SI 100))]
+ (clobber (reg:CC 100))]
"! TARGET_ARCH64"
"*
{
@@ -3862,7 +3889,7 @@ return \"srl %1,0,%0\";
[(set (match_operand:DI 0 "register_operand" "=r")
(minus:DI (match_operand:DI 1 "register_operand" "r")
(zero_extend:DI (match_operand:SI 2 "register_operand" "r"))))
- (clobber (reg:SI 100))]
+ (clobber (reg:CC 100))]
"! TARGET_ARCH64"
"subcc %L1,%2,%L0\;addx %H1,0,%H0"
[(set_attr "type" "multi")])
@@ -4113,11 +4140,27 @@ return \"srl %1,0,%0\";
smul %1,%2,%4\;srlx %4,%3,%0"
[(set_attr "length" "2")])
+;; The combiner changes TRUNCATE in the previous pattern to SUBREG.
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=h,r")
+ (subreg:SI
+ (lshiftrt:DI
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+ (sign_extend:DI (match_operand:SI 2 "register_operand" "r,r")))
+ (match_operand:SI 3 "const_int_operand" "i,i"))
+ 1))
+ (clobber (match_scratch:SI 4 "=X,&h"))]
+ "TARGET_V8PLUS"
+ "@
+ smul %1,%2,%0\;srlx %0,%3,%0
+ smul %1,%2,%4\;srlx %4,%3,%0"
+ [(set_attr "length" "2")])
+
(define_insn "const_smulsi3_highpart_v8plus"
[(set (match_operand:SI 0 "register_operand" "=h,r")
(truncate:SI
(lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
- (match_operand:SI 2 "register_operand" "r,r"))
+ (match_operand 2 "small_int" "i,i"))
(match_operand:SI 3 "const_int_operand" "i,i"))))
(clobber (match_scratch:SI 4 "=X,&h"))]
"TARGET_V8PLUS"
@@ -4510,6 +4553,23 @@ return \"srl %1,0,%0\";
operands[4] = GEN_INT (~INTVAL (operands[2]) & 0xffffffff);
}")
+;; Split DImode logical operations requiring two instructions.
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (match_operator:DI 1 "cc_arithop" ; AND, IOR, XOR
+ [(match_operand:DI 2 "register_operand" "")
+ (match_operand:DI 3 "arith_double_operand" "")]))]
+ "! TARGET_ARCH64 && reload_completed
+ && GET_CODE (operands[0]) == REG && REGNO (operands[0]) < 32"
+ [(set (match_dup 4) (match_op_dup:SI 1 [(match_dup 6) (match_dup 8)]))
+ (set (match_dup 5) (match_op_dup:SI 1 [(match_dup 7) (match_dup 9)]))]
+ "operands[4] = gen_highpart (SImode, operands[0]);
+ operands[5] = gen_lowpart (SImode, operands[0]);
+ operands[6] = gen_highpart (SImode, operands[2]);
+ operands[7] = gen_lowpart (SImode, operands[2]);
+ operands[8] = gen_highpart (SImode, operands[3]);
+ operands[9] = gen_lowpart (SImode, operands[3]);")
+
(define_insn "*and_not_di_sp32"
[(set (match_operand:DI 0 "register_operand" "=r,b")
(and:DI (not:DI (match_operand:DI 1 "register_operand" "r,b"))
@@ -4899,7 +4959,7 @@ return \"srl %1,0,%0\";
gen_rtx_SET (VOIDmode, operand0,
gen_rtx_NEG (DImode, operand1)),
gen_rtx_CLOBBER (VOIDmode,
- gen_rtx_REG (SImode, SPARC_ICC_REG)))));
+ gen_rtx_REG (CCmode, SPARC_ICC_REG)))));
DONE;
}
}")
@@ -4907,7 +4967,7 @@ return \"srl %1,0,%0\";
(define_insn "*negdi2_sp32"
[(set (match_operand:DI 0 "register_operand" "=r")
(neg:DI (match_operand:DI 1 "register_operand" "r")))
- (clobber (reg:SI 100))]
+ (clobber (reg:CC 100))]
"! TARGET_ARCH64"
"*
{
@@ -5331,13 +5391,28 @@ return \"srl %1,0,%0\";
(define_insn "ashldi3_v8plus"
[(set (match_operand:DI 0 "register_operand" "=&h,&h,r")
- (ashift:DI (match_operand:DI 1 "register_operand" "r,0,r")
+ (ashift:DI (match_operand:DI 1 "arith_operand" "rI,0,rI")
(match_operand:SI 2 "arith_operand" "rI,rI,rI")))
(clobber (match_scratch:SI 3 "=X,X,&h"))]
"TARGET_V8PLUS"
"*return sparc_v8plus_shift (operands, insn, \"sllx\");"
[(set_attr "length" "5,5,6")])
+;; Optimize (1LL<<x)-1
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=h")
+ (plus:DI (ashift:DI (const_int 1)
+ (match_operand:SI 2 "arith_operand" "rI"))
+ (const_int -1)))]
+ "TARGET_V8PLUS"
+ "*
+{
+ if (GET_CODE (operands[2]) == REG && REGNO (operands[2]) == REGNO (operands[0]))
+ return \"mov 1,%L0\;sllx %L0,%2,%L0\;sub %L0,1,%L0\;srlx %L0,32,%H0\";
+ return \"mov 1,%H0\;sllx %H0,%2,%L0\;sub %L0,1,%L0\;srlx %L0,32,%H0\";
+}"
+ [(set_attr "length" "4")])
+
(define_insn "*cmp_cc_ashift_1"
[(set (reg:CC_NOOV 100)
(compare:CC_NOOV (ashift:SI (match_operand:SI 0 "register_operand" "r")
@@ -5402,7 +5477,7 @@ if (! TARGET_ARCH64)
(define_insn "ashrdi3_v8plus"
[(set (match_operand:DI 0 "register_operand" "=&h,&h,r")
- (ashiftrt:DI (match_operand:DI 1 "register_operand" "r,0,r")
+ (ashiftrt:DI (match_operand:DI 1 "arith_operand" "rI,0,rI")
(match_operand:SI 2 "arith_operand" "rI,rI,rI")))
(clobber (match_scratch:SI 3 "=X,X,&h"))]
"TARGET_V8PLUS"
@@ -5454,7 +5529,7 @@ if (! TARGET_ARCH64)
(define_insn "lshrdi3_v8plus"
[(set (match_operand:DI 0 "register_operand" "=&h,&h,r")
- (lshiftrt:DI (match_operand:DI 1 "register_operand" "r,0,r")
+ (lshiftrt:DI (match_operand:DI 1 "arith_operand" "rI,0,rI")
(match_operand:SI 2 "arith_operand" "rI,rI,rI")))
(clobber (match_scratch:SI 3 "=X,X,&h"))]
"TARGET_V8PLUS"
@@ -6605,18 +6680,6 @@ if (! TARGET_ARCH64)
"TARGET_ARCH64 && short_branch (INSN_UID (insn), INSN_UID (operands[2])) && in_same_eh_region (insn, operands[2]) && in_same_eh_region (insn, ins1)"
"call %a0,%1\;add %%o7,(%l2-.-4),%%o7")
-;; Other miscellaneous peepholes.
-
-;; (reg:SI 100) is created by the {add,neg,sub}di patterns.
-(define_peephole
- [(parallel [(set (match_operand:SI 0 "register_operand" "=r")
- (minus:SI (match_operand:SI 1 "reg_or_0_operand" "rJ")
- (reg:SI 100)))
- (clobber (reg:CC 100))])
- (set (reg:CC 100) (compare (match_dup 0) (const_int 0)))]
- ""
- "subxcc %r1,0,%0")
-
;; After a nonlocal goto, we need to restore the PIC register, but only
;; if we need it. So do nothing much here, but we'll check for this in
;; finalize_pic.