author | John Carr <jfc@mit.edu> | 1998-07-13 16:53:32 +0000
---|---|---
committer | John Carr <jfc@gcc.gnu.org> | 1998-07-13 16:53:32 +0000
commit | c6b0465b283d2acba778754d165025fe74410d8e (patch) |
tree | 42512669ff0ee8332e567f8e852ef50163e5c353 /gcc/config/sparc |
parent | e69fd6e52f9a342c40cc065c93a86b0f292a3005 (diff) |
sparc.c, [...]: New trampoline code.
Mon Jul 13 16:15:10 1998 John Carr <jfc@mit.edu>
* sparc.c, sparc.h, sparc.md: New trampoline code.
Allow integer operand 1 to V8+ DImode shift instructions.
Fix bugs in V8+ wide multiply patterns.
In 32 bit mode, split DImode register moves and logical instructions.
Write V9 branch prediction flag.
Use V9 conditional move more often for scc.
* print-rtl.c (print_rtx): Use HOST_PTR_PRINTF to print pointer.
From-SVN: r21108
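Editor's note on the "New trampoline code" item: GCC materializes a trampoline (a small block of instructions built at run time, on SPARC on the stack) whenever the address of a nested function that needs its enclosing frame is taken; the routines rewritten in this commit are what fill that block in. A minimal GNU C sketch (illustrative only, not part of this commit; the function names are made up) that forces such a trampoline to be built:

/* Hypothetical example: taking the address of a GNU C nested function
   makes GCC build a trampoline so the pointer carries the static chain
   (here, access to `base').  On SPARC the trampoline contents come from
   sparc_initialize_trampoline() / sparc64_initialize_trampoline().  */
static int
apply (int (*fn) (int), int x)
{
  return fn (x);                /* call goes through the trampoline */
}

int
offset_all (int base, int x)
{
  int add_base (int v)          /* nested function needing `base' */
  {
    return v + base;
  }
  return apply (add_base, x);   /* address taken => trampoline emitted */
}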
Diffstat (limited to 'gcc/config/sparc')
-rw-r--r-- | gcc/config/sparc/sparc.c | 191
-rw-r--r-- | gcc/config/sparc/sparc.h | 53
-rw-r--r-- | gcc/config/sparc/sparc.md | 137
3 files changed, 206 insertions, 175 deletions
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index 62be726..d45295f 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -2480,7 +2480,7 @@ output_move_quad (operands)
       int fix = offset - i * 4;
 
       /* Back up to the appropriate place. */
-      temp[1] = gen_rtx_CONST_INT (VOIDmode, -fix);
+      temp[1] = GEN_INT (-fix);
       if (addreg0)
         {
           temp[0] = addreg0;
@@ -2495,7 +2495,7 @@ output_move_quad (operands)
                        wordpart[i]);
 
       /* Don't modify the register that's the destination of the move. */
-      temp[0] = gen_rtx_CONST_INT (VOIDmode, -(offset - fix));
+      temp[0] = GEN_INT (-(offset - fix));
       if (addreg0 && REGNO (addreg0) != REGNO (wordpart[i][0]))
         {
           temp[1] = addreg0;
@@ -2512,7 +2512,7 @@ output_move_quad (operands)
     }
   if (offset)
     {
-      temp[1] = gen_rtx_CONST_INT (VOIDmode, -offset);
+      temp[1] = GEN_INT (-offset);
       /* Undo the adds we just did. */
       if (addreg0)
         {
@@ -2712,6 +2712,17 @@ output_scc_insn (operands, insn)
   rtx label = 0, next = insn;
   int need_label = 0;
 
+  /* This code used to be called with final_sequence nonzero (for fpcc
+     delay slots), but that is no longer allowed.  */
+  if (final_sequence)
+    abort ();
+
+  /* On UltraSPARC a conditional moves blocks until 3 cycles after prior loads
+     complete.  It might be beneficial here to use branches if any recent
+     instructions were loads.  */
+  if (TARGET_V9 && REGNO (operands[1]) == SPARC_ICC_REG)
+    return "mov 0,%0\n\tmov%C2 %x1,1,%0";
+
   /* Try doing a jump optimization which jump.c can't do for us
      because we did not expose that setcc works by using branches.
 
@@ -2724,21 +2735,12 @@ output_scc_insn (operands, insn)
       if (GET_CODE (next) == CODE_LABEL)
         label = next;
       next = NEXT_INSN (next);
-      if (next == 0)
-        break;
     }
-  while (GET_CODE (next) == NOTE || GET_CODE (next) == CODE_LABEL);
+  while (next && GET_CODE (next) == NOTE || GET_CODE (next) == CODE_LABEL);
 
-  /* If we are in a sequence, and the following insn is a sequence also,
-     then just following the current insn's next field will take us to the
-     first insn of the next sequence, which is the wrong place.  We don't
-     want to optimize with a branch that has had its delay slot filled.
-     Avoid this by verifying that NEXT_INSN (PREV_INSN (next)) == next
-     which fails only if NEXT is such a branch.  */
-
-  if (next && GET_CODE (next) == JUMP_INSN && simplejump_p (next)
-      && (! final_sequence || NEXT_INSN (PREV_INSN (next)) == next))
+  if (next && GET_CODE (next) == JUMP_INSN && simplejump_p (next))
     label = JUMP_LABEL (next);
+
   /* If not optimizing, jump label fields are not set.  To be safe, always
      check here to whether label is still zero.  */
   if (label == 0)
@@ -2752,35 +2754,8 @@ output_scc_insn (operands, insn)
   /* operands[3] is an unused slot.  */
   operands[3] = label;
 
-  /* If we are in a delay slot, assume it is the delay slot of an fpcc
-     insn since our type isn't allowed anywhere else.  */
-
-  /* ??? Fpcc instructions no longer have delay slots, so this code is
-     probably obsolete.  */
-
-  /* The fastest way to emit code for this is an annulled branch followed
-     by two move insns.  This will take two cycles if the branch is taken,
-     and three cycles if the branch is not taken.
-
-     However, if we are in the delay slot of another branch, this won't work,
-     because we can't put a branch in the delay slot of another branch.
-     The above sequence would effectively take 3 or 4 cycles respectively
-     since a no op would have be inserted between the two branches.
-     In this case, we want to emit a move, annulled branch, and then the
-     second move.  This sequence always takes 3 cycles, and hence is faster
-     when we are in a branch delay slot.  */
-
-  if (final_sequence)
-    {
-      strcpy (string, "mov 0,%0\n\t");
-      strcat (string, output_cbranch (operands[2], 3, 0, 1, 0));
-      strcat (string, "\n\tmov 1,%0");
-    }
-  else
-    {
-      strcpy (string, output_cbranch (operands[2], 3, 0, 1, 0));
-      strcat (string, "\n\tmov 1,%0\n\tmov 0,%0");
-    }
+  strcpy (string, output_cbranch (operands[2], 3, 0, 1, 0));
+  strcat (string, "\n\tmov 1,%0\n\tmov 0,%0");
 
   if (need_label)
     strcat (string, "\n%l3:");
@@ -4380,13 +4355,16 @@ sparc_builtin_saveregs (arglist)
    ANNUL is non-zero if we should generate an annulling branch.
 
-   NOOP is non-zero if we have to follow this branch by a noop.  */
+   NOOP is non-zero if we have to follow this branch by a noop.
+
+   INSN, if set, is the insn.  */
 
 char *
-output_cbranch (op, label, reversed, annul, noop)
+output_cbranch (op, label, reversed, annul, noop, insn)
      rtx op;
     int label;
     int reversed, annul, noop;
+     rtx insn;
 {
   static char string[20];
   enum rtx_code code = GET_CODE (op);
@@ -4506,8 +4484,6 @@ output_cbranch (op, label, reversed, annul, noop)
   if (annul)
     strcat (string, ",a");
 
-  /* ??? If v9, optional prediction bit ",pt" or ",pf" goes here.  */
-
   if (! TARGET_V9)
     {
       labeloff = 3;
@@ -4515,6 +4491,11 @@ output_cbranch (op, label, reversed, annul, noop)
     }
   else
    {
+      rtx note;
+
+      if (insn && (note = find_reg_note (insn, REG_BR_PRED, NULL_RTX)))
+        strcat (string, INTVAL (XEXP (note, 0)) & ATTR_FLAG_likely ? ",pt" : ",pn");
+
       labeloff = 9;
       if (mode == CCFPmode || mode == CCFPEmode)
         {
@@ -5372,34 +5353,49 @@ void
 sparc_initialize_trampoline (tramp, fnaddr, cxt)
      rtx tramp, fnaddr, cxt;
 {
-  rtx high_cxt = expand_shift (RSHIFT_EXPR, SImode, cxt,
-                               size_int (10), 0, 1);
-  rtx high_fn = expand_shift (RSHIFT_EXPR, SImode, fnaddr,
-                              size_int (10), 0, 1);
-  rtx low_cxt = expand_and (cxt, GEN_INT (0x3ff), 0);
-  rtx low_fn = expand_and (fnaddr, GEN_INT (0x3ff), 0);
-  rtx g1_sethi = gen_rtx_HIGH (SImode, GEN_INT (0x03000000));
-  rtx g2_sethi = gen_rtx_HIGH (SImode, GEN_INT (0x05000000));
-  rtx g1_ori = gen_rtx_HIGH (SImode, GEN_INT (0x82106000));
-  rtx g2_ori = gen_rtx_HIGH (SImode, GEN_INT (0x8410A000));
-  rtx tem = gen_reg_rtx (SImode);
-  emit_move_insn (tem, g1_sethi);
-  emit_insn (gen_iorsi3 (high_fn, high_fn, tem));
-  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 0)), high_fn);
-  emit_move_insn (tem, g1_ori);
-  emit_insn (gen_iorsi3 (low_fn, low_fn, tem));
-  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), low_fn);
-  emit_move_insn (tem, g2_sethi);
-  emit_insn (gen_iorsi3 (high_cxt, high_cxt, tem));
-  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), high_cxt);
-  emit_move_insn (tem, g2_ori);
-  emit_insn (gen_iorsi3 (low_cxt, low_cxt, tem));
-  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)), low_cxt);
+  /* SPARC 32 bit trampoline:
+
+        sethi   %hi(fn),%g1
+        sethi   %hi(static),%g2
+        jmp     %g1+%lo(fn)
+        or      %g2,%lo(static),%g2
+
+     SETHI i,r  = 00rr rrr1 00ii iiii iiii iiii iiii iiii
+     JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
+   */
+
+  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 0)),
+                  expand_binop (SImode, ior_optab,
+                                expand_shift (RSHIFT_EXPR, SImode, fnaddr,
+                                              size_int (10), 0, 1),
+                                GEN_INT (0x03000000),
+                                NULL_RTX, 1, OPTAB_DIRECT));
+
+  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
+                  expand_binop (SImode, ior_optab,
+                                expand_shift (RSHIFT_EXPR, SImode, cxt,
+                                              size_int (10), 0, 1),
+                                GEN_INT (0x05000000),
+                                NULL_RTX, 1, OPTAB_DIRECT));
+
+  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
+                  expand_binop (SImode, ior_optab,
+                                expand_and (fnaddr, GEN_INT (0x3ff), NULL_RTX),
+                                GEN_INT (0x81c06000),
+                                NULL_RTX, 1, OPTAB_DIRECT));
+
+  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
+                  expand_binop (SImode, ior_optab,
+                                expand_and (cxt, GEN_INT (0x3ff), NULL_RTX),
+                                GEN_INT (0x8410a000),
+                                NULL_RTX, 1, OPTAB_DIRECT));
+
   emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode, tramp))));
-  emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode,
-                                                   plus_constant (tramp, 8)))));
-  emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode,
-                                                   plus_constant (tramp, 16)))));
+  /* On UltraSPARC a flush flushes an entire cache line.  The trampoline is
+     aligned on a 16 byte boundary so one flush clears it all.  */
+  if (sparc_cpu != PROCESSOR_ULTRASPARC)
+    emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode,
+                                                     plus_constant (tramp, 8)))));
 }
 
 /* The 64 bit version is simpler because it makes more sense to load the
@@ -5410,17 +5406,27 @@ void
 sparc64_initialize_trampoline (tramp, fnaddr, cxt)
      rtx tramp, fnaddr, cxt;
 {
-  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 24)), cxt);
-  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 32)), fnaddr);
+  /*
+        rd      %pc,%g1
+        ldx     [%g1+24],%g5
+        jmp     %g5
+        ldx     [%g1+16],%g5
+        +16 bytes data
+   */
+
+  emit_move_insn (gen_rtx_MEM (SImode, tramp),
+                  GEN_INT (0x83414000));
+  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
+                  GEN_INT (0xca586018));
+  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
+                  GEN_INT (0x81c04000));
+  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
+                  GEN_INT (0xca586010));
+  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), cxt);
+  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 20)), fnaddr);
   emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode, tramp))));
-  emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode,
-                                                   plus_constant (tramp, 8)))));
-  emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode,
-                                                   plus_constant (tramp, 16)))));
-  emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode,
-                                                   plus_constant (tramp, 24)))));
-  emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode,
-                                                   plus_constant (tramp, 32)))));
+  if (sparc_cpu != PROCESSOR_ULTRASPARC)
+    emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode, plus_constant (tramp, 8)))));
 }
 
 /* Subroutines to support a flat (single) register window calling
@@ -6438,10 +6444,17 @@ sparc_v8plus_shift (operands, insn, opcode)
   if (GET_CODE (operands[3]) == SCRATCH)
     operands[3] = operands[0];
 
-  output_asm_insn ("sllx %H1,32,%3", operands);
-  if (sparc_check_64 (operands[1], insn) <= 0)
-    output_asm_insn ("srl %L1,0,%L1", operands);
-  output_asm_insn ("or %L1,%3,%3", operands);
+  if (GET_CODE (operands[1]) == CONST_INT)
+    {
+      output_asm_insn ("mov %1,%3", operands);
+    }
+  else
+    {
+      output_asm_insn ("sllx %H1,32,%3", operands);
+      if (sparc_check_64 (operands[1], insn) <= 0)
+        output_asm_insn ("srl %L1,0,%L1", operands);
+      output_asm_insn ("or %L1,%3,%3", operands);
+    }
 
   strcpy(asm_code, opcode);
 
   if (which_alternative != 2)
diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h
index 1e0c644..1f21659 100644
--- a/gcc/config/sparc/sparc.h
+++ b/gcc/config/sparc/sparc.h
@@ -2099,54 +2099,11 @@ extern union tree_node *current_function_decl;
 #define EPILOGUE_USES(REGNO) \
   (!TARGET_FLAT && REGNO == 31)
 
-/* Output assembler code for a block containing the constant parts
-   of a trampoline, leaving space for the variable parts.  */
-
-/* On 32 bit sparcs, the trampoline contains five instructions:
-     sethi #TOP_OF_FUNCTION,%g1
-     or #BOTTOM_OF_FUNCTION,%g1,%g1
-     sethi #TOP_OF_STATIC,%g2
-     jmp g1
-     or #BOTTOM_OF_STATIC,%g2,%g2
-
-   On 64 bit sparcs, the trampoline contains 4 insns and two pseudo-immediate
-   constants (plus some padding):
-     rd %pc,%g1
-     ldx[%g1+20],%g5
-     ldx[%g1+28],%g1
-     jmp %g1
-     nop
-     nop
-     .xword context
-     .xword function  */
-/* ??? Stack is execute-protected in v9.  */
-
-#define TRAMPOLINE_TEMPLATE(FILE) \
-do { \
-  if (TARGET_ARCH64) \
-    { \
-      fprintf (FILE, "\trd %%pc,%%g1\n"); \
-      fprintf (FILE, "\tldx [%%g1+24],%%g5\n"); \
-      fprintf (FILE, "\tldx [%%g1+32],%%g1\n"); \
-      fprintf (FILE, "\tjmp %%g1\n"); \
-      fprintf (FILE, "\tnop\n"); \
-      fprintf (FILE, "\tnop\n"); \
-      /* -mmedlow shouldn't generate .xwords, so don't use them at all */ \
-      fprintf (FILE, "\t.word 0,0,0,0\n"); \
-    } \
-  else \
-    { \
-      ASM_OUTPUT_INT (FILE, const0_rtx); \
-      ASM_OUTPUT_INT (FILE, const0_rtx); \
-      ASM_OUTPUT_INT (FILE, const0_rtx); \
-      ASM_OUTPUT_INT (FILE, GEN_INT (0x81C04000)); \
-      ASM_OUTPUT_INT (FILE, const0_rtx); \
-    } \
-} while (0)
-
 /* Length in units of the trampoline for entering a nested function.  */
-#define TRAMPOLINE_SIZE (TARGET_ARCH64 ? 40 : 20)
+#define TRAMPOLINE_SIZE (TARGET_ARCH64 ? 32 : 16)
+
+#define TRAMPOLINE_ALIGNMENT 128 /* 16 bytes */
 
 /* Emit RTL insns to initialize the variable parts of a trampoline.
    FNADDR is an RTX for the address of the function's pure code.
@@ -2155,12 +2112,10 @@ do { \
 void sparc_initialize_trampoline ();
 void sparc64_initialize_trampoline ();
 #define INITIALIZE_TRAMPOLINE(TRAMP, FNADDR, CXT) \
-  do { \
     if (TARGET_ARCH64) \
       sparc64_initialize_trampoline (TRAMP, FNADDR, CXT); \
     else \
-      sparc_initialize_trampoline (TRAMP, FNADDR, CXT); \
-  } while (0)
+      sparc_initialize_trampoline (TRAMP, FNADDR, CXT)
 
 /* Generate necessary RTL for __builtin_saveregs().
    ARGLIST is the argument list; see expr.c.  */
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index ca8de61..156c086 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -1308,6 +1308,22 @@
   [(set_attr "type" "multi")
    (set_attr "length" "3")])
 
+(define_split
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (match_operator:SI 2 "noov_compare_op"
+                           [(match_operand 1 "icc_or_fcc_reg_operand" "")
+                            (const_int 0)]))]
+  ;; 32 bit LTU/GEU are better implemented using addx/subx
+  "TARGET_V9 && REGNO (operands[1]) == SPARC_ICC_REG
+   && (GET_MODE (operands[1]) == CCXmode
+       || (GET_CODE (operands[2]) != LTU && GET_CODE (operands[2]) != GEU))"
+  [(set (match_dup 0) (const_int 0))
+   (set (match_dup 0)
+        (if_then_else:SI (match_op_dup:SI 2 [(match_dup 1) (const_int 0)])
+                         (const_int 1)
+                         (match_dup 0)))]
+  "")
+
 (define_insn "*scc_di"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (match_operator:DI 2 "noov_compare_op"
@@ -1521,7 +1537,7 @@
 {
   return output_cbranch (operands[0], 1, 0,
                          final_sequence && INSN_ANNULLED_BRANCH_P (insn),
-                         ! final_sequence);
+                         ! final_sequence, insn);
 }"
   [(set_attr "type" "branch")])
 
@@ -1536,7 +1552,7 @@
 {
   return output_cbranch (operands[0], 1, 1,
                          final_sequence && INSN_ANNULLED_BRANCH_P (insn),
-                         ! final_sequence);
+                         ! final_sequence, insn);
 }"
   [(set_attr "type" "branch")])
 
@@ -1552,7 +1568,7 @@
 {
   return output_cbranch (operands[1], 2, 0,
                          final_sequence && INSN_ANNULLED_BRANCH_P (insn),
-                         ! final_sequence);
+                         ! final_sequence, insn);
 }"
   [(set_attr "type" "branch")])
 
@@ -1568,7 +1584,7 @@
 {
   return output_cbranch (operands[1], 2, 1,
                          final_sequence && INSN_ANNULLED_BRANCH_P (insn),
-                         ! final_sequence);
+                         ! final_sequence, insn);
 }"
   [(set_attr "type" "branch")])
 
@@ -1584,7 +1600,7 @@
 {
   return output_cbranch (operands[1], 2, 0,
                          final_sequence && INSN_ANNULLED_BRANCH_P (insn),
-                         ! final_sequence);
+                         ! final_sequence, insn);
 }"
   [(set_attr "type" "branch")])
 
@@ -1600,7 +1616,7 @@
 {
   return output_cbranch (operands[1], 2, 1,
                          final_sequence && INSN_ANNULLED_BRANCH_P (insn),
-                         ! final_sequence);
+                         ! final_sequence, insn);
 }"
   [(set_attr "type" "branch")])
 
@@ -2240,6 +2256,20 @@
   operands[2] = copy_rtx (operands[0]);
   PUT_MODE (operands[2], SImode);")
 
+;; Split register to register moves.
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (match_operand:DI 1 "arith_double_operand" "rIN"))]
+  "! TARGET_ARCH64 && GET_CODE (operands[1]) == REG && REGNO (operands[0]) < 32
+   && (GET_CODE (operands[1]) != REG || REGNO (operands[1]) < 32)
+   && ! reg_overlap_mentioned_p (operands[0], operands[1])"
+  [(set (match_dup 2) (match_dup 4))
+   (set (match_dup 3) (match_dup 5))]
+  "operands[2] = gen_highpart (SImode, operands[0]);
+   operands[3] = gen_lowpart (SImode, operands[0]);
+   operands[4] = gen_highpart (SImode, operands[1]);
+   operands[5] = gen_lowpart (SImode, operands[1]);")
+
 (define_insn "*movdi_sp32"
   [(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "=r,T,U,Q,r,r,?f,?f,?Q")
         (match_operand:DI 1 "general_operand" "r,U,T,r,Q,i,f,Q,f"))]
@@ -3217,7 +3247,7 @@
   if (which_alternative == 1)
     return \"st %1,%0\";
   if (sparc_check_64 (operands[1], insn) > 0)
-    return \"nop\";
+    return final_sequence ? \"nop\" : \"\";
   return \"srl %1,0,%0\";
 "
   [(set_attr "type" "shift,store")])
@@ -3639,7 +3669,7 @@ return \"srl %1,0,%0\";
                                    gen_rtx_PLUS (DImode, operands[1],
                                                  operands[2])),
                               gen_rtx_CLOBBER (VOIDmode,
-                                               gen_rtx_REG (SImode, SPARC_ICC_REG)))));
+                                               gen_rtx_REG (CCmode, SPARC_ICC_REG)))));
       DONE;
     }
 }")
@@ -3648,7 +3678,7 @@ return \"srl %1,0,%0\";
   [(set (match_operand:DI 0 "register_operand" "=r")
         (plus:DI (match_operand:DI 1 "arith_double_operand" "%r")
                  (match_operand:DI 2 "arith_double_operand" "rHI")))
-   (clobber (reg:SI 100))]
+   (clobber (reg:CC 100))]
   "! TARGET_ARCH64"
   "*
 {
@@ -3681,7 +3711,7 @@ return \"srl %1,0,%0\";
   [(set (match_operand:DI 0 "register_operand" "=r")
         (plus:DI (match_operand:DI 1 "arith_double_operand" "%r")
                  (match_operand:DI 2 "arith_double_operand" "rHI")))
-   (clobber (reg:SI 100))]
+   (clobber (reg:CC 100))]
   "! TARGET_ARCH64 && reload_completed"
  [(parallel [(set (reg:CC_NOOV 100)
                   (compare:CC_NOOV (plus:SI (match_dup 4)
@@ -3698,16 +3728,13 @@ return \"srl %1,0,%0\";
   operands[5] = gen_lowpart (SImode, operands[2]);
   operands[6] = gen_highpart (SImode, operands[0]);
   operands[7] = gen_highpart (SImode, operands[1]);
-  if (GET_CODE (operands[2]) == CONST_INT)
-    operands[8] = INTVAL (operands[2]) < 0 ? constm1_rtx : const0_rtx;
-  else
-    operands[8] = gen_highpart (SImode, operands[2]);")
+  operands[8] = gen_highpart (SImode, operands[2]);")
 
 (define_split
   [(set (match_operand:DI 0 "register_operand" "=r")
         (minus:DI (match_operand:DI 1 "arith_double_operand" "r")
                   (match_operand:DI 2 "arith_double_operand" "rHI")))
-   (clobber (reg:SI 100))]
+   (clobber (reg:CC 100))]
   "! TARGET_ARCH64 && reload_completed"
  [(parallel [(set (reg:CC_NOOV 100)
                   (compare:CC_NOOV (minus:SI (match_dup 4)
@@ -3749,7 +3776,7 @@ return \"srl %1,0,%0\";
   [(set (match_operand:DI 0 "register_operand" "=r")
         (plus:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
                  (match_operand:DI 2 "register_operand" "r")))
-   (clobber (reg:SI 100))]
+   (clobber (reg:CC 100))]
   "! TARGET_ARCH64"
   "addcc %L2,%1,%L0\;addx %H2,0,%H0"
   [(set_attr "type" "multi")])
@@ -3823,7 +3850,7 @@ return \"srl %1,0,%0\";
                                    gen_rtx_MINUS (DImode, operands[1],
                                                   operands[2])),
                               gen_rtx_CLOBBER (VOIDmode,
-                                               gen_rtx_REG (SImode, SPARC_ICC_REG)))));
+                                               gen_rtx_REG (CCmode, SPARC_ICC_REG)))));
       DONE;
     }
 }")
@@ -3832,7 +3859,7 @@ return \"srl %1,0,%0\";
   [(set (match_operand:DI 0 "register_operand" "=r")
         (minus:DI (match_operand:DI 1 "register_operand" "r")
                   (match_operand:DI 2 "arith_double_operand" "rHI")))
-   (clobber (reg:SI 100))]
+   (clobber (reg:CC 100))]
   "! TARGET_ARCH64"
   "*
 {
@@ -3862,7 +3889,7 @@ return \"srl %1,0,%0\";
   [(set (match_operand:DI 0 "register_operand" "=r")
         (minus:DI (match_operand:DI 1 "register_operand" "r")
                   (zero_extend:DI (match_operand:SI 2 "register_operand" "r"))))
-   (clobber (reg:SI 100))]
+   (clobber (reg:CC 100))]
  "! TARGET_ARCH64"
  "subcc %L1,%2,%L0\;addx %H1,0,%H0"
  [(set_attr "type" "multi")])
@@ -4113,11 +4140,27 @@ return \"srl %1,0,%0\";
   smul %1,%2,%4\;srlx %4,%3,%0"
   [(set_attr "length" "2")])
 
+;; The combiner changes TRUNCATE in the previous pattern to SUBREG.
+(define_insn ""
+  [(set (match_operand:SI 0 "register_operand" "=h,r")
+        (subreg:SI
+         (lshiftrt:DI
+          (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+                   (sign_extend:DI (match_operand:SI 2 "register_operand" "r,r")))
+          (match_operand:SI 3 "const_int_operand" "i,i"))
+         1))
+   (clobber (match_scratch:SI 4 "=X,&h"))]
+  "TARGET_V8PLUS"
+  "@
+   smul %1,%2,%0\;srlx %0,%3,%0
+   smul %1,%2,%4\;srlx %4,%3,%0"
+  [(set_attr "length" "2")])
+
 (define_insn "const_smulsi3_highpart_v8plus"
   [(set (match_operand:SI 0 "register_operand" "=h,r")
         (truncate:SI (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
-                                           (match_operand:SI 2 "register_operand" "r,r"))
+                                           (match_operand 2 "small_int" "i,i"))
                                   (match_operand:SI 3 "const_int_operand" "i,i"))))
    (clobber (match_scratch:SI 4 "=X,&h"))]
   "TARGET_V8PLUS"
   "@
@@ -4510,6 +4553,23 @@ return \"srl %1,0,%0\";
   operands[4] = GEN_INT (~INTVAL (operands[2]) & 0xffffffff);
 }")
 
+;; Split DImode logical operations requiring two instructions.
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+        (match_operator:DI 1 "cc_arithop"       ; AND, IOR, XOR
+                           [(match_operand:DI 2 "register_operand" "")
+                            (match_operand:DI 3 "arith_double_operand" "")]))]
+  "! TARGET_ARCH64 && reload_completed
+   && GET_CODE (operands[0]) == REG && REGNO (operands[0]) < 32"
+  [(set (match_dup 4) (match_op_dup:SI 1 [(match_dup 6) (match_dup 8)]))
+   (set (match_dup 5) (match_op_dup:SI 1 [(match_dup 7) (match_dup 9)]))]
+  "operands[4] = gen_highpart (SImode, operands[0]);
+   operands[5] = gen_lowpart (SImode, operands[0]);
+   operands[6] = gen_highpart (SImode, operands[2]);
+   operands[7] = gen_lowpart (SImode, operands[2]);
+   operands[8] = gen_highpart (SImode, operands[3]);
+   operands[9] = gen_lowpart (SImode, operands[3]);")
+
 (define_insn "*and_not_di_sp32"
   [(set (match_operand:DI 0 "register_operand" "=r,b")
         (and:DI (not:DI (match_operand:DI 1 "register_operand" "r,b"))
@@ -4899,7 +4959,7 @@ return \"srl %1,0,%0\";
                               gen_rtx_SET (VOIDmode, operand0,
                                            gen_rtx_NEG (DImode, operand1)),
                               gen_rtx_CLOBBER (VOIDmode,
-                                               gen_rtx_REG (SImode, SPARC_ICC_REG)))));
+                                               gen_rtx_REG (CCmode, SPARC_ICC_REG)))));
       DONE;
     }
 }")
@@ -4907,7 +4967,7 @@ return \"srl %1,0,%0\";
 (define_insn "*negdi2_sp32"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (neg:DI (match_operand:DI 1 "register_operand" "r")))
-   (clobber (reg:SI 100))]
+   (clobber (reg:CC 100))]
   "! TARGET_ARCH64"
   "*
 {
@@ -5331,13 +5391,28 @@ return \"srl %1,0,%0\";
 
 (define_insn "ashldi3_v8plus"
   [(set (match_operand:DI 0 "register_operand" "=&h,&h,r")
-        (ashift:DI (match_operand:DI 1 "register_operand" "r,0,r")
+        (ashift:DI (match_operand:DI 1 "arith_operand" "rI,0,rI")
                    (match_operand:SI 2 "arith_operand" "rI,rI,rI")))
    (clobber (match_scratch:SI 3 "=X,X,&h"))]
   "TARGET_V8PLUS"
   "*return sparc_v8plus_shift (operands, insn, \"sllx\");"
   [(set_attr "length" "5,5,6")])
 
+;; Optimize (1LL<<x)-1
+(define_insn ""
+  [(set (match_operand:DI 0 "register_operand" "=h")
+        (plus:DI (ashift:DI (const_int 1)
+                            (match_operand:SI 2 "arith_operand" "rI"))
+                 (const_int -1)))]
+  "TARGET_V8PLUS"
+  "*
+{
+  if (GET_CODE (operands[2]) == REG && REGNO (operands[2]) == REGNO (operands[0]))
+    return \"mov 1,%L0\;sllx %L0,%2,%L0\;sub %L0,1,%L0\;srlx %L0,32,%H0\";
+  return \"mov 1,%H0\;sllx %H0,%2,%L0\;sub %L0,1,%L0\;srlx %L0,32,%H0\";
+}"
+  [(set_attr "length" "4")])
+
 (define_insn "*cmp_cc_ashift_1"
   [(set (reg:CC_NOOV 100)
         (compare:CC_NOOV (ashift:SI (match_operand:SI 0 "register_operand" "r")
@@ -5402,7 +5477,7 @@ if (! TARGET_ARCH64)
 
 (define_insn "ashrdi3_v8plus"
   [(set (match_operand:DI 0 "register_operand" "=&h,&h,r")
-        (ashiftrt:DI (match_operand:DI 1 "register_operand" "r,0,r")
+        (ashiftrt:DI (match_operand:DI 1 "arith_operand" "rI,0,rI")
                      (match_operand:SI 2 "arith_operand" "rI,rI,rI")))
    (clobber (match_scratch:SI 3 "=X,X,&h"))]
   "TARGET_V8PLUS"
@@ -5454,7 +5529,7 @@ if (! TARGET_ARCH64)
 
 (define_insn "lshrdi3_v8plus"
   [(set (match_operand:DI 0 "register_operand" "=&h,&h,r")
-        (lshiftrt:DI (match_operand:DI 1 "register_operand" "r,0,r")
+        (lshiftrt:DI (match_operand:DI 1 "arith_operand" "rI,0,rI")
                      (match_operand:SI 2 "arith_operand" "rI,rI,rI")))
   (clobber (match_scratch:SI 3 "=X,X,&h"))]
  "TARGET_V8PLUS"
@@ -6605,18 +6680,6 @@ if (! TARGET_ARCH64)
   "TARGET_ARCH64 && short_branch (INSN_UID (insn), INSN_UID (operands[2]))
    && in_same_eh_region (insn, operands[2]) && in_same_eh_region (insn, ins1)"
   "call %a0,%1\;add %%o7,(%l2-.-4),%%o7")
-;; Other miscellaneous peepholes.
-
-;; (reg:SI 100) is created by the {add,neg,sub}di patterns.
-(define_peephole
-  [(parallel [(set (match_operand:SI 0 "register_operand" "=r")
-                   (minus:SI (match_operand:SI 1 "reg_or_0_operand" "rJ")
-                             (reg:SI 100)))
-              (clobber (reg:CC 100))])
-   (set (reg:CC 100) (compare (match_dup 0) (const_int 0)))]
-  ""
-  "subxcc %r1,0,%0")
-
 ;; After a nonlocal goto, we need to restore the PIC register, but only
 ;; if we need it.  So do nothing much here, but we'll check for this in
 ;; finalize_pic.
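Editor's note on the rewritten 32 bit sparc_initialize_trampoline above: the four emit_move_insn calls compute the trampoline's instruction words at run time by OR-ing the immediate fields of the target addresses into fixed SETHI/JMPL/OR opcodes. The following host-side C sketch mirrors that arithmetic using the same constants as the patch; it is illustrative only (the function name is made up, and it assumes 32 bit addresses, as the 32 bit trampoline does).

#include <stdint.h>

/* Illustrative sketch: assemble the four words the new
   sparc_initialize_trampoline() stores at tramp+0..tramp+12.
   `tramp' is the 16-byte, 16-byte-aligned block; `fn' and `cxt'
   are the function address and static chain value.  */
static void
build_sparc32_trampoline (uint32_t tramp[4], uint32_t fn, uint32_t cxt)
{
  tramp[0] = 0x03000000u | (fn >> 10);      /* sethi %hi(fn),%g1         */
  tramp[1] = 0x05000000u | (cxt >> 10);     /* sethi %hi(static),%g2     */
  tramp[2] = 0x81c06000u | (fn & 0x3ff);    /* jmp   %g1+%lo(fn)         */
  tramp[3] = 0x8410a000u | (cxt & 0x3ff);   /* or    %g2,%lo(static),%g2 */
  /* The real code then emits "flush" so the instruction cache sees the
     new words; per the patch comment, on UltraSPARC one flush covers the
     whole 16-byte-aligned block, hence the PROCESSOR_ULTRASPARC check.  */
}

Only the high 22 bits (sethi) and low 10 bits (jmp/or immediates) come from the addresses; the remaining bits are the fixed opcode halves quoted in the SETHI/JMPL bit-layout comment inside the patch.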