diff options

author:    Claudiu Zissulescu <claziss@synopsys.com>  2017-09-01 13:43:51 +0200
committer: Claudiu Zissulescu <claziss@gcc.gnu.org>   2017-09-01 13:43:51 +0200
commit:    a2de90a45a9193cb19a5837110698fe59d6deb41 (patch)
tree:      efe25ca85f6d3a5f7496251262247e817b30cc0e /gcc/config
parent:    782bdf21895dc769cfd85a08c8c1b780924775f3 (diff)
download:  gcc-a2de90a45a9193cb19a5837110698fe59d6deb41.zip
           gcc-a2de90a45a9193cb19a5837110698fe59d6deb41.tar.gz
           gcc-a2de90a45a9193cb19a5837110698fe59d6deb41.tar.bz2
[ARC] Reimplement ZOL support.
2017-05-22 Claudiu Zissulescu <claziss@synopsys.com>
* config/arc/arc-c.c (__ARC_LPC_WIDTH__): Add builtin define.
* config/arc/arc.c (ARC_MAX_LOOP_LENGTH): Define.
(arc_conditional_register_usage): Remove ARC600 lp_count
exception.
(arc_file_start): Emit Tag_ARC_CPU_variation.
(arc_can_use_doloop_p): New conditions to use ZOLs.
(hwloop_fail): New function.
(hwloop_optimize): Likewise.
(hwloop_pattern_reg): Likewise.
(arc_doloop_hooks): New struct, to be used with reorg_loops.
(arc_reorg_loops): New function, calls reorg_loops.
(arc_reorg): Call arc_reorg_loops. Remove old ZOL handling.
(arc600_corereg_hazard): Remove ZOL checking, case handled by
hwloop_optimize.
(arc_loop_hazard): Remove function, functionality moved into
hwloop_optimize.
(arc_hazard): Remove arc_loop_hazard call.
(arc_adjust_insn_length): Remove ZOL handling, functionality moved
into hwloop_optimize.
(arc_label_align): Remove ZOL handling.
* config/arc/arc.h (LOOP_ALIGN): Changed to 0.
* config/arc/arc.md (doloop_begin): Remove pattern.
(doloop_begin_i): Likewise.
(doloop_end_i): Likewise.
(doloop_fallback): Likewise.
(doloop_fallback_m): Likewise.
(doloop_end): Reimplement expand.
(arc_lp): New pattern for LP instruction.
(loop_end): New pattern.
(loop_fail): Likewise.
(decrement_and_branch_until_zero): Likewise.
* config/arc/arc.opt (mlpc-width): New option.
* doc/invoke.texi (mlpc-width): Document option.
testsuite/
2017-05-22 Claudiu Zissulescu <claziss@synopsys.com>
* gcc.target/arc/loop-1.c: Deleted.
From-SVN: r251589
Diffstat (limited to 'gcc/config'):

 gcc/config/arc/arc-c.c       |   2
 gcc/config/arc/arc.c         | 717
 gcc/config/arc/arc.h         |  10
 gcc/config/arc/arc.md        | 419
 gcc/config/arc/arc.opt       |  25
 gcc/config/arc/predicates.md |   2

 6 files changed, 531 insertions, 644 deletions
diff --git a/gcc/config/arc/arc-c.c b/gcc/config/arc/arc-c.c index de877a1..44ff338 100644 --- a/gcc/config/arc/arc-c.c +++ b/gcc/config/arc/arc-c.c @@ -62,6 +62,8 @@ arc_cpu_cpp_builtins (cpp_reader * pfile) builtin_define_with_int_value ("__ARC_TLS_REGNO__", arc_tp_regno); + builtin_define_with_int_value ("__ARC_LPC_WIDTH__", arc_lpcwidth); + builtin_define (TARGET_BIG_ENDIAN ? "__BIG_ENDIAN__" : "__LITTLE_ENDIAN__"); if (TARGET_BIG_ENDIAN) diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c index fcca0f1..9704240 100644 --- a/gcc/config/arc/arc.c +++ b/gcc/config/arc/arc.c @@ -65,11 +65,15 @@ along with GCC; see the file COPYING3. If not see #include "rtl-iter.h" #include "alias.h" #include "opts.h" +#include "hw-doloop.h" /* Which cpu we're compiling for (ARC600, ARC601, ARC700). */ static char arc_cpu_name[10] = ""; static const char *arc_cpu_string = arc_cpu_name; +/* Maximum size of a loop. */ +#define ARC_MAX_LOOP_LENGTH 4095 + /* ??? Loads can handle any constant, stores can only handle small ones. */ /* OTOH, LIMMs cost extra, so their usefulness is limited. */ #define RTX_OK_FOR_OFFSET_P(MODE, X) \ @@ -1705,18 +1709,7 @@ arc_conditional_register_usage (void) i <= ARC_LAST_SIMD_DMA_CONFIG_REG; i++) reg_alloc_order [i] = i; } - /* For ARC600, lp_count may not be read in an instruction - following immediately after another one setting it to a new value. - There was some discussion on how to enforce scheduling constraints for - processors with missing interlocks on the gcc mailing list: - http://gcc.gnu.org/ml/gcc/2008-05/msg00021.html . - However, we can't actually use this approach, because for ARC the - delay slot scheduling pass is active, which runs after - machine_dependent_reorg. 
*/ - if (TARGET_ARC600) - CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], LP_COUNT); - else if (!TARGET_LP_WR_INTERLOCK) - fixed_regs[LP_COUNT] = 1; + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) if (!call_used_regs[regno]) CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno); @@ -6900,28 +6893,33 @@ arc_pass_by_reference (cumulative_args_t ca_v ATTRIBUTE_UNUSED, /* Implement TARGET_CAN_USE_DOLOOP_P. */ static bool -arc_can_use_doloop_p (const widest_int &iterations, const widest_int &, +arc_can_use_doloop_p (const widest_int &, + const widest_int &iterations_max, unsigned int loop_depth, bool entered_at_top) { - if (loop_depth > 1) + /* Considering limitations in the hardware, only use doloop + for innermost loops which must be entered from the top. */ + if (loop_depth > 1 || !entered_at_top) return false; - /* Setting up the loop with two sr instructions costs 6 cycles. */ - if (TARGET_ARC700 - && !entered_at_top - && wi::gtu_p (iterations, 0) - && wi::leu_p (iterations, flag_pic ? 6 : 3)) + + /* Check for lp_count width boundary. */ + if (arc_lpcwidth != 32 + && (wi::gtu_p (iterations_max, ((1 << arc_lpcwidth) - 1)) + || wi::eq_p (iterations_max, 0))) return false; return true; } -/* NULL if INSN insn is valid within a low-overhead loop. - Otherwise return why doloop cannot be applied. */ +/* NULL if INSN insn is valid within a low-overhead loop. Otherwise + return why doloop cannot be applied. */ static const char * arc_invalid_within_doloop (const rtx_insn *insn) { if (CALL_P (insn)) return "Function call in the loop."; + + /* FIXME! add here all the ZOL exceptions. */ return NULL; } @@ -7020,6 +7018,359 @@ workaround_arc_anomaly (void) } } +/* A callback for the hw-doloop pass. Called when a loop we have discovered + turns out not to be optimizable; we have to split the loop_end pattern into + a subtract and a test. 
*/ + +static void +hwloop_fail (hwloop_info loop) +{ + rtx test; + rtx insn = loop->loop_end; + + if (TARGET_V2 + && (loop->length && (loop->length <= ARC_MAX_LOOP_LENGTH)) + && REG_P (loop->iter_reg)) + { + /* TARGET_V2 has dbnz instructions. */ + test = gen_dbnz (loop->iter_reg, loop->start_label); + insn = emit_jump_insn_before (test, loop->loop_end); + } + else if (REG_P (loop->iter_reg) && (REGNO (loop->iter_reg) == LP_COUNT)) + { + /* We have the lp_count as loop iterator, try to use it. */ + emit_insn_before (gen_loop_fail (), loop->loop_end); + test = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_ZNmode, CC_REG), + const0_rtx); + test = gen_rtx_IF_THEN_ELSE (VOIDmode, test, + gen_rtx_LABEL_REF (Pmode, loop->start_label), + pc_rtx); + insn = emit_jump_insn_before (gen_rtx_SET (pc_rtx, test), + loop->loop_end); + } + else + { + emit_insn_before (gen_addsi3 (loop->iter_reg, + loop->iter_reg, + constm1_rtx), + loop->loop_end); + test = gen_rtx_NE (VOIDmode, loop->iter_reg, const0_rtx); + insn = emit_jump_insn_before (gen_cbranchsi4 (test, + loop->iter_reg, + const0_rtx, + loop->start_label), + loop->loop_end); + } + JUMP_LABEL (insn) = loop->start_label; + LABEL_NUSES (loop->start_label)++; + delete_insn (loop->loop_end); +} + +/* Optimize LOOP. 
*/ + +static bool +hwloop_optimize (hwloop_info loop) +{ + int i; + edge entry_edge; + basic_block entry_bb, bb; + rtx iter_reg, end_label; + rtx_insn *insn, *seq, *entry_after, *last_insn; + unsigned int length; + bool need_fix = false; + rtx lp_reg = gen_rtx_REG (SImode, LP_COUNT); + + if (loop->depth > 1) + { + if (dump_file) + fprintf (dump_file, ";; loop %d is not innermost\n", + loop->loop_no); + return false; + } + + if (!loop->incoming_dest) + { + if (dump_file) + fprintf (dump_file, ";; loop %d has more than one entry\n", + loop->loop_no); + return false; + } + + if (loop->incoming_dest != loop->head) + { + if (dump_file) + fprintf (dump_file, ";; loop %d is not entered from head\n", + loop->loop_no); + return false; + } + + if (loop->has_call || loop->has_asm) + { + if (dump_file) + fprintf (dump_file, ";; loop %d has invalid insn\n", + loop->loop_no); + return false; + } + + /* Scan all the blocks to make sure they don't use iter_reg. */ + if (loop->iter_reg_used || loop->iter_reg_used_outside) + { + if (dump_file) + fprintf (dump_file, ";; loop %d uses iterator\n", + loop->loop_no); + return false; + } + + /* Check if start_label appears before doloop_end. */ + length = 0; + for (insn = loop->start_label; + insn && insn != loop->loop_end; + insn = NEXT_INSN (insn)) + length += NONDEBUG_INSN_P (insn) ? get_attr_length (insn) : 0; + + if (!insn) + { + if (dump_file) + fprintf (dump_file, ";; loop %d start_label not before loop_end\n", + loop->loop_no); + return false; + } + + loop->length = length; + if (loop->length > ARC_MAX_LOOP_LENGTH) + { + if (dump_file) + fprintf (dump_file, ";; loop %d too long\n", loop->loop_no); + return false; + } + + /* Check if we use a register or not. */ + if (!REG_P (loop->iter_reg)) + { + if (dump_file) + fprintf (dump_file, ";; loop %d iterator is MEM\n", + loop->loop_no); + return false; + } + + /* Check if loop register is lpcount. 
*/ + if (REG_P (loop->iter_reg) && (REGNO (loop->iter_reg)) != LP_COUNT) + { + if (dump_file) + fprintf (dump_file, ";; loop %d doesn't use lp_count as loop" + " iterator\n", + loop->loop_no); + /* This loop doesn't use the lp_count, check though if we can + fix it. */ + if (TEST_HARD_REG_BIT (loop->regs_set_in_loop, LP_COUNT) + /* In very unique cases we may have LP_COUNT alive. */ + || (loop->incoming_src + && REGNO_REG_SET_P (df_get_live_out (loop->incoming_src), + LP_COUNT))) + return false; + else + need_fix = true; + } + + /* Check for control like instruction as the last instruction of a + ZOL. */ + bb = loop->tail; + last_insn = PREV_INSN (loop->loop_end); + + while (1) + { + for (; last_insn != BB_HEAD (bb); + last_insn = PREV_INSN (last_insn)) + if (NONDEBUG_INSN_P (last_insn)) + break; + + if (last_insn != BB_HEAD (bb)) + break; + + if (single_pred_p (bb) + && single_pred_edge (bb)->flags & EDGE_FALLTHRU + && single_pred (bb) != ENTRY_BLOCK_PTR_FOR_FN (cfun)) + { + bb = single_pred (bb); + last_insn = BB_END (bb); + continue; + } + else + { + last_insn = NULL; + break; + } + } + + if (!last_insn) + { + if (dump_file) + fprintf (dump_file, ";; loop %d has no last instruction\n", + loop->loop_no); + return false; + } + + if ((TARGET_ARC600_FAMILY || TARGET_HS) + && INSN_P (last_insn) + && (JUMP_P (last_insn) || CALL_P (last_insn) + || GET_CODE (PATTERN (last_insn)) == SEQUENCE + || get_attr_type (last_insn) == TYPE_BRCC + || get_attr_type (last_insn) == TYPE_BRCC_NO_DELAY_SLOT)) + { + if (loop->length + 2 > ARC_MAX_LOOP_LENGTH) + { + if (dump_file) + fprintf (dump_file, ";; loop %d too long\n", loop->loop_no); + return false; + } + if (dump_file) + fprintf (dump_file, ";; loop %d has a control like last insn;" + "add a nop\n", + loop->loop_no); + + last_insn = emit_insn_after (gen_nopv (), last_insn); + } + + if (LABEL_P (last_insn)) + { + if (dump_file) + fprintf (dump_file, ";; loop %d has a label as last insn;" + "add a nop\n", + loop->loop_no); + 
last_insn = emit_insn_after (gen_nopv (), last_insn); + } + loop->last_insn = last_insn; + + /* Get the loop iteration register. */ + iter_reg = loop->iter_reg; + + gcc_assert (REG_P (iter_reg)); + + entry_edge = NULL; + + FOR_EACH_VEC_SAFE_ELT (loop->incoming, i, entry_edge) + if (entry_edge->flags & EDGE_FALLTHRU) + break; + + if (entry_edge == NULL) + { + if (dump_file) + fprintf (dump_file, ";; loop %d has no fallthru edge jumping" + "into the loop\n", + loop->loop_no); + return false; + } + /* The loop is good. */ + end_label = gen_label_rtx (); + loop->end_label = end_label; + + /* Place the zero_cost_loop_start instruction before the loop. */ + entry_bb = entry_edge->src; + + start_sequence (); + + if (need_fix) + { + /* The loop uses a R-register, but the lp_count is free, thus + use lp_count. */ + emit_insn (gen_movsi (lp_reg, iter_reg)); + SET_HARD_REG_BIT (loop->regs_set_in_loop, LP_COUNT); + iter_reg = lp_reg; + if (dump_file) + { + fprintf (dump_file, ";; fix loop %d to use lp_count\n", + loop->loop_no); + } + } + + insn = emit_insn (gen_arc_lp (iter_reg, + loop->start_label, + loop->end_label)); + + seq = get_insns (); + end_sequence (); + + entry_after = BB_END (entry_bb); + if (!single_succ_p (entry_bb) || vec_safe_length (loop->incoming) > 1 + || !entry_after) + { + basic_block new_bb; + edge e; + edge_iterator ei; + + emit_insn_before (seq, BB_HEAD (loop->head)); + seq = emit_label_before (gen_label_rtx (), seq); + new_bb = create_basic_block (seq, insn, entry_bb); + FOR_EACH_EDGE (e, ei, loop->incoming) + { + if (!(e->flags & EDGE_FALLTHRU)) + redirect_edge_and_branch_force (e, new_bb); + else + redirect_edge_succ (e, new_bb); + } + + make_edge (new_bb, loop->head, 0); + } + else + { +#if 0 + while (DEBUG_INSN_P (entry_after) + || (NOTE_P (entry_after) + && NOTE_KIND (entry_after) != NOTE_INSN_BASIC_BLOCK + /* Make sure we don't split a call and its corresponding + CALL_ARG_LOCATION note. 
*/ + && NOTE_KIND (entry_after) != NOTE_INSN_CALL_ARG_LOCATION)) + entry_after = NEXT_INSN (entry_after); +#endif + entry_after = next_nonnote_insn_bb (entry_after); + + gcc_assert (entry_after); + emit_insn_before (seq, entry_after); + } + + delete_insn (loop->loop_end); + /* Insert the loop end label before the last instruction of the + loop. */ + emit_label_after (end_label, loop->last_insn); + + return true; +} + +/* A callback for the hw-doloop pass. This function examines INSN; if + it is a loop_end pattern we recognize, return the reg rtx for the + loop counter. Otherwise, return NULL_RTX. */ + +static rtx +hwloop_pattern_reg (rtx_insn *insn) +{ + rtx reg; + + if (!JUMP_P (insn) || recog_memoized (insn) != CODE_FOR_loop_end) + return NULL_RTX; + + reg = SET_DEST (XVECEXP (PATTERN (insn), 0, 1)); + if (!REG_P (reg)) + return NULL_RTX; + return reg; +} + +static struct hw_doloop_hooks arc_doloop_hooks = +{ + hwloop_pattern_reg, + hwloop_optimize, + hwloop_fail +}; + +/* Run from machine_dependent_reorg, this pass looks for doloop_end insns + and tries to rewrite the RTL of these loops so that proper Blackfin + hardware loops are generated. */ + +static void +arc_reorg_loops (void) +{ + reorg_loops (true, &arc_doloop_hooks); +} + static int arc_reorg_in_progress = 0; /* ARC's machince specific reorg function. */ @@ -7033,204 +7384,17 @@ arc_reorg (void) long offset; int changed; - workaround_arc_anomaly (); - cfun->machine->arc_reorg_started = 1; arc_reorg_in_progress = 1; - /* Link up loop ends with their loop start. 
*/ - { - for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) - if (GET_CODE (insn) == JUMP_INSN - && recog_memoized (insn) == CODE_FOR_doloop_end_i) - { - rtx_insn *top_label - = as_a <rtx_insn *> (XEXP (XEXP (SET_SRC (XVECEXP (PATTERN (insn), 0, 0)), 1), 0)); - rtx num = GEN_INT (CODE_LABEL_NUMBER (top_label)); - rtx_insn *lp, *prev = prev_nonnote_insn (top_label); - rtx_insn *lp_simple = NULL; - rtx_insn *next = NULL; - rtx op0 = XEXP (XVECEXP (PATTERN (insn), 0, 1), 0); - int seen_label = 0; - - for (lp = prev; - (lp && NONJUMP_INSN_P (lp) - && recog_memoized (lp) != CODE_FOR_doloop_begin_i); - lp = prev_nonnote_insn (lp)) - ; - if (!lp || !NONJUMP_INSN_P (lp) - || dead_or_set_regno_p (lp, LP_COUNT)) - { - HOST_WIDE_INT loop_end_id - = INTVAL (XEXP (XVECEXP (PATTERN (insn), 0, 4), 0)); - - for (prev = next = insn, lp = NULL ; prev || next;) - { - if (prev) - { - if (NONJUMP_INSN_P (prev) - && recog_memoized (prev) == CODE_FOR_doloop_begin_i - && (INTVAL (XEXP (XVECEXP (PATTERN (prev), 0, 5), 0)) - == loop_end_id)) - { - lp = prev; - break; - } - else if (LABEL_P (prev)) - seen_label = 1; - prev = prev_nonnote_insn (prev); - } - if (next) - { - if (NONJUMP_INSN_P (next) - && recog_memoized (next) == CODE_FOR_doloop_begin_i - && (INTVAL (XEXP (XVECEXP (PATTERN (next), 0, 5), 0)) - == loop_end_id)) - { - lp = next; - break; - } - next = next_nonnote_insn (next); - } - } - prev = NULL; - } - else - lp_simple = lp; - if (lp && !dead_or_set_regno_p (lp, LP_COUNT)) - { - rtx begin_cnt = XEXP (XVECEXP (PATTERN (lp), 0 ,3), 0); - if (INTVAL (XEXP (XVECEXP (PATTERN (lp), 0, 4), 0))) - /* The loop end insn has been duplicated. That can happen - when there is a conditional block at the very end of - the loop. */ - goto failure; - /* If Register allocation failed to allocate to the right - register, There is no point into teaching reload to - fix this up with reloads, as that would cost more - than using an ordinary core register with the - doloop_fallback pattern. 
*/ - if ((true_regnum (op0) != LP_COUNT || !REG_P (begin_cnt)) - /* Likewise, if the loop setup is evidently inside the loop, - we loose. */ - || (!lp_simple && lp != next && !seen_label)) - { - remove_insn (lp); - goto failure; - } - /* It is common that the optimizers copy the loop count from - another register, and doloop_begin_i is stuck with the - source of the move. Making doloop_begin_i only accept "l" - is nonsentical, as this then makes reload evict the pseudo - used for the loop end. The underlying cause is that the - optimizers don't understand that the register allocation for - doloop_begin_i should be treated as part of the loop. - Try to work around this problem by verifying the previous - move exists. */ - if (true_regnum (begin_cnt) != LP_COUNT) - { - rtx_insn *mov; - rtx set, note; + compute_bb_for_insn (); - for (mov = prev_nonnote_insn (lp); mov; - mov = prev_nonnote_insn (mov)) - { - if (!NONJUMP_INSN_P (mov)) - mov = 0; - else if ((set = single_set (mov)) - && rtx_equal_p (SET_SRC (set), begin_cnt) - && rtx_equal_p (SET_DEST (set), op0)) - break; - } - if (mov) - { - XEXP (XVECEXP (PATTERN (lp), 0 ,3), 0) = op0; - note = find_regno_note (lp, REG_DEAD, REGNO (begin_cnt)); - if (note) - remove_note (lp, note); - } - else - { - remove_insn (lp); - goto failure; - } - } - XEXP (XVECEXP (PATTERN (insn), 0, 4), 0) = num; - XEXP (XVECEXP (PATTERN (lp), 0, 4), 0) = num; - if (next == lp) - XEXP (XVECEXP (PATTERN (lp), 0, 6), 0) = const2_rtx; - else if (!lp_simple) - XEXP (XVECEXP (PATTERN (lp), 0, 6), 0) = const1_rtx; - else if (prev != lp) - { - remove_insn (lp); - add_insn_after (lp, prev, NULL); - } - if (!lp_simple) - { - XEXP (XVECEXP (PATTERN (lp), 0, 7), 0) - = gen_rtx_LABEL_REF (Pmode, top_label); - add_reg_note (lp, REG_LABEL_OPERAND, top_label); - LABEL_NUSES (top_label)++; - } - /* We can avoid tedious loop start / end setting for empty loops - be merely setting the loop count to its final value. 
*/ - if (next_active_insn (top_label) == insn) - { - rtx lc_set - = gen_rtx_SET (XEXP (XVECEXP (PATTERN (lp), 0, 3), 0), - const0_rtx); - - rtx_insn *lc_set_insn = emit_insn_before (lc_set, insn); - delete_insn (lp); - delete_insn (insn); - insn = lc_set_insn; - } - /* If the loop is non-empty with zero length, we can't make it - a zero-overhead loop. That can happen for empty asms. */ - else - { - rtx_insn *scan; + df_analyze (); - for (scan = top_label; - (scan && scan != insn - && (!NONJUMP_INSN_P (scan) || !get_attr_length (scan))); - scan = NEXT_INSN (scan)); - if (scan == insn) - { - remove_insn (lp); - goto failure; - } - } - } - else - { - /* Sometimes the loop optimizer makes a complete hash of the - loop. If it were only that the loop is not entered at the - top, we could fix this up by setting LP_START with SR . - However, if we can't find the loop begin were it should be, - chances are that it does not even dominate the loop, but is - inside the loop instead. Using SR there would kill - performance. - We use the doloop_fallback pattern here, which executes - in two cycles on the ARC700 when predicted correctly. */ - failure: - if (!REG_P (op0)) - { - rtx op3 = XEXP (XVECEXP (PATTERN (insn), 0, 5), 0); + /* Doloop optimization. */ + arc_reorg_loops (); - emit_insn_before (gen_move_insn (op3, op0), insn); - PATTERN (insn) - = gen_doloop_fallback_m (op3, JUMP_LABEL (insn), op0); - } - else - XVEC (PATTERN (insn), 0) - = gen_rtvec (2, XVECEXP (PATTERN (insn), 0, 0), - XVECEXP (PATTERN (insn), 0, 1)); - INSN_CODE (insn) = -1; - } - } - } + workaround_arc_anomaly (); /* FIXME: should anticipate ccfsm action, generate special patterns for to-be-deleted branches that have no delay slot and have at least the @@ -7774,11 +7938,11 @@ arc_register_move_cost (machine_mode, return 6; } - /* The ARC700 stalls for 3 cycles when *reading* from lp_count. 
*/ - if (TARGET_ARC700 - && (from_class == LPCOUNT_REG || from_class == ALL_CORE_REGS - || from_class == WRITABLE_CORE_REGS)) - return 8; + /* Using lp_count as scratch reg is a VERY bad idea. */ + if (from_class == LPCOUNT_REG) + return 1000; + if (to_class == LPCOUNT_REG) + return 6; /* Force an attempt to 'mov Dy,Dx' to spill. */ if ((TARGET_ARC700 || TARGET_EM) && TARGET_DPFP @@ -8220,14 +8384,6 @@ arc600_corereg_hazard (rtx_insn *pred, rtx_insn *succ) { if (!TARGET_ARC600) return 0; - /* If SUCC is a doloop_end_i with a preceding label, we must output a nop - in front of SUCC anyway, so there will be separation between PRED and - SUCC. */ - if (recog_memoized (succ) == CODE_FOR_doloop_end_i - && LABEL_P (prev_nonnote_insn (succ))) - return 0; - if (recog_memoized (succ) == CODE_FOR_doloop_begin_i) - return 0; if (GET_CODE (PATTERN (pred)) == SEQUENCE) pred = as_a <rtx_sequence *> (PATTERN (pred))->insn (1); if (GET_CODE (PATTERN (succ)) == SEQUENCE) @@ -8301,76 +8457,6 @@ arc_asm_insn_p (rtx x) return 0; } -/* We might have a CALL to a non-returning function before a loop end. - ??? Although the manual says that's OK (the target is outside the - loop, and the loop counter unused there), the assembler barfs on - this for ARC600, so we must insert a nop before such a call too. - For ARC700, and ARCv2 is not allowed to have the last ZOL - instruction a jump to a location where lp_count is modified. */ - -static bool -arc_loop_hazard (rtx_insn *pred, rtx_insn *succ) -{ - rtx_insn *jump = NULL; - rtx label_rtx = NULL_RTX; - rtx_insn *label = NULL; - basic_block succ_bb; - - if (recog_memoized (succ) != CODE_FOR_doloop_end_i) - return false; - - /* Phase 1: ARC600 and ARCv2HS doesn't allow any control instruction - (i.e., jump/call) as the last instruction of a ZOL. 
*/ - if (TARGET_ARC600 || TARGET_HS) - if (JUMP_P (pred) || CALL_P (pred) - || arc_asm_insn_p (PATTERN (pred)) - || GET_CODE (PATTERN (pred)) == SEQUENCE) - return true; - - /* Phase 2: Any architecture, it is not allowed to have the last ZOL - instruction a jump to a location where lp_count is modified. */ - - /* Phase 2a: Dig for the jump instruction. */ - if (JUMP_P (pred)) - jump = pred; - else if (GET_CODE (PATTERN (pred)) == SEQUENCE - && JUMP_P (XVECEXP (PATTERN (pred), 0, 0))) - jump = as_a <rtx_insn *> (XVECEXP (PATTERN (pred), 0, 0)); - else - return false; - - /* Phase 2b: Make sure is not a millicode jump. */ - if ((GET_CODE (PATTERN (jump)) == PARALLEL) - && (XVECEXP (PATTERN (jump), 0, 0) == ret_rtx)) - return false; - - label_rtx = JUMP_LABEL (jump); - if (!label_rtx) - return false; - - /* Phase 2c: Make sure is not a return. */ - if (ANY_RETURN_P (label_rtx)) - return false; - - /* Pahse 2d: Go to the target of the jump and check for aliveness of - LP_COUNT register. */ - label = safe_as_a <rtx_insn *> (label_rtx); - succ_bb = BLOCK_FOR_INSN (label); - if (!succ_bb) - { - gcc_assert (NEXT_INSN (label)); - if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (label))) - succ_bb = NOTE_BASIC_BLOCK (NEXT_INSN (label)); - else - succ_bb = BLOCK_FOR_INSN (NEXT_INSN (label)); - } - - if (succ_bb && REGNO_REG_SET_P (df_get_live_out (succ_bb), LP_COUNT)) - return true; - - return false; -} - /* For ARC600: A write to a core reg greater or equal to 32 must not be immediately followed by a use. 
Anticipate the length requirement to insert a nop @@ -8382,9 +8468,6 @@ arc_hazard (rtx_insn *pred, rtx_insn *succ) if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ)) return 0; - if (arc_loop_hazard (pred, succ)) - return 4; - if (TARGET_ARC600) return arc600_corereg_hazard (pred, succ); @@ -8402,24 +8485,6 @@ arc_adjust_insn_length (rtx_insn *insn, int len, bool) if (GET_CODE (PATTERN (insn)) == SEQUENCE) return len; - /* It is impossible to jump to the very end of a Zero-Overhead Loop, as - the ZOL mechanism only triggers when advancing to the end address, - so if there's a label at the end of a ZOL, we need to insert a nop. - The ARC600 ZOL also has extra restrictions on jumps at the end of a - loop. */ - if (recog_memoized (insn) == CODE_FOR_doloop_end_i) - { - rtx_insn *prev = prev_nonnote_insn (insn); - - return ((LABEL_P (prev) - || (TARGET_ARC600 - && (JUMP_P (prev) - || CALL_P (prev) /* Could be a noreturn call. */ - || (NONJUMP_INSN_P (prev) - && GET_CODE (PATTERN (prev)) == SEQUENCE)))) - ? len + 4 : len); - } - /* Check for return with but one preceding insn since function start / call. */ if (TARGET_PAD_RETURN @@ -9755,27 +9820,9 @@ arc_scheduling_not_expected (void) return cfun->machine->arc_reorg_started; } -/* Oddly enough, sometimes we get a zero overhead loop that branch - shortening doesn't think is a loop - observed with compile/pr24883.c - -O3 -fomit-frame-pointer -funroll-loops. Make sure to include the - alignment visible for branch shortening (we actually align the loop - insn before it, but that is equivalent since the loop insn is 4 byte - long.) 
*/ - int arc_label_align (rtx_insn *label) { - int loop_align = LOOP_ALIGN (LABEL); - - if (loop_align > align_labels_log) - { - rtx_insn *prev = prev_nonnote_insn (label); - - if (prev && NONJUMP_INSN_P (prev) - && GET_CODE (PATTERN (prev)) == PARALLEL - && recog_memoized (prev) == CODE_FOR_doloop_begin_i) - return loop_align; - } /* Code has a minimum p2 alignment of 1, which we must restore after an ADDR_DIFF_VEC. */ if (align_labels_log < 1) diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h index 2c06f9f..ad26de7 100644 --- a/gcc/config/arc/arc.h +++ b/gcc/config/arc/arc.h @@ -581,15 +581,15 @@ enum reg_class {0x0000f00f, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'q', r0-r3, r12-r15 */ \ {0x1000f00f, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'e', r0-r3, r12-r15, sp */ \ {0x1c001fff, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* "Rsc", r0-r12 */ \ - {0x9fffffff, 0xc0000000, 0x00000000, 0x00000000, 0x00000000}, /* 'r', r0-r28, blink, ap and pcl */ \ + {0x9fffffff, 0x80000000, 0x00000000, 0x00000000, 0x00000000}, /* 'r', r0-r28, blink, ap and pcl */ \ {0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'W', r0-r31 */ \ /* Include ap / pcl in WRITABLE_CORE_REGS for sake of symmetry. As these \ registers are fixed, it does not affect the literal meaning of the \ constraints, but it makes it a superset of GENERAL_REGS, thus \ enabling some operations that would otherwise not be possible. 
*/ \ - {0xffffffff, 0xd0000000, 0x00000000, 0x00000000, 0x00000000}, /* 'w', r0-r31, r60 */ \ - {0xffffffff, 0xdfffffff, 0x00000000, 0x00000000, 0x00000000}, /* 'c', r0-r60, ap, pcl */ \ - {0xffffffff, 0xdfffffff, 0x00000000, 0x00000000, 0x00000000}, /* 'Rac', r0-r60, ap, pcl */ \ + {0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'w', r0-r31, r60 */ \ + {0xffffffff, 0x9fffffff, 0x00000000, 0x00000000, 0x00000000}, /* 'c', r0-r60, ap, pcl */ \ + {0xffffffff, 0x9fffffff, 0x00000000, 0x00000000, 0x00000000}, /* 'Rac', r0-r60, ap, pcl */ \ {0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'Rcd', r0-r3 */ \ {0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'Rsd', r0-r1 */ \ {0x9fffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'h', r0-28, r30 */ \ @@ -1351,7 +1351,7 @@ do { \ of a loop. */ /* On the ARC, align loops to 4 byte boundaries unless doing all-out size optimization. */ -#define LOOP_ALIGN JUMP_ALIGN +#define LOOP_ALIGN(X) 0 #define LABEL_ALIGN(LABEL) (arc_label_align (LABEL)) diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index b67733a..c766306 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -554,6 +554,11 @@ (eq_attr "annul_ret_delay_insn" "yes") (eq_attr "cond_ret_delay_insn" "yes")]) +(define_delay (eq_attr "type" "loop_end") + [(eq_attr "in_delay_slot" "true") + (eq_attr "in_delay_slot" "true") + (nil)]) + ;; For ARC600, unexposing the delay sloy incurs a penalty also in the ;; non-taken case, so the only meaningful way to have an annull-true ;; filled delay slot is to conditionalize the delay slot insn. @@ -618,8 +623,8 @@ ; The iscompact attribute allows the epilogue expander to know for which ; insns it should lengthen the return insn. 
(define_insn "*movqi_insn" - [(set (match_operand:QI 0 "move_dest_operand" "=Rcq,Rcq#q, w,Rcq#q, h, w,w,???w,h, w,Rcq, S,!*x, r,r, Ucm,m,???m, m,Usc") - (match_operand:QI 1 "move_src_operand" " cL, cP,Rcq#q, P,hCm1,cL,I,?Rac,i,?i, T,Rcq,Usd,Ucm,m,?Rac,c,?Rac,Cm3,i"))] + [(set (match_operand:QI 0 "move_dest_operand" "=Rcq,Rcq#q, w,Rcq#q, h,w*l,w*l,???w,h,w*l,Rcq, S,!*x, r,r, Ucm,m,???m, m,Usc") + (match_operand:QI 1 "move_src_operand" " cL, cP,Rcq#q, P,hCm1, cL, I,?Rac,i, ?i, T,Rcq,Usd,Ucm,m,?Rac,c,?Rac,Cm3,i"))] "register_operand (operands[0], QImode) || register_operand (operands[1], QImode)" "@ @@ -655,8 +660,8 @@ "if (prepare_move_operands (operands, HImode)) DONE;") (define_insn "*movhi_insn" - [(set (match_operand:HI 0 "move_dest_operand" "=Rcq,Rcq#q, w,Rcq#q, h, w,w,???w,Rcq#q,h, w,Rcq, S, r,r, Ucm,m,???m, m,VUsc") - (match_operand:HI 1 "move_src_operand" " cL, cP,Rcq#q, P,hCm1,cL,I,?Rac, i,i,?i, T,Rcq,Ucm,m,?Rac,c,?Rac,Cm3,i"))] + [(set (match_operand:HI 0 "move_dest_operand" "=Rcq,Rcq#q, w,Rcq#q, h,w*l,w*l,???w,Rcq#q,h,w*l,Rcq, S, r,r, Ucm,m,???m, m,VUsc") + (match_operand:HI 1 "move_src_operand" " cL, cP,Rcq#q, P,hCm1, cL, I,?Rac, i,i, ?i, T,Rcq,Ucm,m,?Rac,c,?Rac,Cm3,i"))] "register_operand (operands[0], HImode) || register_operand (operands[1], HImode) || (CONSTANT_P (operands[1]) @@ -706,9 +711,9 @@ ; the iscompact attribute allows the epilogue expander to know for which ; insns it should lengthen the return insn. ; N.B. operand 1 of alternative 7 expands into pcl,symbol@gotpc . 
-(define_insn "*movsi_insn" ; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 - [(set (match_operand:SI 0 "move_dest_operand" "=Rcq,Rcq#q, w,Rcq#q, h, w,w, w, w, w, w,???w, ?w, w,Rcq#q, h, w,Rcq, S, Us<,RcqRck,!*x, r,!*Rsd,!*Rcd,r,Ucm, Usd,m,???m, m,VUsc") - (match_operand:SI 1 "move_src_operand" " cL, cP,Rcq#q, P,hCm1,cL,I,Crr,Clo,Chi,Cbi,?Rac,Cpc,Clb, ?Cal,Cal,?Cal,Uts,Rcq,RcqRck, Us>,Usd,Ucm, Usd, Ucd,m, w,!*Rzd,c,?Rac,Cm3, C32"))] +(define_insn "*movsi_insn" ; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 + [(set (match_operand:SI 0 "move_dest_operand" "=Rcq,Rcq#q, w,Rcq#q, h,w*l,w*l, w, w, w, w, ???w, ?w, w,Rcq#q, h, w*l,Rcq, S, Us<,RcqRck,!*x, r,!*Rsd,!*Rcd,r,Ucm, Usd,m,???m, m,VUsc") + (match_operand:SI 1 "move_src_operand" " cL, cP,Rcq#q, P,hCm1, cL, I,Crr,Clo,Chi,Cbi,?Rac*l,Cpc,Clb, ?Cal,Cal,?Cal,Uts,Rcq,RcqRck, Us>,Usd,Ucm, Usd, Ucd,m, w,!*Rzd,c,?Rac,Cm3, C32"))] "register_operand (operands[0], SImode) || register_operand (operands[1], SImode) || (CONSTANT_P (operands[1]) @@ -5106,317 +5111,123 @@ xtr, const0_rtx); }) +;; ------------------------------------------------------------------- +;; Hardware loop +;; ------------------------------------------------------------------- + ; operand 0 is the loop count pseudo register -; operand 1 is the loop end pattern -(define_expand "doloop_begin" - [(use (match_operand 0 "register_operand" "")) - (use (match_operand 1 "" ""))] +; operand 1 is the label to jump to at the top of the loop +(define_expand "doloop_end" + [(parallel [(set (pc) + (if_then_else + (ne (match_operand 0 "" "") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_dup 0) (plus (match_dup 0) (const_int -1))) + (unspec [(const_int 0)] UNSPEC_ARC_LP) + (clobber (match_dup 2))])] "" { - /* Using the INSN_UID of the loop end pattern to identify it causes - trouble with -fcompare-debug, so allocate a debug-independent - id instead. 
We use negative numbers so that we can use the same - slot in doloop_end_i where we later store a CODE_LABEL_NUMBER, and - still be able to tell what kind of number this is. */ - static HOST_WIDE_INT loop_end_id = 0; - - rtx id = GEN_INT (--loop_end_id); - XEXP (XVECEXP (PATTERN (operands[1]), 0, 4), 0) = id; - emit_insn (gen_doloop_begin_i (operands[0], const0_rtx, id, - const0_rtx, const0_rtx)); - DONE; + if (GET_MODE (operands[0]) != SImode) + FAIL; + operands[2] = gen_rtx_SCRATCH (SImode); }) -; ??? can't describe the insn properly as then the optimizers try to -; hoist the SETs. -;(define_insn "doloop_begin_i" -; [(set (reg:SI LP_START) (pc)) -; (set (reg:SI LP_END) (unspec:SI [(pc)] UNSPEC_ARC_LP)) -; (use (match_operand 0 "const_int_operand" "n"))] -; "" -; "lp .L__GCC__LP%0" -;) - -; The operands of doloop_end_i are also read / written by arc_reorg with -; XVECEXP (PATTERN (lp, 0, N), so if you want to change the pattern, you -; might have to adjust arc_reorg. -; operands 0 / 2 are supplied by the expander, 1, 3 and 4 are filled in -; by arc_reorg. arc_reorg might also alter operand 0. -; -; N in XVECEXP PATTERN (lp, 0 N) -; V rtl purpose -; 0 unspec UNSPEC_ARC_LP identify pattern -; 1 clobber LP_START show LP_START is set -; 2 clobber LP_END show LP_END is set -; 3 use operand0 loop count pseudo register -; 4 use operand1 before arc_reorg: -id -; after : CODE_LABEL_NUMBER of loop top label -; 5 use operand2 INSN_UID of loop end insn -; 6 use operand3 loop setup not at start (1 above, 2 below) -; 7 use operand4 LABEL_REF of top label, if not -; immediately following -; If operand1 is still zero after arc_reorg, this is an orphaned loop -; instruction that was not at the start of the loop. -; There is no point is reloading this insn - then lp_count would still not -; be available for the loop end. 
-(define_insn "doloop_begin_i" - [(unspec:SI [(pc)] UNSPEC_ARC_LP) - (clobber (reg:SI LP_START)) - (clobber (reg:SI LP_END)) - (use (match_operand:SI 0 "register_operand" "l,l,????*X")) - (use (match_operand 1 "const_int_operand" "n,n,C_0")) - (use (match_operand 2 "const_int_operand" "n,n,X")) - (use (match_operand 3 "const_int_operand" "C_0,n,X")) - (use (match_operand 4 "const_int_operand" "C_0,X,X"))] +(define_insn "arc_lp" + [(unspec:SI [(match_operand:SI 0 "register_operand" "l")] + UNSPEC_ARC_LP) + (use (label_ref (match_operand 1 "" ""))) + (use (label_ref (match_operand 2 "" "")))] "" -{ - rtx_insn *scan; - int len, size = 0; - int n_insns = 0; - rtx loop_start = operands[4]; - - if (CONST_INT_P (loop_start)) - loop_start = NULL_RTX; - /* Size implications of the alignment will be taken care of by the - alignment inserted at the loop start. */ - if (LOOP_ALIGN (0) && INTVAL (operands[1])) - { - asm_fprintf (asm_out_file, "\t.p2align %d\\n", LOOP_ALIGN (0)); - arc_clear_unalign (); - } - if (!INTVAL (operands[1])) - return "; LITTLE LOST LOOP"; - if (loop_start && flag_pic) - { - /* ??? Can do better for when a scratch register - is known. But that would require extra testing. */ - return "push_s r0\;add r0,pcl,%4@pcl\;sr r0,[2]; LP_START\;add r0,pcl,.L__GCC__LP%1@pcl\;sr r0,[3]; LP_END\;pop_s r0"; - } - /* Check if the loop end is in range to be set by the lp instruction. */ - size = INTVAL (operands[3]) < 2 ? 0 : 2048; - for (scan = insn; scan && size < 2048; scan = NEXT_INSN (scan)) - { - if (!INSN_P (scan)) - continue; - if (recog_memoized (scan) == CODE_FOR_doloop_end_i - && (XEXP (XVECEXP (PATTERN (scan), 0, 4), 0) - == XEXP (XVECEXP (PATTERN (insn), 0, 4), 0))) - break; - len = get_attr_length (scan); - size += len; - } - /* Try to verify that there are at least three instruction fetches - between the loop setup and the first encounter of the loop end. 
*/ - for (scan = NEXT_INSN (insn); scan && n_insns < 3; scan = NEXT_INSN (scan)) - { - if (!INSN_P (scan)) - continue; - if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (scan))) - scan = seq->insn (0); - if (JUMP_P (scan)) - { - if (recog_memoized (scan) != CODE_FOR_doloop_end_i) - { - n_insns += 2; - if (simplejump_p (scan)) - { - scan = as_a <rtx_insn *> (XEXP (SET_SRC (PATTERN (scan)), 0)); - continue; - } - - rtx lab = JUMP_LABEL (scan); - if (!lab) - break; - - rtx_insn *next_scan - = next_active_insn (NEXT_INSN (PREV_INSN (scan))); - if (next_scan - && recog_memoized (next_scan) != CODE_FOR_doloop_begin_i) - break; - - /* JUMP_LABEL might be simple_return instead if an insn. */ - if (!INSN_P (lab)) - { - n_insns++; - break; - } - - rtx_insn *next_lab = next_active_insn (as_a<rtx_insn *> (lab)); - if (next_lab - && recog_memoized (next_lab) != CODE_FOR_doloop_begin_i) - break; - - n_insns++; - } - break; - } - len = get_attr_length (scan); - /* Size estimation of asms assumes that each line which is nonempty - codes an insn, and that each has a long immediate. For minimum insn - count, assume merely that a nonempty asm has at least one insn. */ - if (GET_CODE (PATTERN (scan)) == ASM_INPUT - || asm_noperands (PATTERN (scan)) >= 0) - n_insns += (len != 0); - else - n_insns += (len > 4 ? 2 : (len ? 1 : 0)); - } - if (LOOP_ALIGN (0)) - { - asm_fprintf (asm_out_file, "\t.p2align %d\\n", LOOP_ALIGN (0)); - arc_clear_unalign (); - } - gcc_assert (n_insns || GET_CODE (next_nonnote_insn (insn)) == CODE_LABEL); - if (size >= 2048 || (TARGET_ARC600 && n_insns == 1) || loop_start) - { - if (flag_pic) - { - /* ??? Can do better for when a scratch register - is known. But that would require extra testing. */ - arc_clear_unalign (); - return ".p2align 2\;push_s r0\;add r0,pcl,24\;sr r0,[2]; LP_START\;add r0,pcl,.L__GCC__LP%1@pcl\;sr r0,[3]; LP_END\;pop_s r0"; - } - output_asm_insn ((size < 2048 - ? 
"lp .L__GCC__LP%1" : "sr .L__GCC__LP%1,[3]; LP_END"), - operands); - output_asm_insn (loop_start - ? "sr %4,[2]; LP_START" : "sr 0f,[2]; LP_START", - operands); - if (TARGET_ARC600 && n_insns < 1) - output_asm_insn ("nop", operands); - return (TARGET_ARC600 && n_insns < 3) ? "nop_s\;nop_s\;0:" : "0:"; - } - else if (TARGET_ARC600 && n_insns < 3) - { - /* At least four instructions are needed between the setting of LP_COUNT - and the loop end - but the lp instruction qualifies as one. */ - rtx_insn *prev = prev_nonnote_insn (insn); - - if (!INSN_P (prev) || dead_or_set_regno_p (prev, LP_COUNT)) - output_asm_insn ("nop", operands); - } - return "lp .L__GCC__LP%1"; -} + "lp\\t@%l2\\t; %0:@%l1->@%l2" [(set_attr "type" "loop_setup") - (set_attr_alternative "length" -; FIXME: length is usually 4, but we need branch shortening -; to get this right. -; [(if_then_else (match_test "TARGET_ARC600") (const_int 16) (const_int 4)) - [(if_then_else (match_test "flag_pic") (const_int 24) (const_int 16)) - (if_then_else (match_test "flag_pic") (const_int 28) (const_int 16)) - (const_int 0)])] - ;; ??? we should really branch shorten this insn, but then we'd - ;; need a proper label first. N.B. the end label can not only go out - ;; of range when it is far away, but also when it precedes the loop - - ;; which, unfortunately, it sometimes does, when the loop "optimizer" - ;; messes things up. -) - -; operand 0 is the loop count pseudo register -; operand 1 is the label to jump to at the top of the loop -; Use this for the ARC600 and ARC700. -; ??? ARC600 might want to check if the loop has few iteration and only a -; single insn - loop setup is expensive then. -(define_expand "doloop_end" - [(use (match_operand 0 "register_operand" "")) - (use (label_ref (match_operand 1 "" "")))] - "!TARGET_ARC601" -{ - /* We could do smaller bivs with biv widening, and wider bivs by having - a high-word counter in an outer loop - but punt on this for now. 
*/ - if (GET_MODE (operands[0]) != SImode) - FAIL; - emit_jump_insn (gen_doloop_end_i (operands[0], operands[1], const0_rtx)); - DONE; -}) + (set_attr "length" "4")]) -(define_insn_and_split "doloop_end_i" +;; if by any chance the lp_count is not used, then use an 'r' +;; register, instead of going to memory. +(define_insn "loop_end" [(set (pc) - (if_then_else (ne (match_operand:SI 0 "shouldbe_register_operand" "+l,*c,*m") - (const_int 1)) + (if_then_else (ne (match_operand:SI 2 "nonimmediate_operand" "0,0") + (const_int 1)) (label_ref (match_operand 1 "" "")) (pc))) - (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1))) - (use (reg:SI LP_START)) - (use (reg:SI LP_END)) - (use (match_operand 2 "const_int_operand" "n,???Cn0,???X")) - (clobber (match_scratch:SI 3 "=X,X,&????r"))] + (set (match_operand:SI 0 "nonimmediate_operand" "=l!r,m") + (plus (match_dup 2) (const_int -1))) + (unspec [(const_int 0)] UNSPEC_ARC_LP) + (clobber (match_scratch:SI 3 "=X,&r"))] "" - "* -{ - rtx_insn *prev = prev_nonnote_insn (insn); - - /* If there is an immediately preceding label, we must output a nop, - lest a branch to that label will fall out of the loop. - ??? We could try to avoid this by claiming to have a delay slot if there - is a preceding label, and outputting the delay slot insn instead, if - present. - Or we could have some optimization that changes the source edge to update - the loop count and jump to the loop start instead. */ - /* For ARC600, we must also prevent jumps inside the loop and jumps where - the loop counter value is live at the target from being directly at the - loop end. Being sure that the loop counter is dead at the target is - too much hair - we can't rely on data flow information at this point - - so insert a nop for all branches. - The ARC600 also can't read the loop counter in the last insn of a loop. 
*/ - if (LABEL_P (prev)) - output_asm_insn (\"nop%?\", operands); - return \"\\n.L__GCC__LP%2: ; loop end, start is %1\"; -}" - "&& memory_operand (operands[0], SImode)" - [(pc)] -{ - emit_move_insn (operands[3], operands[0]); - emit_jump_insn (gen_doloop_fallback_m (operands[3], operands[1], operands[0])); - DONE; -} - [(set_attr "type" "loop_end") - (set (attr "length") - (if_then_else (match_test "LABEL_P (prev_nonnote_insn (insn))") - (const_int 4) (const_int 0)))] -) + "\\t;%0 %1 %2" + [(set_attr "length" "0") + (set_attr "predicable" "no") + (set_attr "type" "loop_end")]) -; This pattern is generated by arc_reorg when there is no recognizable -; loop start. -(define_insn "*doloop_fallback" - [(set (pc) (if_then_else (ne (match_operand:SI 0 "register_operand" "+r,!w") - (const_int 1)) - (label_ref (match_operand 1 "" "")) - (pc))) - (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1)))] - ; avoid fooling the loop optimizer into assuming this is a special insn. - "reload_completed" - "*return get_attr_length (insn) == 8 - ? \"brne.d %0,1,%1\;sub %0,%0,1\" - : \"breq %0,1,0f\;b.d %1\;sub %0,%0,1\\n0:\";" - [(set (attr "length") - (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -256)) - (le (minus (match_dup 1) (pc)) (const_int 244))) - (const_int 8) (const_int 12))) - (set_attr "type" "brcc_no_delay_slot") - (set_attr "cond" "nocond")] -) +;; split pattern for the very slim chance when the loop register is +;; memory. 
+(define_split + [(set (pc) + (if_then_else (ne (match_operand:SI 0 "memory_operand") + (const_int 1)) + (label_ref (match_operand 1 "")) + (pc))) + (set (match_dup 0) (plus (match_dup 0) (const_int -1))) + (unspec [(const_int 0)] UNSPEC_ARC_LP) + (clobber (match_scratch:SI 2))] + "memory_operand (operands[0], SImode)" + [(set (match_dup 2) (match_dup 0)) + (set (match_dup 2) (plus:SI (match_dup 2) (const_int -1))) + (set (match_dup 0) (match_dup 2)) + (set (reg:CC CC_REG) (compare:CC (match_dup 2) (const_int 0))) + (set (pc) + (if_then_else (ne (reg:CC CC_REG) + (const_int 0)) + (label_ref (match_dup 1)) + (pc)))] + "") -; reload can't make output reloads for jump insns, so we have to do this by hand. -(define_insn "doloop_fallback_m" - [(set (pc) (if_then_else (ne (match_operand:SI 0 "register_operand" "+&r") - (const_int 1)) - (label_ref (match_operand 1 "" "")) - (pc))) - (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1))) - (set (match_operand:SI 2 "memory_operand" "=m") - (plus:SI (match_dup 0) (const_int -1)))] - ; avoid fooling the loop optimizer into assuming this is a special insn. - "reload_completed" - "*return get_attr_length (insn) == 12 - ? 
\"sub %0,%0,1\;brne.d %0,0,%1\;st%U2%V2 %0,%2\" - : \"sub %0,%0,1\;breq %0,0,0f\;b.d %1\\n0:\tst%U2%V2 %0,%2\";" - [(set (attr "length") - (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -252)) - (le (minus (match_dup 1) (pc)) (const_int 244))) - (const_int 12) (const_int 16))) - (set_attr "type" "brcc_no_delay_slot") - (set_attr "cond" "nocond")] -) +(define_insn "loop_fail" + [(set (reg:SI LP_COUNT) + (plus:SI (reg:SI LP_COUNT) (const_int -1))) + (set (reg:CC_ZN CC_REG) + (compare:CC_ZN (plus:SI (reg:SI LP_COUNT) (const_int -1)) + (const_int 0)))] + "" + "sub.f%?\\tlp_count,lp_count,1" + [(set_attr "iscompact" "false") + (set_attr "type" "compare") + (set_attr "cond" "set_zn") + (set_attr "length" "4") + (set_attr "predicable" "yes")]) + +(define_insn_and_split "dbnz" + [(set (pc) + (if_then_else + (ne (plus:SI (match_operand:SI 0 "nonimmediate_operand" "+r!l,m") + (const_int -1)) + (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_dup 0) + (plus:SI (match_dup 0) + (const_int -1))) + (clobber (match_scratch:SI 2 "=X,r"))] + "TARGET_V2" + "@ + dbnz%#\\t%0,%l1 + #" + "TARGET_V2 && reload_completed && memory_operand (operands[0], SImode)" + [(set (match_dup 2) (match_dup 0)) + (set (match_dup 2) (plus:SI (match_dup 2) (const_int -1))) + (set (reg:CC CC_REG) (compare:CC (match_dup 2) (const_int 0))) + (set (match_dup 0) (match_dup 2)) + (set (pc) (if_then_else (ge (reg:CC CC_REG) + (const_int 0)) + (label_ref (match_dup 1)) + (pc)))] + "" + [(set_attr "iscompact" "false") + (set_attr "type" "loop_end") + (set_attr "length" "4,20")]) (define_expand "movmemsi" [(match_operand:BLK 0 "" "") diff --git a/gcc/config/arc/arc.opt b/gcc/config/arc/arc.opt index ad2df26..d1ebd44 100644 --- a/gcc/config/arc/arc.opt +++ b/gcc/config/arc/arc.opt @@ -494,3 +494,28 @@ Specifies the registers that the processor saves on an interrupt entry and exit. 
mrgf-banked-regs= Target RejectNegative Joined Var(arc_deferred_options) Defer Specifies the number of registers replicated in second register bank on entry to fast interrupt. + +mlpc-width= +Target RejectNegative Joined Enum(arc_lpc) Var(arc_lpcwidth) Init(32) +Sets LP_COUNT register width. Possible values are 8, 16, 20, 24, 28, and 32. + +Enum +Name(arc_lpc) Type(int) + +EnumValue +Enum(arc_lpc) String(8) Value(8) + +EnumValue +Enum(arc_lpc) String(16) Value(16) + +EnumValue +Enum(arc_lpc) String(20) Value(20) + +EnumValue +Enum(arc_lpc) String(24) Value(24) + +EnumValue +Enum(arc_lpc) String(28) Value(28) + +EnumValue +Enum(arc_lpc) String(32) Value(32) diff --git a/gcc/config/arc/predicates.md b/gcc/config/arc/predicates.md index 3e4ff80..3dfe0ca 100644 --- a/gcc/config/arc/predicates.md +++ b/gcc/config/arc/predicates.md @@ -362,6 +362,8 @@ else if (TARGET_MUL64_SET && (REGNO (op) == 57 || REGNO(op) == 58 || REGNO(op) == 59 )) return 0; + else if (REGNO (op) == LP_COUNT) + return 1; else return dest_reg_operand (op, mode); case SUBREG : |