author     Bernd Schmidt <bernds@redhat.com>    2000-12-21 18:26:07 +0000
committer  Bernd Schmidt <bernds@gcc.gnu.org>   2000-12-21 18:26:07 +0000
commit     2130b7fb30f2ed6ea7b1e7326058e06d2e604e89 (patch)
tree       1117103e48ca7bedfe39afe37a9ddcaad38397de
parent     5f446d2172c1ca3e776b91a40127de9efa2f62d9 (diff)
ia64 specific scheduling bits
From-SVN: r38419
-rw-r--r--  gcc/ChangeLog                 |   51
-rw-r--r--  gcc/Makefile.in               |    3
-rw-r--r--  gcc/config/ia64/ia64-protos.h |    8
-rw-r--r--  gcc/config/ia64/ia64.c        | 1590
-rw-r--r--  gcc/config/ia64/ia64.h        |   53
-rw-r--r--  gcc/config/ia64/ia64.md       |  203
-rw-r--r--  gcc/rtl.h                     |    3
-rw-r--r--  gcc/rtlanal.c                 |    5
8 files changed, 1674 insertions, 242 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 4ebc220..3343904 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,54 @@
+2000-12-21 Bernd Schmidt <bernds@redhat.com>
+
+ * Makefile.in (out_object_file): Depend on sched-int.h.
+ * rtl.h (single_set_1): New macro.
+ (single_set_2): Renamed from single_set_1 and extra argument added.
+ * rtlanal.c (single_set_2): Likewise.
+
+ * config/ia64/ia64-protos.h (get_bundle_name, ia64_issue_rate,
+ ia64_adjust_cost, ia64_sched_init, ia64_sched_finish,
+ ia64_sched_reorder, ia64_sched_reorder2, ia64_variable_issue):
+ Declare.
+ * config/ia64/ia64.c: Include "sched-int.h".
+ (hard_regno_rename_ok): Also disallow renaming from the various
+ reg_save_* regs.
+ (ia64_safe_itanium_requires_unit0, ia64_safe_itanium_class,
+ ia64_safe_type, init_insn_group_barriers, group_barrier_needed_p,
+ safe_group_barrier_needed_p, fixup_errata): New static functions.
+ (rtx_needs_barrier): Handle bundle selector and cycle display
+ insns.
+ (emit_insn_group_barriers): Accept additional FILE * arg. All
+ callers changed. Rework to only generate stop bits between
+ basic blocks that haven't been inserted by scheduling.
+ (struct bundle, struct ia64_packet): New structures.
+ (NR_BUNDLES, NR_PACKETS): New macros.
+ (bundle, packets, type_names): New static arrays.
+ (ia64_final_schedule): New variable.
+ (ia64_single_set, insn_matches_slot, ia64_emit_insn_before,
+ gen_nop_type, finish_last_head, rotate_one_bundle, rotate_two_bundles,
+ cycle_end_fill_slots, packet_matches_p, get_split, find_best_insn,
+ find_best_packet, itanium_reorder, dump_current_packet, schedule_stop):
+ New static functions.
+ (ia64_issue_rate, ia64_sched_init, ia64_sched_reorder,
+ ia64_sched_finish, ia64_sched_reorder2, ia64_variable_issue): New
+ functions.
+ (ia64_reorg): Perform a final scheduling pass.
+ * config/ia64/ia64.h (CONST_COSTS): Slightly increase SYMBOL_REF costs.
+ (MAX_CONDITIONAL_EXECUTE, ADJUST_COST, ISSUE_RATE, MD_SCHED_INIT,
+ MD_SCHED_REORDER, MD_SCHED_REORDER2, MD_SCHED_FINISH,
+ MD_SCHED_VARIABLE_ISSUE): Define macros.
+ (ia64_final_schedule): Declare variable.
+ * config/ia64/ia64.md (attr itanium_class): Add some more classes.
+ (attr type): Account for them.
+ (itanium_requires_unit0): New attribute.
+ (function units): Rewrite.
+ (some splitters): Don't create scheduling barriers here.
+ (gr_spill_internal, gr_restore_internal): Don't predicate the
+ pseudo-op.
+ (nop_m, nop_i, nop_f, nop_b, nop_x, cycle_display, cycle_display_1,
+ bundle_selector): New patterns.
+ (insn_group_barrier): Now has an operand.
+
2000-12-21 DJ Delorie <dj@redhat.com>
* dwarf2out.c (simple_decl_align_in_bits): new
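
(The rtl.h and rtlanal.c hunks are omitted from this excerpt.  From the
entry above and the call in ia64_single_set below, the renamed interface
plausibly looks like the following sketch -- an inference, not text quoted
from the patch:)

/* single_set_2 takes the insn and its (possibly COND_EXEC-stripped)
   pattern; single_set_1 keeps the old one-argument form as a macro.  */
#define single_set_1(insn) single_set_2 (insn, PATTERN (insn))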
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index fec7c15..9639457 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1499,7 +1499,8 @@ dependence.o : dependence.c $(CONFIG_H) system.h $(RTL_H) $(TREE_H) \
$(out_object_file): $(out_file) $(CONFIG_H) $(TREE_H) $(GGC_H) \
$(RTL_H) $(REGS_H) hard-reg-set.h real.h insn-config.h conditions.h \
- insn-flags.h output.h $(INSN_ATTR_H) insn-codes.h system.h toplev.h function.h
+ insn-flags.h output.h $(INSN_ATTR_H) insn-codes.h system.h toplev.h \
+ function.h sched-int.h
$(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
$(out_file) $(OUTPUT_OPTION)
diff --git a/gcc/config/ia64/ia64-protos.h b/gcc/config/ia64/ia64-protos.h
index fc1dff1..cb076c7 100644
--- a/gcc/config/ia64/ia64-protos.h
+++ b/gcc/config/ia64/ia64-protos.h
@@ -92,6 +92,14 @@ extern enum reg_class ia64_secondary_reload_class PARAMS((enum reg_class,
rtx));
extern void ia64_reorg PARAMS((rtx));
extern void process_for_unwind_directive PARAMS ((FILE *, rtx));
+extern const char *get_bundle_name PARAMS ((int));
+extern int ia64_issue_rate PARAMS ((void));
+extern int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
+extern void ia64_sched_init PARAMS ((FILE *, int, int));
+extern void ia64_sched_finish PARAMS ((FILE *, int));
+extern int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
+extern int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
+extern int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));
#endif /* RTX_CODE */
#ifdef TREE_CODE
diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c
index e523eef..3478883 100644
--- a/gcc/config/ia64/ia64.c
+++ b/gcc/config/ia64/ia64.c
@@ -42,6 +42,7 @@ Boston, MA 02111-1307, USA. */
#include "ggc.h"
#include "basic-block.h"
#include "toplev.h"
+#include "sched-int.h"
/* This is used for communication between ASM_OUTPUT_LABEL and
ASM_OUTPUT_LABELREF. */
@@ -114,7 +115,7 @@ static void fix_range PARAMS ((const char *));
static void ia64_add_gc_roots PARAMS ((void));
static void ia64_init_machine_status PARAMS ((struct function *));
static void ia64_mark_machine_status PARAMS ((struct function *));
-static void emit_insn_group_barriers PARAMS ((rtx));
+static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_predicate_relation_info PARAMS ((void));
static int process_set PARAMS ((FILE *, rtx));
@@ -127,7 +128,6 @@ static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int,
static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
tree, rtx));
static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
-
/* Return 1 if OP is a valid operand for the MEM of a CALL insn. */
@@ -2401,6 +2401,14 @@ ia64_hard_regno_rename_ok (from, to)
|| to == current_frame_info.reg_save_ar_lc)
return 0;
+ if (from == current_frame_info.reg_fp
+ || from == current_frame_info.reg_save_b0
+ || from == current_frame_info.reg_save_pr
+ || from == current_frame_info.reg_save_ar_pfs
+ || from == current_frame_info.reg_save_ar_unat
+ || from == current_frame_info.reg_save_ar_lc)
+ return 0;
+
/* Don't use output registers outside the register frame. */
if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
return 0;
@@ -3674,6 +3682,40 @@ ia64_override_options ()
ia64_add_gc_roots ();
}
+static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx));
+static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx));
+static enum attr_type ia64_safe_type PARAMS((rtx));
+
+static enum attr_itanium_requires_unit0
+ia64_safe_itanium_requires_unit0 (insn)
+ rtx insn;
+{
+ if (recog_memoized (insn) >= 0)
+ return get_attr_itanium_requires_unit0 (insn);
+ else
+ return ITANIUM_REQUIRES_UNIT0_NO;
+}
+
+static enum attr_itanium_class
+ia64_safe_itanium_class (insn)
+ rtx insn;
+{
+ if (recog_memoized (insn) >= 0)
+ return get_attr_itanium_class (insn);
+ else
+ return ITANIUM_CLASS_UNKNOWN;
+}
+
+static enum attr_type
+ia64_safe_type (insn)
+ rtx insn;
+{
+ if (recog_memoized (insn) >= 0)
+ return get_attr_type (insn);
+ else
+ return TYPE_UNKNOWN;
+}
+
/* The following collection of routines emit instruction group stop bits as
necessary to avoid dependencies. */
@@ -3744,6 +3786,9 @@ static void rws_update PARAMS ((struct reg_write_state *, int,
static int rws_access_regno PARAMS ((int, struct reg_flags, int));
static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
+static void init_insn_group_barriers PARAMS ((void));
+static int group_barrier_needed_p PARAMS ((rtx));
+static int safe_group_barrier_needed_p PARAMS ((rtx));
/* Update *RWS for REGNO, which is being written by the current instruction,
with predicate PRED, and associated register flags in FLAGS. */
@@ -4189,6 +4234,8 @@ rtx_needs_barrier (x, flags, pred)
case 19: /* fetchadd_acq */
case 20: /* mov = ar.bsp */
case 21: /* flushrs */
+ case 22: /* bundle selector */
+ case 23: /* cycle display */
break;
case 5: /* recip_approx */
@@ -4279,6 +4326,179 @@ rtx_needs_barrier (x, flags, pred)
return need_barrier;
}
+/* Clear out the state for group_barrier_needed_p at the start of a
+ sequence of insns. */
+
+static void
+init_insn_group_barriers ()
+{
+ memset (rws_sum, 0, sizeof (rws_sum));
+}
+
+/* Cumulative info for the current instruction group. */
+struct reg_write_state rws_sum[NUM_REGS];
+
+/* Given the current state, recorded by previous calls to this function,
+ determine whether a group barrier (a stop bit) is necessary before INSN.
+ Return nonzero if so. */
+
+static int
+group_barrier_needed_p (insn)
+ rtx insn;
+{
+ rtx pat;
+ int need_barrier = 0;
+ struct reg_flags flags;
+
+ memset (&flags, 0, sizeof (flags));
+ switch (GET_CODE (insn))
+ {
+ case NOTE:
+ break;
+
+ case BARRIER:
+ /* A barrier doesn't imply an instruction group boundary. */
+ break;
+
+ case CODE_LABEL:
+ memset (rws_insn, 0, sizeof (rws_insn));
+ return 1;
+
+ case CALL_INSN:
+ flags.is_branch = 1;
+ flags.is_sibcall = SIBLING_CALL_P (insn);
+ memset (rws_insn, 0, sizeof (rws_insn));
+ need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
+ break;
+
+ case JUMP_INSN:
+ flags.is_branch = 1;
+ /* FALLTHRU */
+
+ case INSN:
+ if (GET_CODE (PATTERN (insn)) == USE
+ || GET_CODE (PATTERN (insn)) == CLOBBER)
+ /* Don't care about USE and CLOBBER "insns"---those are used to
+ indicate to the optimizer that it shouldn't get rid of
+ certain operations. */
+ break;
+
+ pat = PATTERN (insn);
+
+ /* Ug. Hack hacks hacked elsewhere. */
+ switch (recog_memoized (insn))
+ {
+ /* We play dependency tricks with the epilogue in order
+ to get proper schedules. Undo this for dv analysis. */
+ case CODE_FOR_epilogue_deallocate_stack:
+ pat = XVECEXP (pat, 0, 0);
+ break;
+
+ /* The pattern we use for br.cloop confuses the code above.
+ The second element of the vector is representative. */
+ case CODE_FOR_doloop_end_internal:
+ pat = XVECEXP (pat, 0, 1);
+ break;
+
+ /* Doesn't generate code. */
+ case CODE_FOR_pred_rel_mutex:
+ return 0;
+
+ default:
+ break;
+ }
+
+ memset (rws_insn, 0, sizeof (rws_insn));
+ need_barrier = rtx_needs_barrier (pat, flags, 0);
+
+ /* Check to see if the previous instruction was a volatile
+ asm. */
+ if (! need_barrier)
+ need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
+
+ break;
+
+ default:
+ abort ();
+ }
+ return need_barrier;
+}
+
+/* Like group_barrier_needed_p, but do not clobber the current state. */
+
+static int
+safe_group_barrier_needed_p (insn)
+ rtx insn;
+{
+ struct reg_write_state rws_saved[NUM_REGS];
+ int t;
+ memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
+ t = group_barrier_needed_p (insn);
+ memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
+ return t;
+}
+
+/* INSNS is a chain of instructions.  Scan the chain, and insert stop bits
+   as necessary to eliminate dependencies.  */
+
+static void
+emit_insn_group_barriers (dump, insns)
+ FILE *dump;
+ rtx insns;
+{
+ rtx insn;
+ rtx last_label = 0;
+ int insns_since_last_label = 0;
+
+ init_insn_group_barriers ();
+
+ for (insn = insns; insn; insn = NEXT_INSN (insn))
+ {
+ if (GET_CODE (insn) == CODE_LABEL)
+ {
+ if (insns_since_last_label)
+ last_label = insn;
+ insns_since_last_label = 0;
+ }
+ else if (GET_CODE (insn) == NOTE
+ && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
+ {
+ if (insns_since_last_label)
+ last_label = insn;
+ insns_since_last_label = 0;
+ }
+ else if (GET_CODE (insn) == INSN
+ && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
+ && XINT (PATTERN (insn), 1) == 2)
+ {
+ init_insn_group_barriers ();
+ last_label = 0;
+ }
+ else if (INSN_P (insn))
+ {
+ insns_since_last_label = 1;
+
+ if (group_barrier_needed_p (insn))
+ {
+ if (last_label)
+ {
+ if (dump)
+ fprintf (dump, "Emitting stop before label %d\n",
+ INSN_UID (last_label));
+ emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
+ insn = last_label;
+ }
+ init_insn_group_barriers ();
+ last_label = 0;
+ }
+ }
+ }
+}
+
+static int errata_find_address_regs PARAMS ((rtx *, void *));
+static void errata_emit_nops PARAMS ((rtx));
+static void fixup_errata PARAMS ((void));
+
/* This structure is used to track some details about the previous insns
groups so we can determine if it may be necessary to insert NOPs to
workaround hardware errata. */
@@ -4291,20 +4511,6 @@ static struct group
/* Index into the last_group array. */
static int group_idx;
-static void emit_group_barrier_after PARAMS ((rtx));
-static int errata_find_address_regs PARAMS ((rtx *, void *));
-static void errata_emit_nops PARAMS ((rtx));
-
-/* Create a new group barrier, emit it after AFTER, and advance group_idx. */
-static void
-emit_group_barrier_after (after)
- rtx after;
-{
- emit_insn_after (gen_insn_group_barrier (), after);
- group_idx = (group_idx + 1) % 3;
- memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
-}
-
/* Called through for_each_rtx; determines if a hard register that was
conditionally set in the previous group is used as an address register.
It ensures that for_each_rtx returns 1 in that case. */
@@ -4395,194 +4601,1246 @@ errata_emit_nops (insn)
}
if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
{
- emit_insn_before (gen_insn_group_barrier (), insn);
+ emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
emit_insn_before (gen_nop (), insn);
- emit_insn_before (gen_insn_group_barrier (), insn);
+ emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
}
}
-/* INSNS is an chain of instructions. Scan the chain, and insert stop bits
- as necessary to eliminate dependendencies. */
+/* Emit extra nops if they are required to work around hardware errata. */
static void
-emit_insn_group_barriers (insns)
- rtx insns;
+fixup_errata ()
{
- rtx insn, prev_insn;
-
- memset (rws_sum, 0, sizeof (rws_sum));
+ rtx insn;
group_idx = 0;
memset (last_group, 0, sizeof last_group);
- prev_insn = 0;
- for (insn = insns; insn; insn = NEXT_INSN (insn))
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
{
- int need_barrier = 0;
- struct reg_flags flags;
-
+ if (INSN_P (insn) && ia64_safe_type (insn) == TYPE_S)
+ {
+ group_idx = (group_idx + 1) % 3;
+ memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
+ }
if ((TARGET_B_STEP || TARGET_A_STEP) && INSN_P (insn))
errata_emit_nops (insn);
+ }
+}
+
+/* Instruction scheduling support. */
+/* Describe one bundle. */
+
+struct bundle
+{
+ /* Zero if there's no possibility of a stop in this bundle other than
+ at the end, otherwise the position of the optional stop bit. */
+ int possible_stop;
+ /* The types of the three slots. */
+ enum attr_type t[3];
+ /* The pseudo op to be emitted into the assembler output. */
+ const char *name;
+};
- memset (&flags, 0, sizeof (flags));
- switch (GET_CODE (insn))
+#define NR_BUNDLES 10
+
+/* A list of all available bundles. */
+
+static const struct bundle bundle[NR_BUNDLES] =
+{
+ { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
+ { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
+ { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
+ { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
+#if NR_BUNDLES == 10
+ { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
+ { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
+#endif
+ { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
+ { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
+ { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
+ /* .mfi needs to occur earlier than .mlx, so that we only generate it if
+ it matches an L type insn. Otherwise we'll try to generate L type
+ nops. */
+ { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
+};
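
(For illustration, not part of the patch: ia64_sched_init below fills
packets[] in row-major order over bundle pairs, so packet i combines
bundle i / NR_BUNDLES with bundle i % NR_BUNDLES.  A stand-alone sketch
of that mapping, using the template names in the same order as the
bundle[] table above:)

#include <stdio.h>

#define NR_BUNDLES 10
static const char *const names[NR_BUNDLES] =
  { ".mii", ".mmi", ".mfi", ".mmf", ".bbb",
    ".mbb", ".mib", ".mmb", ".mfb", ".mlx" };

int
main (void)
{
  int i = 23;  /* an arbitrary packet number */

  printf ("packet %d = %s %s\n", i,
          names[i / NR_BUNDLES],   /* first bundle */
          names[i % NR_BUNDLES]);  /* second bundle */
  return 0;  /* prints: packet 23 = .mfi .mmf */
}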
+
+/* Describe a packet of instructions. Packets consist of two bundles that
+ are visible to the hardware in one scheduling window. */
+
+struct ia64_packet
+{
+ const struct bundle *t1, *t2;
+ /* Precomputed value of the first split issue in this packet if a cycle
+ starts at its beginning. */
+ int first_split;
+ /* For convenience, the insn types are replicated here so we don't have
+ to go through T1 and T2 all the time. */
+ enum attr_type t[6];
+};
+
+/* An array containing all possible packets. */
+#define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
+static struct ia64_packet packets[NR_PACKETS];
+
+/* Map attr_type to a string with the name. */
+
+static const char *type_names[] =
+{
+ "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
+};
+
+/* Nonzero if we should insert stop bits into the schedule. */
+int ia64_final_schedule = 0;
+
+static rtx ia64_single_set PARAMS ((rtx));
+static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx));
+static void ia64_emit_insn_before PARAMS ((rtx, rtx));
+static rtx gen_nop_type PARAMS ((enum attr_type));
+static void finish_last_head PARAMS ((FILE *, int));
+static void rotate_one_bundle PARAMS ((FILE *));
+static void rotate_two_bundles PARAMS ((FILE *));
+static void cycle_end_fill_slots PARAMS ((FILE *));
+static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
+static int get_split PARAMS ((const struct ia64_packet *, int));
+static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
+ const struct ia64_packet *, int));
+static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
+ rtx *, enum attr_type *, int));
+static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
+static void dump_current_packet PARAMS ((FILE *));
+static void schedule_stop PARAMS ((FILE *));
+
+/* Map a bundle number to its pseudo-op. */
+
+const char *
+get_bundle_name (b)
+ int b;
+{
+ return bundle[b].name;
+}
+
+/* Compute the slot which will cause a split issue in packet P if the
+ current cycle begins at slot BEGIN. */
+
+static int
+itanium_split_issue (p, begin)
+ const struct ia64_packet *p;
+ int begin;
+{
+ int type_count[TYPE_S];
+ int i;
+ int split = 6;
+
+ if (begin < 3)
+ {
+ /* Always split before and after MMF. */
+ if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
+ return 3;
+ if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
+ return 3;
+ /* Always split after MBB and BBB. */
+ if (p->t[1] == TYPE_B)
+ return 3;
+ /* Split after first bundle in MIB BBB combination. */
+ if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
+ return 3;
+ }
+
+ memset (type_count, 0, sizeof type_count);
+ for (i = begin; i < split; i++)
+ {
+ enum attr_type t0 = p->t[i];
+ /* An MLX bundle reserves the same units as an MFI bundle. */
+ enum attr_type t = (t0 == TYPE_L ? TYPE_F
+ : t0 == TYPE_X ? TYPE_I
+ : t0);
+ int max = (t == TYPE_B ? 3 : t == TYPE_F ? 1 : 2);
+ if (type_count[t] == max)
+ return i;
+ type_count[t]++;
+ }
+ return split;
+}
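
(The counting loop above encodes the per-cycle unit limits: two M, two I,
one F and three B slots per clock, with L/X insns reserving the F and I
units.  A stand-alone model of just that rule -- illustrative only; it
omits the begin < 3 special cases for MMF and multiway-branch packets:)

#include <stdio.h>

enum ut { M, I, F, B, L, X, NTYPES };
static const int unit_max[NTYPES] = { 2, 2, 1, 3, 0, 0 };

static int
model_split (const enum ut t[6], int begin)
{
  int count[NTYPES] = { 0, 0, 0, 0, 0, 0 };
  int i;

  for (i = begin; i < 6; i++)
    {
      /* An MLX bundle reserves the same units as an MFI bundle.  */
      enum ut u = t[i] == L ? F : t[i] == X ? I : t[i];
      if (count[u] == unit_max[u])
        return i;  /* split issue before slot i */
      count[u]++;
    }
  return 6;  /* no split within the packet */
}

int
main (void)
{
  /* .mii followed by .mfi: slot 5 would need a third I unit, so the
     cycle splits before it.  */
  enum ut p[6] = { M, I, I, M, F, I };
  printf ("split at slot %d\n", model_split (p, 0));  /* prints 5 */
  return 0;
}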
+
+/* Return the maximum number of instructions a cpu can issue. */
+
+int
+ia64_issue_rate ()
+{
+ return 6;
+}
+
+/* Helper function - like single_set, but look inside COND_EXEC. */
+
+static rtx
+ia64_single_set (insn)
+ rtx insn;
+{
+ rtx x = PATTERN (insn);
+ if (GET_CODE (x) == COND_EXEC)
+ x = COND_EXEC_CODE (x);
+ if (GET_CODE (x) == SET)
+ return x;
+ return single_set_2 (insn, x);
+}
+
+/* Adjust the cost of a scheduling dependency. Return the new cost of
+ a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
+
+int
+ia64_adjust_cost (insn, link, dep_insn, cost)
+ rtx insn, link, dep_insn;
+ int cost;
+{
+ enum attr_type dep_type;
+ enum attr_itanium_class dep_class;
+ enum attr_itanium_class insn_class;
+ rtx dep_set, set, src, addr;
+
+ if (GET_CODE (PATTERN (insn)) == CLOBBER
+ || GET_CODE (PATTERN (insn)) == USE
+ || GET_CODE (PATTERN (dep_insn)) == CLOBBER
+ || GET_CODE (PATTERN (dep_insn)) == USE
+ /* @@@ Not accurate for indirect calls. */
+ || GET_CODE (insn) == CALL_INSN
+ || ia64_safe_type (insn) == TYPE_S)
+ return 0;
+
+ if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
+ || REG_NOTE_KIND (link) == REG_DEP_ANTI)
+ return 0;
+
+ dep_type = ia64_safe_type (dep_insn);
+ dep_class = ia64_safe_itanium_class (dep_insn);
+ insn_class = ia64_safe_itanium_class (insn);
+
+ /* Compares that feed a conditional branch can execute in the same
+ cycle. */
+ dep_set = ia64_single_set (dep_insn);
+ set = ia64_single_set (insn);
+
+ if (dep_type != TYPE_F
+ && dep_set
+ && GET_CODE (SET_DEST (dep_set)) == REG
+ && PR_REG (REGNO (SET_DEST (dep_set)))
+ && GET_CODE (insn) == JUMP_INSN)
+ return 0;
+
+ if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
+ {
+ /* ??? Can't find any information in the documentation about whether
+ a sequence
+ st [rx] = ra
+ ld rb = [ry]
+ splits issue. Assume it doesn't. */
+ return 0;
+ }
+
+ src = set ? SET_SRC (set) : 0;
+ addr = 0;
+ if (set && GET_CODE (SET_DEST (set)) == MEM)
+ addr = XEXP (SET_DEST (set), 0);
+ else if (set && GET_CODE (src) == MEM)
+ addr = XEXP (src, 0);
+ else if (set && GET_CODE (src) == ZERO_EXTEND
+ && GET_CODE (XEXP (src, 0)) == MEM)
+ addr = XEXP (XEXP (src, 0), 0);
+ else if (set && GET_CODE (src) == UNSPEC
+ && XVECLEN (XEXP (src, 0), 0) > 0
+ && GET_CODE (XVECEXP (src, 0, 0)) == MEM)
+ addr = XEXP (XVECEXP (src, 0, 0), 0);
+ if (addr && GET_CODE (addr) == POST_MODIFY)
+ addr = XEXP (addr, 0);
+
+ set = ia64_single_set (dep_insn);
+
+ if ((dep_class == ITANIUM_CLASS_IALU
+ || dep_class == ITANIUM_CLASS_ILOG
+ || dep_class == ITANIUM_CLASS_LD)
+ && (insn_class == ITANIUM_CLASS_LD
+ || insn_class == ITANIUM_CLASS_ST))
+ {
+ if (! addr || ! set)
+ abort ();
+ /* This isn't completely correct - an IALU that feeds an address has
+ a latency of 1 cycle if it's issued in an M slot, but 2 cycles
+ otherwise. Unfortunately there's no good way to describe this. */
+ if (reg_overlap_mentioned_p (SET_DEST (set), addr))
+ return cost + 1;
+ }
+ if ((dep_class == ITANIUM_CLASS_IALU
+ || dep_class == ITANIUM_CLASS_ILOG
+ || dep_class == ITANIUM_CLASS_LD)
+ && (insn_class == ITANIUM_CLASS_MMMUL
+ || insn_class == ITANIUM_CLASS_MMSHF
+ || insn_class == ITANIUM_CLASS_MMSHFI))
+ return 3;
+ if (dep_class == ITANIUM_CLASS_FMAC
+ && (insn_class == ITANIUM_CLASS_FMISC
+ || insn_class == ITANIUM_CLASS_FCVTFX
+ || insn_class == ITANIUM_CLASS_XMPY))
+ return 7;
+ if ((dep_class == ITANIUM_CLASS_FMAC
+ || dep_class == ITANIUM_CLASS_FMISC
+ || dep_class == ITANIUM_CLASS_FCVTFX
+ || dep_class == ITANIUM_CLASS_XMPY)
+ && insn_class == ITANIUM_CLASS_STF)
+ return 8;
+ if ((dep_class == ITANIUM_CLASS_MMMUL
+ || dep_class == ITANIUM_CLASS_MMSHF
+ || dep_class == ITANIUM_CLASS_MMSHFI)
+ && (insn_class == ITANIUM_CLASS_LD
+ || insn_class == ITANIUM_CLASS_ST
+ || insn_class == ITANIUM_CLASS_IALU
+ || insn_class == ITANIUM_CLASS_ILOG
+ || insn_class == ITANIUM_CLASS_ISHF))
+ return 4;
+
+ return cost;
+}
+
+/* Describe the current state of the Itanium pipeline. */
+static struct
+{
+ /* The first slot that is used in the current cycle. */
+ int first_slot;
+ /* The next slot to fill. */
+ int cur;
+ /* The packet we have selected for the current issue window. */
+ const struct ia64_packet *packet;
+ /* The position of the split issue that occurs due to issue width
+ limitations (6 if there's no split issue). */
+ int split;
+ /* Record data about the insns scheduled so far in the same issue
+ window. The elements up to but not including FIRST_SLOT belong
+ to the previous cycle, the ones starting with FIRST_SLOT belong
+ to the current cycle. */
+ enum attr_type types[6];
+ rtx insns[6];
+ int stopbit[6];
+ /* Nonzero if we decided to schedule a stop bit. */
+ int last_was_stop;
+} sched_data;
+
+/* Temporary arrays; they have enough elements to hold all insns that
+ can be ready at the same time while scheduling the current block.
+ SCHED_READY can hold ready insns, SCHED_TYPES their types. */
+static rtx *sched_ready;
+static enum attr_type *sched_types;
+
+/* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
+ of packet P. */
+
+static int
+insn_matches_slot (p, itype, slot, insn)
+ const struct ia64_packet *p;
+ enum attr_type itype;
+ int slot;
+ rtx insn;
+{
+ enum attr_itanium_requires_unit0 u0;
+ enum attr_type stype = p->t[slot];
+
+ if (insn)
+ {
+ u0 = ia64_safe_itanium_requires_unit0 (insn);
+ if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
{
- case NOTE:
- /* For very small loops we can wind up with extra stop bits
- inside the loop because of not putting a stop after the
- assignment to ar.lc before the loop label. */
- /* ??? Ideally we'd do this for any register used in the first
- insn group that's been written recently. */
- if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
+ int i;
+ for (i = sched_data.first_slot; i < slot; i++)
+ if (p->t[i] == stype)
+ return 0;
+ }
+ if (GET_CODE (insn) == CALL_INSN)
+ {
+ /* Reject calls in multiway branch packets. We want to limit
+ the number of multiway branches we generate (since the branch
+ predictor is limited), and this seems to work fairly well.
+ (If we didn't do this, we'd have to add another test here to
+ force calls into the third slot of the bundle.) */
+ if (slot < 3)
{
- need_barrier = rws_access_regno (AR_LC_REGNUM, flags, 0);
- if (need_barrier)
- {
- emit_group_barrier_after (insn);
- memset (rws_sum, 0, sizeof(rws_sum));
- prev_insn = NULL_RTX;
- }
+ if (p->t[1] == TYPE_B)
+ return 0;
}
- break;
+ else
+ {
+ if (p->t[4] == TYPE_B)
+ return 0;
+ }
+ }
+ }
+
+ if (itype == stype)
+ return 1;
+ if (itype == TYPE_A)
+ return stype == TYPE_M || stype == TYPE_I;
+ return 0;
+}
+
+/* Like emit_insn_before, but skip cycle_display insns. This makes the
+ assembly output a bit prettier. */
+
+static void
+ia64_emit_insn_before (insn, before)
+ rtx insn, before;
+{
+ rtx prev = PREV_INSN (before);
+ if (prev && GET_CODE (prev) == INSN
+ && GET_CODE (PATTERN (prev)) == UNSPEC
+ && XINT (PATTERN (prev), 1) == 23)
+ before = prev;
+ emit_insn_before (insn, before);
+}
+
+/* Generate a nop insn of the given type. Note we never generate L type
+ nops. */
+
+static rtx
+gen_nop_type (t)
+ enum attr_type t;
+{
+ switch (t)
+ {
+ case TYPE_M:
+ return gen_nop_m ();
+ case TYPE_I:
+ return gen_nop_i ();
+ case TYPE_B:
+ return gen_nop_b ();
+ case TYPE_F:
+ return gen_nop_f ();
+ case TYPE_X:
+ return gen_nop_x ();
+ default:
+ abort ();
+ }
+}
- case CALL_INSN:
- flags.is_branch = 1;
- flags.is_sibcall = SIBLING_CALL_P (insn);
- memset (rws_insn, 0, sizeof (rws_insn));
- need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
+/* When rotating a bundle out of the issue window, insert a bundle selector
+ insn in front of it. DUMP is the scheduling dump file or NULL. START
+ is either 0 or 3, depending on whether we want to emit a bundle selector
+ for the first bundle or the second bundle in the current issue window.
- if (need_barrier)
+ The selector insns are emitted this late because the selected packet can
+ be changed until parts of it get rotated out. */
+
+static void
+finish_last_head (dump, start)
+ FILE *dump;
+ int start;
+{
+ const struct ia64_packet *p = sched_data.packet;
+ const struct bundle *b = start == 0 ? p->t1 : p->t2;
+ int bundle_type = b - bundle;
+ rtx insn;
+ int i;
+
+ if (! ia64_final_schedule)
+ return;
+
+ for (i = start; sched_data.insns[i] == 0; i++)
+ if (i == start + 3)
+ abort ();
+ insn = sched_data.insns[i];
+
+ if (dump)
+ fprintf (dump, "// Emitting template before %d: %s\n",
+ INSN_UID (insn), b->name);
+
+ ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
+}
+
+/* We can't schedule more insns this cycle. Fix up the scheduling state
+ and advance FIRST_SLOT and CUR.
+ We have to distribute the insns that are currently found between
+ FIRST_SLOT and CUR into the slots of the packet we have selected. So
+ far, they are stored successively in the fields starting at FIRST_SLOT;
+ now they must be moved to the correct slots.
+ DUMP is the current scheduling dump file, or NULL. */
+
+static void
+cycle_end_fill_slots (dump)
+ FILE *dump;
+{
+ const struct ia64_packet *packet = sched_data.packet;
+ int slot, i;
+ enum attr_type tmp_types[6];
+ rtx tmp_insns[6];
+
+ memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
+ memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));
+
+ for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
+ {
+ enum attr_type t = tmp_types[i];
+ if (t != ia64_safe_type (tmp_insns[i]))
+ abort ();
+ while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
+ {
+ if (slot > sched_data.split)
+ abort ();
+ if (dump)
+ fprintf (dump, "// Packet needs %s, have %s\n", type_names[packet->t[slot]],
+ type_names[t]);
+ sched_data.types[slot] = packet->t[slot];
+ sched_data.insns[slot] = 0;
+ sched_data.stopbit[slot] = 0;
+ slot++;
+ }
+ /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the
+ actual slot type later. */
+ sched_data.types[slot] = packet->t[slot];
+ sched_data.insns[slot] = tmp_insns[i];
+ sched_data.stopbit[slot] = 0;
+ slot++;
+ }
+
+ /* This isn't right - there's no need to pad out until the forced split;
+ the CPU will automatically split if an insn isn't ready. */
+#if 0
+ while (slot < sched_data.split)
+ {
+ sched_data.types[slot] = packet->t[slot];
+ sched_data.insns[slot] = 0;
+ sched_data.stopbit[slot] = 0;
+ slot++;
+ }
+#endif
+
+ sched_data.first_slot = sched_data.cur = slot;
+}
+
+/* Bundle rotations, as described in the Itanium optimization manual.
+ We can rotate either one or both bundles out of the issue window.
+ DUMP is the current scheduling dump file, or NULL. */
+
+static void
+rotate_one_bundle (dump)
+ FILE *dump;
+{
+ if (dump)
+ fprintf (dump, "// Rotating one bundle.\n");
+
+ finish_last_head (dump, 0);
+ if (sched_data.cur > 3)
+ {
+ sched_data.cur -= 3;
+ sched_data.first_slot -= 3;
+ memmove (sched_data.types,
+ sched_data.types + 3,
+ sched_data.cur * sizeof *sched_data.types);
+ memmove (sched_data.stopbit,
+ sched_data.stopbit + 3,
+ sched_data.cur * sizeof *sched_data.stopbit);
+ memmove (sched_data.insns,
+ sched_data.insns + 3,
+ sched_data.cur * sizeof *sched_data.insns);
+ }
+ else
+ {
+ sched_data.cur = 0;
+ sched_data.first_slot = 0;
+ }
+}
+
+static void
+rotate_two_bundles (dump)
+ FILE *dump;
+{
+ if (dump)
+ fprintf (dump, "// Rotating two bundles.\n");
+
+ if (sched_data.cur == 0)
+ return;
+
+ finish_last_head (dump, 0);
+ if (sched_data.cur > 3)
+ finish_last_head (dump, 3);
+ sched_data.cur = 0;
+ sched_data.first_slot = 0;
+}
+
+/* We're beginning a new block. Initialize data structures as necessary. */
+
+void
+ia64_sched_init (dump, sched_verbose, max_ready)
+ FILE *dump ATTRIBUTE_UNUSED;
+ int sched_verbose ATTRIBUTE_UNUSED;
+ int max_ready;
+{
+ static int initialized = 0;
+
+ if (! initialized)
+ {
+ int b1, b2, i;
+
+ initialized = 1;
+
+ for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
+ {
+ const struct bundle *t1 = bundle + b1;
+ for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
{
- /* PREV_INSN null can happen if the very first insn is a
- volatile asm. */
- if (prev_insn)
- emit_group_barrier_after (prev_insn);
- memcpy (rws_sum, rws_insn, sizeof (rws_sum));
+ const struct bundle *t2 = bundle + b2;
+
+ packets[i].t1 = t1;
+ packets[i].t2 = t2;
}
+ }
+ for (i = 0; i < NR_PACKETS; i++)
+ {
+ int j;
+ for (j = 0; j < 3; j++)
+ packets[i].t[j] = packets[i].t1->t[j];
+ for (j = 0; j < 3; j++)
+ packets[i].t[j + 3] = packets[i].t2->t[j];
+ packets[i].first_split = itanium_split_issue (packets + i, 0);
+ }
+
+ }
- /* A call must end a bundle, otherwise the assembler might pack
- it in with a following branch and then the function return
- goes to the wrong place. Do this unconditionally for
- unconditional calls, simply because it (1) looks nicer and
- (2) keeps the data structures more accurate for the insns
- following the call. */
- /* ??? A call doesn't have to end a bundle if it is followed by
- a mutex call or branch. Two mutex calls/branches can be put in
- the same bundle. */
+ init_insn_group_barriers ();
- need_barrier = 1;
- if (GET_CODE (PATTERN (insn)) == COND_EXEC)
+ memset (&sched_data, 0, sizeof sched_data);
+ sched_types = (enum attr_type *) xmalloc (max_ready
+ * sizeof (enum attr_type));
+ sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
+}
+
+/* See if the packet P can match the insns we have already scheduled. Return
+ nonzero if so. In *PSLOT, we store the first slot that is available for
+ more instructions if we choose this packet.
+ SPLIT holds the last slot we can use; there's a split issue after it,
+ so scheduling beyond it would cause us to use more than one cycle. */
+
+static int
+packet_matches_p (p, split, pslot)
+ const struct ia64_packet *p;
+ int split;
+ int *pslot;
+{
+ int filled = sched_data.cur;
+ int first = sched_data.first_slot;
+ int i, slot;
+
+ /* First, check if the first of the two bundles must be a specific one (due
+ to stop bits). */
+ if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
+ return 0;
+ if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
+ return 0;
+
+ for (i = 0; i < first; i++)
+ if (! insn_matches_slot (p, sched_data.types[i], i,
+ sched_data.insns[i]))
+ return 0;
+ for (i = slot = first; i < filled; i++)
+ {
+ while (slot < split)
+ {
+ if (insn_matches_slot (p, sched_data.types[i], slot,
+ sched_data.insns[i]))
+ break;
+ slot++;
+ }
+ if (slot == split)
+ return 0;
+ slot++;
+ }
+
+ if (pslot)
+ *pslot = slot;
+ return 1;
+}
+
+/* A frontend for itanium_split_issue. For a packet P and a slot
+ number FIRST that describes the start of the current clock cycle,
+ return the slot number of the first split issue. This function
+ uses the cached number found in P if possible. */
+
+static int
+get_split (p, first)
+ const struct ia64_packet *p;
+ int first;
+{
+ if (first == 0)
+ return p->first_split;
+ return itanium_split_issue (p, first);
+}
+
+/* Given N_READY insns in the array READY, whose types are found in the
+ corresponding array TYPES, return the insn that is best suited to be
+ scheduled in slot SLOT of packet P. */
+
+static int
+find_best_insn (ready, types, n_ready, p, slot)
+ rtx *ready;
+ enum attr_type *types;
+ int n_ready;
+ const struct ia64_packet *p;
+ int slot;
+{
+ int best = -1;
+ int best_pri = 0;
+ while (n_ready-- > 0)
+ {
+ rtx insn = ready[n_ready];
+ if (! insn)
+ continue;
+ if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
+ break;
+ /* If we have equally good insns, one of which has a stricter
+ slot requirement, prefer the one with the stricter requirement. */
+ if (best >= 0 && types[n_ready] == TYPE_A)
+ continue;
+ if (insn_matches_slot (p, types[n_ready], slot, insn))
+ {
+ best = n_ready;
+ best_pri = INSN_PRIORITY (ready[best]);
+
+ /* If there's no way we could get a stricter requirement, stop
+ looking now. */
+ if (types[n_ready] != TYPE_A
+ && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
+ break;
+ break;
+ }
+ }
+ return best;
+}
+
+/* Select the best packet to use given the current scheduler state and the
+ current ready list.
+ READY is an array holding N_READY ready insns; TYPES is a corresponding
+ array that holds their types. Store the best packet in *PPACKET and the
+ number of insns that can be scheduled in the current cycle in *PBEST. */
+
+static void
+find_best_packet (pbest, ppacket, ready, types, n_ready)
+ int *pbest;
+ const struct ia64_packet **ppacket;
+ rtx *ready;
+ enum attr_type *types;
+ int n_ready;
+{
+ int first = sched_data.first_slot;
+ int best = 0;
+ int lowest_end = 6;
+ const struct ia64_packet *best_packet;
+ int i;
+
+ for (i = 0; i < NR_PACKETS; i++)
+ {
+ const struct ia64_packet *p = packets + i;
+ int slot;
+ int split = get_split (p, first);
+ int win = 0;
+ int first_slot, last_slot;
+ int b_nops = 0;
+
+ if (! packet_matches_p (p, split, &first_slot))
+ continue;
+
+ memcpy (sched_ready, ready, n_ready * sizeof (rtx));
+
+ win = 0;
+ last_slot = 6;
+ for (slot = first_slot; slot < split; slot++)
+ {
+ int insn_nr;
+
+ /* Disallow a degenerate case where the first bundle doesn't
+ contain anything but NOPs! */
+ if (first_slot == 0 && win == 0 && slot == 3)
{
- rtx next_insn = insn;
- enum attr_type type = TYPE_A;
-
- do
- next_insn = next_nonnote_insn (next_insn);
- while (next_insn
- && GET_CODE (next_insn) == INSN
- && (GET_CODE (PATTERN (next_insn)) == USE
- || GET_CODE (PATTERN (next_insn)) == CLOBBER));
-
- /* A call ends a bundle if there is a stop bit after it,
- or if it is followed by a non-B-type instruction.
- In the later case, we can elide the stop bit, and get faster
- code when the predicate is false. */
- /* ??? The proper solution for this problem is to make gcc
- explicitly bundle instructions. Then we don't need to
- emit stop bits to force the assembler to start a new
- bundle. */
-
- /* Check the instruction type if it is not a branch or call. */
- if (next_insn && GET_CODE (next_insn) == INSN)
- type = get_attr_type (next_insn);
-
- if (next_insn && GET_CODE (next_insn) != JUMP_INSN
- && GET_CODE (next_insn) != CALL_INSN
- && type != TYPE_B && type != TYPE_UNKNOWN)
- need_barrier = 0;
+ win = -1;
+ break;
}
- if (need_barrier)
+
+ insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
+ if (insn_nr >= 0)
{
- emit_group_barrier_after (insn);
- memset (rws_sum, 0, sizeof (rws_sum));
- prev_insn = NULL_RTX;
+ sched_ready[insn_nr] = 0;
+ last_slot = slot;
+ win++;
}
- else
- prev_insn = insn;
- break;
-
- case JUMP_INSN:
- flags.is_branch = 1;
- /* FALLTHRU */
-
- case INSN:
- if (GET_CODE (PATTERN (insn)) == USE)
- /* Don't care about USE "insns"---those are used to
- indicate to the optimizer that it shouldn't get rid of
- certain operations. */
- break;
- else
- {
- rtx pat = PATTERN (insn);
+ else if (p->t[slot] == TYPE_B)
+ b_nops++;
+ }
+ /* We must disallow MBB/BBB packets if any of their B slots would be
+ filled with nops. */
+ if (last_slot < 3)
+ {
+ if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
+ win = -1;
+ }
+ else
+ {
+ if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
+ win = -1;
+ }
- /* Ug. Hack hacks hacked elsewhere. */
- switch (recog_memoized (insn))
- {
- /* We play dependency tricks with the epilogue in order
- to get proper schedules. Undo this for dv analysis. */
- case CODE_FOR_epilogue_deallocate_stack:
- pat = XVECEXP (pat, 0, 0);
- break;
+ if (win > best
+ || (win == best && last_slot < lowest_end))
+ {
+ best = win;
+ lowest_end = last_slot;
+ best_packet = p;
+ }
+ }
+ *pbest = best;
+ *ppacket = best_packet;
+}
- /* The pattern we use for br.cloop confuses the code above.
- The second element of the vector is representative. */
- case CODE_FOR_doloop_end_internal:
- pat = XVECEXP (pat, 0, 1);
- break;
+/* Reorder the ready list so that the insns that can be issued in this cycle
+ are found in the correct order at the end of the list.
+ DUMP is the scheduling dump file, or NULL. READY points to the start,
+ E_READY to the end of the ready list. MAY_FAIL determines what should be
+ done if no insns can be scheduled in this cycle: if it is zero, we abort,
+ otherwise we return 0.
+ Return 1 if any insns can be scheduled in this cycle. */
+
+static int
+itanium_reorder (dump, ready, e_ready, may_fail)
+ FILE *dump;
+ rtx *ready;
+ rtx *e_ready;
+ int may_fail;
+{
+ const struct ia64_packet *best_packet;
+ int n_ready = e_ready - ready;
+ int first = sched_data.first_slot;
+ int i, best, best_split, filled;
+
+ for (i = 0; i < n_ready; i++)
+ sched_types[i] = ia64_safe_type (ready[i]);
+
+ find_best_packet (&best, &best_packet, ready, sched_types, n_ready);
+
+ if (best == 0)
+ {
+ if (may_fail)
+ return 0;
+ abort ();
+ }
+
+ if (dump)
+ {
+ fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
+ best_packet->t1->name,
+ best_packet->t2 ? best_packet->t2->name : NULL, best);
+ }
+
+ best_split = itanium_split_issue (best_packet, first);
+ packet_matches_p (best_packet, best_split, &filled);
+
+ for (i = filled; i < best_split; i++)
+ {
+ int insn_nr;
+
+ insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
+ if (insn_nr >= 0)
+ {
+ rtx insn = ready[insn_nr];
+ memmove (ready + insn_nr, ready + insn_nr + 1,
+ (n_ready - insn_nr - 1) * sizeof (rtx));
+ memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
+ (n_ready - insn_nr - 1) * sizeof (enum attr_type));
+ ready[--n_ready] = insn;
+ }
+ }
+
+ sched_data.packet = best_packet;
+ sched_data.split = best_split;
+ return 1;
+}
+
+/* Dump information about the current scheduling state to file DUMP. */
+
+static void
+dump_current_packet (dump)
+ FILE *dump;
+{
+ int i;
+ fprintf (dump, "// %d slots filled:", sched_data.cur);
+ for (i = 0; i < sched_data.first_slot; i++)
+ {
+ rtx insn = sched_data.insns[i];
+ fprintf (dump, " %s", type_names[sched_data.types[i]]);
+ if (insn)
+ fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
+ if (sched_data.stopbit[i])
+ fprintf (dump, " ;;");
+ }
+ fprintf (dump, " :::");
+ for (i = sched_data.first_slot; i < sched_data.cur; i++)
+ {
+ rtx insn = sched_data.insns[i];
+ enum attr_type t = ia64_safe_type (insn);
+ fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
+ }
+ fprintf (dump, "\n");
+}
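
(For reference, an illustrative line this routine would print, with
made-up insn UIDs 57 and 61: slots before the ::: belong to the previous
cycle -- here an M insn, then an empty I slot followed by a stop bit:)

// 4 slots filled: M/M I ;; ::: (57) M (61) F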
+
+/* Schedule a stop bit. DUMP is the current scheduling dump file, or
+ NULL. */
+
+static void
+schedule_stop (dump)
+ FILE *dump;
+{
+ const struct ia64_packet *best = sched_data.packet;
+ int i;
+ int best_stop = 6;
+
+ if (dump)
+ fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);
+
+ if (sched_data.cur == 0)
+ {
+ if (dump)
+ fprintf (dump, "// At start of bundle, so nothing to do.\n");
+
+ rotate_two_bundles (NULL);
+ return;
+ }
+
+ for (i = -1; i < NR_PACKETS; i++)
+ {
+ /* This is a slight hack to give the current packet the first chance.
+ This is done to avoid e.g. switching from MIB to MBB bundles. */
+ const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet);
+ int split = get_split (p, sched_data.first_slot);
+ const struct bundle *compare;
+ int next, stoppos;
+
+ if (! packet_matches_p (p, split, &next))
+ continue;
+
+ compare = next > 3 ? p->t2 : p->t1;
+
+ stoppos = 3;
+ if (compare->possible_stop)
+ stoppos = compare->possible_stop;
+ if (next > 3)
+ stoppos += 3;
+
+ if (stoppos < next || stoppos >= best_stop)
+ {
+ if (compare->possible_stop == 0)
+ continue;
+ stoppos = (next > 3 ? 6 : 3);
+ }
+ if (stoppos < next || stoppos >= best_stop)
+ continue;
+
+ if (dump)
+ fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n",
+ best->t1->name, best->t2->name, p->t1->name, p->t2->name,
+ stoppos);
+
+ best_stop = stoppos;
+ best = p;
+ }
+
+ sched_data.packet = best;
+ cycle_end_fill_slots (dump);
+ while (sched_data.cur < best_stop)
+ {
+ sched_data.types[sched_data.cur] = best->t[sched_data.cur];
+ sched_data.insns[sched_data.cur] = 0;
+ sched_data.stopbit[sched_data.cur] = 0;
+ sched_data.cur++;
+ }
+ sched_data.stopbit[sched_data.cur - 1] = 1;
+ sched_data.first_slot = best_stop;
+
+ if (dump)
+ dump_current_packet (dump);
+}
+
+/* We are about to begin issuing insns for this clock cycle.
+ Override the default sort algorithm to better slot instructions. */
+
+int
+ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, reorder_type)
+ FILE *dump ATTRIBUTE_UNUSED;
+ int sched_verbose ATTRIBUTE_UNUSED;
+ rtx *ready;
+ int *pn_ready;
+ int reorder_type;
+{
+ int n_ready = *pn_ready;
+ rtx *e_ready = ready + n_ready;
+ rtx *insnp;
+ rtx highest;
- /* Doesn't generate code. */
- case CODE_FOR_pred_rel_mutex:
- continue;
+ if (sched_verbose)
+ {
+ fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
+ dump_current_packet (dump);
+ }
+
+ /* First, move all USEs, CLOBBERs and other crud out of the way. */
+ highest = ready[n_ready - 1];
+ for (insnp = ready; insnp < e_ready; insnp++)
+ if (insnp < e_ready)
+ {
+ rtx insn = *insnp;
+ enum attr_type t = ia64_safe_type (insn);
+ if (t == TYPE_UNKNOWN)
+ {
+ highest = ready[n_ready - 1];
+ ready[n_ready - 1] = insn;
+ *insnp = highest;
+ if (group_barrier_needed_p (insn))
+ {
+ schedule_stop (sched_verbose ? dump : NULL);
+ sched_data.last_was_stop = 1;
+ }
+ return 1;
+ }
+ }
- default:
+ if (ia64_final_schedule)
+ {
+ int nr_need_stop = 0;
+
+ for (insnp = ready; insnp < e_ready; insnp++)
+ if (safe_group_barrier_needed_p (*insnp))
+ nr_need_stop++;
+
+ /* Schedule a stop bit if
+ - all insns require a stop bit, or
+ - we are starting a new cycle and _any_ insns require a stop bit.
+ The reason for the latter is that if our schedule is accurate, then
+ the additional stop won't decrease performance at this point (since
+ there's a split issue at this point anyway), but it gives us more
+ freedom when scheduling the currently ready insns. */
+ if ((reorder_type == 0 && nr_need_stop)
+ || (reorder_type == 1 && n_ready == nr_need_stop))
+ {
+ schedule_stop (sched_verbose ? dump : NULL);
+ sched_data.last_was_stop = 1;
+ if (reorder_type == 1)
+ return 0;
+ }
+ else
+ {
+ int deleted = 0;
+ insnp = e_ready;
+ /* Move down everything that needs a stop bit, preserving relative
+ order. */
+ while (insnp-- > ready + deleted)
+ while (insnp >= ready + deleted)
+ {
+ rtx insn = *insnp;
+ if (! safe_group_barrier_needed_p (insn))
break;
- }
+ memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
+ *ready = insn;
+ deleted++;
+ }
+ n_ready -= deleted;
+ ready += deleted;
+ if (deleted != nr_need_stop)
+ abort ();
+ }
+ }
- memset (rws_insn, 0, sizeof (rws_insn));
- need_barrier |= rtx_needs_barrier (pat, flags, 0);
+ if (reorder_type == 0)
+ {
+ if (sched_data.cur == 6)
+ rotate_two_bundles (sched_verbose ? dump : NULL);
+ else if (sched_data.cur >= 3)
+ rotate_one_bundle (sched_verbose ? dump : NULL);
+ sched_data.first_slot = sched_data.cur;
+ }
- /* Check to see if the previous instruction was a volatile
- asm. */
- if (! need_barrier)
- need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
+ return itanium_reorder (sched_verbose ? dump : NULL,
+ ready, e_ready, reorder_type == 1);
+}
- if (need_barrier)
- {
- /* PREV_INSN null can happen if the very first insn is a
- volatile asm. */
- if (prev_insn)
- emit_group_barrier_after (prev_insn);
- memcpy (rws_sum, rws_insn, sizeof (rws_sum));
- }
- prev_insn = insn;
+/* Like ia64_sched_reorder, but called after issuing each insn.
+ Override the default sort algorithm to better slot instructions. */
+
+int
+ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
+ FILE *dump ATTRIBUTE_UNUSED;
+ int sched_verbose ATTRIBUTE_UNUSED;
+ rtx *ready;
+ int *pn_ready;
+ int clock_var ATTRIBUTE_UNUSED;
+{
+ if (sched_data.last_was_stop)
+ return 0;
+
+ /* Detect one special case and try to optimize it.
+ If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
+ then we can get better code by transforming this to 1.MFB;; 2.MIx. */
+ if (sched_data.first_slot == 1
+ && sched_data.stopbit[0]
+ && ((sched_data.cur == 4
+ && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
+ && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
+ && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
+ || (sched_data.cur == 3
+ && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
+ && (sched_data.types[2] != TYPE_M && sched_data.types[2] != TYPE_I
+ && sched_data.types[2] != TYPE_A))))
+
+ {
+ int i, best;
+ rtx stop = PREV_INSN (sched_data.insns[1]);
+ rtx pat;
+
+ sched_data.stopbit[0] = 0;
+ sched_data.stopbit[2] = 1;
+ if (GET_CODE (stop) != INSN)
+ abort ();
+
+ pat = PATTERN (stop);
+ /* Ignore cycle displays. */
+ if (GET_CODE (pat) == UNSPEC && XINT (pat, 1) == 23)
+ stop = PREV_INSN (stop);
+ pat = PATTERN (stop);
+ if (GET_CODE (pat) != UNSPEC_VOLATILE
+ || XINT (pat, 1) != 2
+ || INTVAL (XVECEXP (pat, 0, 0)) != 1)
+ abort ();
+ XVECEXP (pat, 0, 0) = GEN_INT (3);
+
+ sched_data.types[5] = sched_data.types[3];
+ sched_data.types[4] = sched_data.types[2];
+ sched_data.types[3] = sched_data.types[1];
+ sched_data.insns[5] = sched_data.insns[3];
+ sched_data.insns[4] = sched_data.insns[2];
+ sched_data.insns[3] = sched_data.insns[1];
+ sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;
+ sched_data.cur += 2;
+ sched_data.first_slot = 3;
+ for (i = 0; i < NR_PACKETS; i++)
+ {
+ const struct ia64_packet *p = packets + i;
+ if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
+ {
+ sched_data.packet = p;
+ break;
}
- break;
+ }
+ rotate_one_bundle (sched_verbose ? dump : NULL);
- case BARRIER:
- /* A barrier doesn't imply an instruction group boundary. */
- break;
+ best = 6;
+ for (i = 0; i < NR_PACKETS; i++)
+ {
+ const struct ia64_packet *p = packets + i;
+ int split = get_split (p, sched_data.first_slot);
+ int next;
- case CODE_LABEL:
- /* Leave prev_insn alone so the barrier gets generated in front
- of the label, if one is needed. */
- break;
+ /* Disallow multiway branches here. */
+ if (p->t[1] == TYPE_B)
+ continue;
- default:
- abort ();
+ if (packet_matches_p (p, split, &next) && next < best)
+ {
+ best = next;
+ sched_data.packet = p;
+ sched_data.split = split;
+ }
}
+ if (best == 6)
+ abort ();
+ }
+
+ if (*pn_ready > 0)
+ {
+ int more = ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, 1);
+ if (more)
+ return more;
+ /* Did we schedule a stop? If so, finish this cycle. */
+ if (sched_data.cur == sched_data.first_slot)
+ return 0;
}
+
+ if (sched_verbose)
+ fprintf (dump, "// Can't issue more this cycle; updating type array.\n");
+
+ cycle_end_fill_slots (sched_verbose ? dump : NULL);
+ if (sched_verbose)
+ dump_current_packet (dump);
+ return 0;
}
+/* We are about to issue INSN. Return the number of insns left on the
+ ready queue that can be issued this cycle. */
+
+int
+ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
+ FILE *dump;
+ int sched_verbose;
+ rtx insn;
+ int can_issue_more ATTRIBUTE_UNUSED;
+{
+ enum attr_type t = ia64_safe_type (insn);
+
+ if (sched_data.last_was_stop)
+ {
+ int t = sched_data.first_slot;
+ if (t == 0)
+ t = 3;
+ ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
+ init_insn_group_barriers ();
+ sched_data.last_was_stop = 0;
+ }
+
+ if (t == TYPE_UNKNOWN)
+ {
+ if (sched_verbose)
+ fprintf (dump, "// Ignoring type %s\n", type_names[t]);
+ return 1;
+ }
+
+ /* This is _not_ just a sanity check. group_barrier_needed_p will update
+ important state info. Don't delete this test. */
+ if (ia64_final_schedule
+ && group_barrier_needed_p (insn))
+ abort ();
+
+ sched_data.stopbit[sched_data.cur] = 0;
+ sched_data.insns[sched_data.cur] = insn;
+ sched_data.types[sched_data.cur] = t;
+
+ sched_data.cur++;
+ if (sched_verbose)
+ fprintf (dump, "// Scheduling insn %d of type %s\n",
+ INSN_UID (insn), type_names[t]);
+
+ if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
+ {
+ schedule_stop (sched_verbose ? dump : NULL);
+ sched_data.last_was_stop = 1;
+ }
+
+ return 1;
+}
+
+/* Free data allocated by ia64_sched_init. */
+
+void
+ia64_sched_finish (dump, sched_verbose)
+ FILE *dump;
+ int sched_verbose;
+{
+ if (sched_verbose)
+ fprintf (dump, "// Finishing schedule.\n");
+ rotate_two_bundles (NULL);
+ free (sched_types);
+ free (sched_ready);
+}
+
/* Emit pseudo-ops for the assembler to describe predicate relations.
At present this assumes that we only consider predicate pairs to
be mutex, and that the assembler can deduce proper values from
@@ -4660,9 +5918,17 @@ ia64_reorg (insns)
/* Make sure the CFG and global_live_at_start are correct
for emit_predicate_relation_info. */
find_basic_blocks (insns, max_reg_num (), NULL);
- life_analysis (insns, NULL, 0);
+ life_analysis (insns, NULL, PROP_DEATH_NOTES);
+
+ ia64_final_schedule = 1;
+ schedule_ebbs (rtl_dump_file);
+ ia64_final_schedule = 0;
+
+ /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
+ place as they were during scheduling. */
+ emit_insn_group_barriers (rtl_dump_file, insns);
- emit_insn_group_barriers (insns);
+ fixup_errata ();
emit_predicate_relation_info ();
}
diff --git a/gcc/config/ia64/ia64.h b/gcc/config/ia64/ia64.h
index 3bd6618..424fa77 100644
--- a/gcc/config/ia64/ia64.h
+++ b/gcc/config/ia64/ia64.h
@@ -1849,7 +1849,7 @@ do { \
case CONST: \
case SYMBOL_REF: \
case LABEL_REF: \
- return COSTS_N_INSNS (2);
+ return COSTS_N_INSNS (3);
/* Like `CONST_COSTS' but applies to nonconstant RTL expressions. */
@@ -1916,19 +1916,6 @@ do { \
#define NO_FUNCTION_CSE
-/* A C statement (sans semicolon) to update the integer variable COST based on
- the relationship between INSN that is dependent on DEP_INSN through the
- dependence LINK. */
-
-/* ??? Investigate. */
-/* #define ADJUST_COST(INSN, LINK, DEP_INSN, COST) */
-
-/* A C statement (sans semicolon) to update the integer scheduling
- priority `INSN_PRIORITY(INSN)'. */
-
-/* ??? Investigate. */
-/* #define ADJUST_PRIORITY (INSN) */
-
/* Dividing the output into sections. */
@@ -2816,13 +2803,43 @@ do { \
BRANCH_COST+1 is the default if the machine does not use
cc0, and 1 if it does use cc0. */
/* ??? Investigate. */
-/* #define MAX_CONDITIONAL_EXECUTE */
+#define MAX_CONDITIONAL_EXECUTE 12
+
+/* A C statement (sans semicolon) to update the integer scheduling
+ priority `INSN_PRIORITY(INSN)'. */
+
+/* ??? Investigate. */
+/* #define ADJUST_PRIORITY (INSN) */
+
+/* A C statement (sans semicolon) to update the integer variable COST
+ based on the relationship between INSN that is dependent on
+ DEP_INSN through the dependence LINK. The default is to make no
+ adjustment to COST. This can be used for example to specify to
+ the scheduler that an output- or anti-dependence does not incur
+ the same cost as a data-dependence. */
+
+#define ADJUST_COST(insn,link,dep_insn,cost) \
+ (cost) = ia64_adjust_cost(insn, link, dep_insn, cost)
+
+#define ISSUE_RATE ia64_issue_rate ()
+
+#define MD_SCHED_INIT(DUMP, SCHED_VERBOSE, MAX_READY) \
+ ia64_sched_init (DUMP, SCHED_VERBOSE, MAX_READY)
+
+#define MD_SCHED_REORDER(DUMP, SCHED_VERBOSE, READY, N_READY, CLOCK, CIM) \
+ (CIM) = ia64_sched_reorder (DUMP, SCHED_VERBOSE, READY, &N_READY, 0)
+
+#define MD_SCHED_REORDER2(DUMP, SCHED_VERBOSE, READY, N_READY, CLOCK, CIM) \
+ (CIM) = ia64_sched_reorder2 (DUMP, SCHED_VERBOSE, READY, &N_READY, 1)
-/* Indicate how many instructions can be issued at the same time. */
+#define MD_SCHED_FINISH(DUMP, SCHED_VERBOSE) \
+ ia64_sched_finish (DUMP, SCHED_VERBOSE)
-/* ??? For now, we just schedule to fill bundles. */
+#define MD_SCHED_VARIABLE_ISSUE(DUMP, SCHED_VERBOSE, INSN, CAN_ISSUE_MORE) \
+ ((CAN_ISSUE_MORE) \
+ = ia64_variable_issue (DUMP, SCHED_VERBOSE, INSN, CAN_ISSUE_MORE))
-#define ISSUE_RATE 3
+extern int ia64_final_schedule;
#define IA64_UNWIND_INFO 1
#define HANDLER_SECTION fprintf (asm_out_file, "\t.personality\t__ia64_personality_v1\n\t.handlerdata\n");
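
(A concrete reading of ADJUST_COST above; the scheduler-side use is
paraphrased, since haifa-sched.c is not part of this patch:)

/* Inside the scheduler, the hook is invoked roughly as:  */
#ifdef ADJUST_COST
  ADJUST_COST (insn, link, dep_insn, cost);
#endif
/* ...which, with the definition above, expands to:  */
cost = ia64_adjust_cost (insn, link, dep_insn, cost);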
diff --git a/gcc/config/ia64/ia64.md b/gcc/config/ia64/ia64.md
index a79b879..25660fe 100644
--- a/gcc/config/ia64/ia64.md
+++ b/gcc/config/ia64/ia64.md
@@ -68,6 +68,8 @@
;; 19 fetchadd_acq
;; 20 bsp_value
;; 21 flushrs
+;; 22 bundle selector
+;; 23 cycle display
;;
;; unspec_volatile:
;; 0 alloc
@@ -99,23 +101,35 @@
;; multiple instructions, patterns which emit 0 instructions, and patterns
;; which emit instruction that can go in any slot (e.g. nop).
-(define_attr "itanium_class" "unknown,ignore,stop_bit,br,fcmp,fcvtfx,fld,fmac,fmisc,frar_i,frar_m,frbr,frfr,frpr,ialu,icmp,ilog,ishf,ld,long_i,mmmul,mmshf,mmshfi,rse_m,scall,sem,stf,st,syst_m0,syst_m,tbit,toar_i,toar_m,tobr,tofr,topr,xmpy,xtd"
+(define_attr "itanium_class" "unknown,ignore,stop_bit,br,fcmp,fcvtfx,fld,fmac,fmisc,frar_i,frar_m,frbr,frfr,frpr,ialu,icmp,ilog,ishf,ld,chk_s,long_i,mmmul,mmshf,mmshfi,rse_m,scall,sem,stf,st,syst_m0,syst_m,tbit,toar_i,toar_m,tobr,tofr,topr,xmpy,xtd,nop_b,nop_f,nop_i,nop_m,nop_x"
(const_string "unknown"))
-(define_attr "type" "unknown,A,I,M,F,B,L,S"
- (cond [(eq_attr "itanium_class" "ld,st,fld,stf,sem") (const_string "M")
+;; chk_s has an I and an M form; use type A for convenience.
+(define_attr "type" "unknown,A,I,M,F,B,L,X,S"
+ (cond [(eq_attr "itanium_class" "ld,st,fld,stf,sem,nop_m") (const_string "M")
(eq_attr "itanium_class" "rse_m,syst_m,syst_m0") (const_string "M")
(eq_attr "itanium_class" "frar_m,toar_m,frfr,tofr") (const_string "M")
- (eq_attr "itanium_class" "ialu,icmp,ilog") (const_string "A")
- (eq_attr "itanium_class" "fmisc,fmac,fcmp,xmpy,fcvtfx") (const_string "F")
+ (eq_attr "itanium_class" "chk_s,ialu,icmp,ilog") (const_string "A")
+ (eq_attr "itanium_class" "fmisc,fmac,fcmp,xmpy") (const_string "F")
+ (eq_attr "itanium_class" "fcvtfx,nop_f") (const_string "F")
(eq_attr "itanium_class" "frar_i,toar_i,frbr,tobr") (const_string "I")
(eq_attr "itanium_class" "frpr,topr,ishf,xtd,tbit") (const_string "I")
- (eq_attr "itanium_class" "mmmul,mmshf,mmshfi") (const_string "I")
- (eq_attr "itanium_class" "br,scall") (const_string "B")
+ (eq_attr "itanium_class" "mmmul,mmshf,mmshfi,nop_i") (const_string "I")
+ (eq_attr "itanium_class" "br,scall,nop_b") (const_string "B")
(eq_attr "itanium_class" "stop_bit") (const_string "S")
+ (eq_attr "itanium_class" "nop_x") (const_string "X")
(eq_attr "itanium_class" "long_i") (const_string "L")]
(const_string "unknown")))
+(define_attr "itanium_requires_unit0" "no,yes"
+ (cond [(eq_attr "itanium_class" "syst_m0,sem,frfr,rse_m") (const_string "yes")
+ (eq_attr "itanium_class" "toar_m,frar_m") (const_string "yes")
+ (eq_attr "itanium_class" "frbr,tobr,mmmul") (const_string "yes")
+ (eq_attr "itanium_class" "tbit,ishf,topr,frpr") (const_string "yes")
+ (eq_attr "itanium_class" "toar_i,frar_i") (const_string "yes")
+ (eq_attr "itanium_class" "fmisc,fcmp") (const_string "yes")]
+ (const_string "no")))
+
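genattr and genattrtab turn each define_attr into an enum plus a get_attr_* accessor, so target C code can query both of the attributes above.  A hypothetical test (the placement helper is invented for illustration):

    /* Does INSN need an M slot, and must it go to unit 0 (e.g. M0)?  */
    if (get_attr_type (insn) == TYPE_M
        && get_attr_itanium_requires_unit0 (insn) == ITANIUM_REQUIRES_UNIT0_YES)
      place_on_unit_0 (insn);   /* hypothetical helper */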
;; Predication. True iff this instruction can be predicated.
(define_attr "predicable" "no,yes" (const_string "yes"))
@@ -127,47 +141,70 @@
;; ::
;; ::::::::::::::::::::
-;; Each usage of a function units by a class of insns is specified with a
-;; `define_function_unit' expression, which looks like this:
-;; (define_function_unit NAME MULTIPLICITY SIMULTANEITY TEST READY-DELAY
-;; ISSUE-DELAY [CONFLICT-LIST])
-
-;; This default scheduling info seeks to pack instructions into bundles
-;; efficiently to reduce code size, so we just list how many of each
-;; instruction type can go in a bundle. ISSUE_RATE is set to 3.
-
-;; ??? Add scheduler ready-list hook (MD_SCHED_REORDER) that orders
-;; instructions, so that the next instruction can fill the next bundle slot.
-;; This really needs to know where the stop bits are though.
-
-;; ??? Use MD_SCHED_REORDER to put alloc first instead of using an unspec
-;; volatile. Use ADJUST_PRIORITY to set the priority of alloc very high to
-;; make it schedule first.
-
-;; ??? Modify the md_reorg code that emits stop bits so that instead of putting
-;; them in the last possible place, we put them in places where bundles allow
-;; them. This should reduce code size, but may decrease performance if we end
-;; up with more stop bits than the minimum we need.
-
-;; Alu instructions can execute on either the integer or memory function
-;; unit. We indicate this by defining an alu function unit, and then marking
-;; it as busy everytime we issue a integer or memory type instruction.
-
-(define_function_unit "alu" 3 1 (eq_attr "type" "A,I,M") 1 0)
-
-(define_function_unit "integer" 2 1 (eq_attr "type" "I") 1 0)
-
-(define_function_unit "memory" 3 1 (eq_attr "type" "M") 1 0)
-
-(define_function_unit "floating_point" 1 1 (eq_attr "type" "F") 1 0)
-
-(define_function_unit "branch" 3 1 (eq_attr "type" "B") 1 0)
-
-;; ??? This isn't quite right, because we can only fit two insns in a bundle
-;; when using an L type instruction. That isn't modeled currently.
-
-(define_function_unit "long_immediate" 1 1 (eq_attr "type" "L") 1 0)
-
+;; We define six copies of a "dummy" functional unit.  All the real work of
+;; deciding which insn uses which unit is done by our MD_SCHED_REORDER
+;; hooks; here we only need enough copies of the dummy unit that the
+;; scheduler isn't confused by MD_SCHED_REORDER.  Besides the six dummies
+;; for normal insns, we add a single unit for stop bits.  In each definition
+;; below, the number before the trailing 0 is the READY-DELAY, i.e. the
+;; latency in cycles of that insn class.
+
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "br") 0 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "scall") 0 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fcmp") 2 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fcvtfx") 7 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fld") 9 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fmac") 5 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fmisc") 5 0)
+
+;; There is only one insn `mov = ar.bsp' for frar_i:
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frar_i") 13 0)
+;; There is only one insn `mov = ar.unat' for frar_m:
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frar_m") 6 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frbr") 2 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frfr") 2 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frpr") 2 0)
+
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ialu") 1 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "icmp") 1 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ilog") 1 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ishf") 1 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ld") 2 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "long_i") 1 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "mmmul") 2 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "mmshf") 2 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "mmshfi") 2 0)
+
+;; There is currently only one insn (flushrs) in this class.  We assume that
+;; flushrs is the first syllable of the bundle after a stop bit.
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "rse_m") 0 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "sem") 11 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "stf") 1 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "st") 1 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "syst_m0") 1 0)
+;; Currently we use only one insn, `mf'.  Therefore the latency is set to 0.
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "syst_m") 0 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "tbit") 1 0)
+
+;; There is only one insn `mov ar.pfs =' for toar_i, so we use a latency
+;; of 0:
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "toar_i") 0 0)
+;; There are only two insns, `mov ar.ccv =' and `mov ar.unat =', for toar_m:
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "toar_m") 5 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "tobr") 1 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "tofr") 9 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "topr") 1 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "xmpy") 7 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "xtd") 1 0)
+
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_m") 0 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_i") 0 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_f") 0 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_b") 0 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_x") 0 0)
+
+(define_function_unit "stop_bit" 1 1 (eq_attr "itanium_class" "stop_bit") 0 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ignore") 0 0)
+(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "unknown") 0 0)
;; ::::::::::::::::::::
;; ::
@@ -1411,7 +1448,6 @@
(clobber (match_operand:DI 2 "register_operand" ""))]
"reload_completed"
[(set (match_dup 3) (ashift:DI (match_dup 1) (const_int 32)))
- (unspec_volatile [(const_int 0)] 2)
(set (zero_extract:DI (match_dup 0) (const_int 32) (const_int 0))
(lshiftrt:DI (match_dup 3) (const_int 32)))]
"operands[3] = operands[2];")
@@ -2408,9 +2444,6 @@
"#"
[(set_attr "itanium_class" "unknown")])
-;; ??? Need to emit an instruction group barrier here because this gets split
-;; after md_reorg.
-
(define_split
[(set (match_operand:DI 0 "register_operand" "")
(plus:DI (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "")
@@ -2422,9 +2455,7 @@
[(parallel [(set (match_dup 5) (plus:DI (mult:DI (match_dup 1) (match_dup 2))
(match_dup 3)))
(clobber (match_dup 0))])
- (unspec_volatile [(const_int 0)] 2)
(set (match_dup 0) (match_dup 5))
- (unspec_volatile [(const_int 0)] 2)
(set (match_dup 0) (plus:DI (match_dup 0) (match_dup 4)))]
"")
@@ -5122,7 +5153,10 @@
(match_operand:DI 2 "const_int_operand" "")] 1))
(clobber (match_operand:DI 3 "register_operand" ""))]
""
- ".mem.offset %2, 0\;st8.spill %0 = %1%P0"
+ "*
+{
+ return \".mem.offset %2, 0\;%,st8.spill %0 = %1%P0\";
+}"
[(set_attr "itanium_class" "st")])
;; Reads ar.unat
@@ -5140,7 +5174,10 @@
(match_operand:DI 2 "const_int_operand" "")] 2))
(use (match_operand:DI 3 "register_operand" ""))]
""
- ".mem.offset %2, 0\;ld8.fill %0 = %1%P1"
+ "*
+{
+ return \".mem.offset %2, 0\;%,ld8.fill %0 = %1%P1\";
+}"
[(set_attr "itanium_class" "ld")])
(define_insn "fr_spill"
@@ -5193,6 +5230,58 @@
"nop 0"
[(set_attr "itanium_class" "unknown")])
+(define_insn "nop_m"
+ [(const_int 1)]
+ ""
+ "nop.m 0"
+ [(set_attr "itanium_class" "nop_m")])
+
+(define_insn "nop_i"
+ [(const_int 2)]
+ ""
+ "nop.i 0"
+ [(set_attr "itanium_class" "nop_i")])
+
+(define_insn "nop_f"
+ [(const_int 3)]
+ ""
+ "nop.f 0"
+ [(set_attr "itanium_class" "nop_f")])
+
+(define_insn "nop_b"
+ [(const_int 4)]
+ ""
+ "nop.b 0"
+ [(set_attr "itanium_class" "nop_b")])
+
+(define_insn "nop_x"
+ [(const_int 5)]
+ ""
+ ""
+ [(set_attr "itanium_class" "nop_x")])
+
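With one named pattern per nop flavor, C code can materialize a filler insn for any slot type.  A hypothetical helper (the patch's own ia64.c code may differ):

    static rtx
    gen_nop_of_type (t)
         enum attr_type t;
    {
      /* Map a slot type to the matching typed-nop pattern.  */
      switch (t)
        {
        case TYPE_M: return gen_nop_m ();
        case TYPE_I: return gen_nop_i ();
        case TYPE_F: return gen_nop_f ();
        case TYPE_B: return gen_nop_b ();
        case TYPE_X: return gen_nop_x ();
        default: abort ();
        }
    }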
+(define_expand "cycle_display"
+ [(unspec [(match_operand 0 "const_int_operand" "")] 23)]
+ "ia64_final_schedule"
+ "")
+
+(define_insn "*cycle_display_1"
+ [(unspec [(match_operand 0 "const_int_operand" "")] 23)]
+ ""
+ "// cycle %0"
+ [(set_attr "itanium_class" "ignore")
+ (set_attr "predicable" "no")])
+
+(define_insn "bundle_selector"
+ [(unspec [(match_operand 0 "const_int_operand" "")] 22)]
+ ""
+ "*
+{
+ return get_bundle_name (INTVAL (operands[0]));
+}"
+ [(set_attr "itanium_class" "ignore")
+ (set_attr "predicable" "no")])
+
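The bundle_selector pattern defers its assembler text to get_bundle_name in ia64.c.  As a sketch of the idea only (the real table and its ordering live in ia64.c), such a function can simply index a static array of IA-64 template mnemonics:

    static const char *const sample_bundle_names[] =
    {
      ".mii", ".mmi", ".mfi", ".mmf", ".bbb",
      ".mbb", ".mib", ".mmb", ".mfb", ".mlx"
    };

    const char *
    get_bundle_name (b)
         int b;
    {
      return sample_bundle_names[b];   /* illustrative ordering only */
    }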
;; Pseudo instruction that prevents the scheduler from moving code above this
;; point.
(define_insn "blockage"
@@ -5203,7 +5292,7 @@
(set_attr "predicable" "no")])
(define_insn "insn_group_barrier"
- [(unspec_volatile [(const_int 0)] 2)]
+ [(unspec_volatile [(match_operand 0 "const_int_operand" "")] 2)]
""
";;"
[(set_attr "itanium_class" "stop_bit")
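Because the pattern now carries an operand, generator calls must supply one, e.g. as below; the constant is purely illustrative, its encoding being internal to the port:

    emit_insn (gen_insn_group_barrier (GEN_INT (3)));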
diff --git a/gcc/rtl.h b/gcc/rtl.h
index 397ea98..e1fd687 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -1346,6 +1346,7 @@ extern void set_unique_reg_note PARAMS ((rtx, enum reg_note, rtx));
? (GET_CODE (PATTERN (I)) == SET \
? PATTERN (I) : single_set_1 (I)) \
: NULL_RTX)
+#define single_set_1(I) single_set_2 (I, PATTERN (I))
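The macro keeps every existing single_set_1 caller source-compatible, while the two-argument function lets new code analyze a pattern other than PATTERN (insn).  Equivalently:

    /* These two calls now compute the same value:  */
    set = single_set_1 (insn);
    set = single_set_2 (insn, PATTERN (insn));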
extern int rtx_unstable_p PARAMS ((rtx));
extern int rtx_varies_p PARAMS ((rtx));
@@ -1365,7 +1366,7 @@ extern int no_jumps_between_p PARAMS ((rtx, rtx));
extern int modified_in_p PARAMS ((rtx, rtx));
extern int insn_dependent_p PARAMS ((rtx, rtx));
extern int reg_set_p PARAMS ((rtx, rtx));
-extern rtx single_set_1 PARAMS ((rtx));
+extern rtx single_set_2 PARAMS ((rtx, rtx));
extern int multiple_sets PARAMS ((rtx));
extern rtx find_last_value PARAMS ((rtx, rtx *, rtx, int));
extern int refers_to_regno_p PARAMS ((unsigned int, unsigned int,
diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c
index 073f37c..00aebf4 100644
--- a/gcc/rtlanal.c
+++ b/gcc/rtlanal.c
@@ -860,12 +860,11 @@ insn_dependent_p_1 (x, pat, data)
will not be used, which we ignore. */
rtx
-single_set_1 (insn)
- rtx insn;
+single_set_2 (insn, pat)
+ rtx insn, pat;
{
rtx set = NULL;
int set_verified = 1;
- rtx pat = PATTERN (insn);
int i;
if (GET_CODE (pat) == PARALLEL)