diff options
author | Dorit Naishlos <dorit@il.ibm.com> | 2003-10-20 15:36:19 +0000 |
---|---|---|
committer | Dorit Nuzman <dorit@gcc.gnu.org> | 2003-10-20 15:36:19 +0000 |
commit | cbe26ab89a7101f6e39d5e40ba0b5d50c27affd9 (patch) | |
tree | af6c71aa4ff6785fe2bd30618d1b7b973850084d /gcc | |
parent | 8d36c4999a10bf225ae5edf44a53f2cae2051922 (diff) | |
download | gcc-cbe26ab89a7101f6e39d5e40ba0b5d50c27affd9.zip gcc-cbe26ab89a7101f6e39d5e40ba0b5d50c27affd9.tar.gz gcc-cbe26ab89a7101f6e39d5e40ba0b5d50c27affd9.tar.bz2 |
rs6000.h: (rs6000_sched_insert_nops): support new flag -minsert-sched-nops.
* config/rs6000/rs6000.h: (rs6000_sched_insert_nops):
support new flag -minsert-sched-nops.
(DEFAULT_SCHED_FINISH_NOP_INSERTION_SCHEME): Define.
* config/rs6000/rs6000.c: (rs6000_sched_insert_nops):
support new flag -minsert-sched-nops.
(is_cracked_insn, is_microcoded_insn): New functions.
(rs6000_sched_finish): New function.
(rs6000_issue_rate): Return 5 for power4.
(get_next_active_insn, insn_terminates_group_p): New
functions.
(is_costly_group, force_new_group): New functions.
(redefine_groups, pad_groups): New functions.
(rs6000_variable_issue): Use new functions.
* doc/invoke.texi (-minsert-sched-nops): Document new
option.
From-SVN: r72707
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 18 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.c | 559 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.h | 26 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 15 |
4 files changed, 596 insertions, 22 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 90f3af7..46406b5 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,21 @@ +2003-10-20 Dorit Naishlos <dorit@il.ibm.com> + + * config/rs6000/rs6000.h: (rs6000_sched_insert_nops): + support new flag -minsert-sched-nops. + (DEFAULT_SCHED_FINISH_NOP_INSERTION_SCHEME): Define. + * config/rs6000/rs6000.c: (rs6000_sched_insert_nops): + support new flag -minsert-sched-nops. + (is_cracked_insn, is_microcoded_insn): New functions. + (rs6000_sched_finish): New function. + (rs6000_issue_rate): Return 5 for power4. + (get_next_active_insn, insn_terminates_group_p): New + functions. + (is_costly_group, force_new_group): New functions. + (redefine_groups, pad_groups): New functions. + (rs6000_variable_issue): Use new functions. + * doc/invoke.texi (-minsert-sched-nops): Document new + option. + 2003-10-20 David S. Miller <davem@redhat.com> * config/sparc/sparc.md (type attribute): Add new insn types diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 1639fea..1680775 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -90,6 +90,10 @@ int rs6000_sched_restricted_insns_priority; const char *rs6000_sched_costly_dep_str; enum rs6000_dependence_cost rs6000_sched_costly_dep; +/* Support for -minsert-sched-nops option. */ +const char *rs6000_sched_insert_nops_str; +enum rs6000_nop_insertion rs6000_sched_insert_nops; + /* Size of long double */ const char *rs6000_long_double_size_string; int rs6000_long_double_type_size; @@ -279,10 +283,20 @@ static int rs6000_use_dfa_pipeline_interface (void); static int rs6000_variable_issue (FILE *, int, rtx, int); static bool rs6000_rtx_costs (rtx, int, int, int *); static int rs6000_adjust_cost (rtx, rtx, rtx, int); +static bool is_microcoded_insn (rtx); static int is_dispatch_slot_restricted (rtx); +static bool is_cracked_insn (rtx); +static bool is_branch_slot_insn (rtx); static int rs6000_adjust_priority (rtx, int); static int rs6000_issue_rate (void); static bool rs6000_is_costly_dependence (rtx, rtx, rtx, int, int); +static rtx get_next_active_insn (rtx, rtx); +static bool insn_terminates_group_p (rtx , enum group_termination); +static bool is_costly_group (rtx *, rtx); +static int force_new_group (int, FILE *, rtx *, rtx, bool *, int, int *); +static int redefine_groups (FILE *, int, rtx, rtx); +static int pad_groups (FILE *, int, rtx, rtx); +static void rs6000_sched_finish (FILE *, int); static int rs6000_use_sched_lookahead (void); static void rs6000_init_builtins (void); @@ -477,6 +491,8 @@ static const char alt_reg_names[][8] = #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence +#undef TARGET_SCHED_FINISH +#define TARGET_SCHED_FINISH rs6000_sched_finish #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead @@ -866,7 +882,7 @@ rs6000_override_options (const char *default_cpu) rs6000_default_long_calls = (base[0] != 'n'); } - /* Handle -mprioritize-restrcted-insns option. */ + /* Handle -mprioritize-restricted-insns option. */ rs6000_sched_restricted_insns_priority = DEFAULT_RESTRICTED_INSNS_PRIORITY; if (rs6000_sched_restricted_insns_priority_str) rs6000_sched_restricted_insns_priority = @@ -884,7 +900,22 @@ rs6000_override_options (const char *default_cpu) rs6000_sched_costly_dep = true_store_to_load_dep_costly; else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load")) rs6000_sched_costly_dep = store_to_load_dep_costly; - else rs6000_sched_costly_dep = atoi (rs6000_sched_costly_dep_str); + else + rs6000_sched_costly_dep = atoi (rs6000_sched_costly_dep_str); + } + + /* Handle -minsert-sched-nops option. */ + rs6000_sched_insert_nops = DEFAULT_SCHED_FINISH_NOP_INSERTION_SCHEME; + if (rs6000_sched_insert_nops_str) + { + if (! strcmp (rs6000_sched_insert_nops_str, "no")) + rs6000_sched_insert_nops = sched_finish_none; + else if (! strcmp (rs6000_sched_insert_nops_str, "pad")) + rs6000_sched_insert_nops = sched_finish_pad_groups; + else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact")) + rs6000_sched_insert_nops = sched_finish_regroup_exact; + else + rs6000_sched_insert_nops = atoi (rs6000_sched_insert_nops_str); } #ifdef TARGET_REGNAMES @@ -13241,20 +13272,10 @@ rs6000_variable_issue (FILE *stream ATTRIBUTE_UNUSED, if (rs6000_cpu == PROCESSOR_POWER4) { - enum attr_type type = get_attr_type (insn); - if (type == TYPE_LOAD_EXT_U || type == TYPE_LOAD_EXT_UX - || type == TYPE_LOAD_UX || type == TYPE_STORE_UX - || type == TYPE_MFCR) - return 0; - else if (type == TYPE_LOAD_U || type == TYPE_STORE_U - || type == TYPE_FPLOAD_U || type == TYPE_FPSTORE_U - || type == TYPE_FPLOAD_UX || type == TYPE_FPSTORE_UX - || type == TYPE_LOAD_EXT || type == TYPE_DELAYED_CR - || type == TYPE_COMPARE || type == TYPE_DELAYED_COMPARE - || type == TYPE_IMUL_COMPARE || type == TYPE_LMUL_COMPARE - || type == TYPE_IDIV || type == TYPE_LDIV - || type == TYPE_INSERT_WORD) - return more > 2 ? more - 2 : 0; + if (is_microcoded_insn (insn)) + return 0; + else if (is_cracked_insn (insn)) + return more > 2 ? more - 2 : 0; } return more - 1; @@ -13318,13 +13339,38 @@ rs6000_adjust_cost (rtx insn, rtx link, rtx dep_insn ATTRIBUTE_UNUSED, return cost; } +/* The function returns a true if INSN is microcoded. + Return false ptherwise. */ + +static bool +is_microcoded_insn (rtx insn) +{ + if (!insn || !INSN_P (insn) + || GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + return false; + + if (rs6000_cpu == PROCESSOR_POWER4) + { + enum attr_type type = get_attr_type (insn); + if (type == TYPE_LOAD_EXT_U + || type == TYPE_LOAD_EXT_UX + || type == TYPE_LOAD_UX + || type == TYPE_STORE_UX + || type == TYPE_MFCR) + return true; + } + + return false; +} + /* The function returns a non-zero value if INSN can be scheduled only - as the first insn in a dispatch group ("dispatch-slot restricted"). - In this case, the returned value indicates how many dispatch slots - the insn occupies (at the beginning of the group). + as the first insn in a dispatch group ("dispatch-slot restricted"). + In this case, the returned value indicates how many dispatch slots + the insn occupies (at the beginning of the group). Return 0 otherwise. */ -static int +static int is_dispatch_slot_restricted (rtx insn) { enum attr_type type; @@ -13358,6 +13404,55 @@ is_dispatch_slot_restricted (rtx insn) } } +/* The function returns true if INSN is cracked into 2 instructions + by the processor (and therefore occupies 2 issue slots). */ + +static bool +is_cracked_insn (rtx insn) +{ + if (!insn || !INSN_P (insn) + || GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + return false; + + if (rs6000_cpu == PROCESSOR_POWER4) + { + enum attr_type type = get_attr_type (insn); + if (type == TYPE_LOAD_U || type == TYPE_STORE_U + || type == TYPE_FPLOAD_U || type == TYPE_FPSTORE_U + || type == TYPE_FPLOAD_UX || type == TYPE_FPSTORE_UX + || type == TYPE_LOAD_EXT || type == TYPE_DELAYED_CR + || type == TYPE_COMPARE || type == TYPE_DELAYED_COMPARE + || type == TYPE_IMUL_COMPARE || type == TYPE_LMUL_COMPARE + || type == TYPE_IDIV || type == TYPE_LDIV + || type == TYPE_INSERT_WORD) + return true; + } + + return false; +} + +/* The function returns true if INSN can be issued only from + the branch slot. */ + +static bool +is_branch_slot_insn (rtx insn) +{ + if (!insn || !INSN_P (insn) + || GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + return false; + + if (rs6000_cpu == PROCESSOR_POWER4) + { + enum attr_type type = get_attr_type (insn); + if (type == TYPE_BRANCH || type == TYPE_JMPREG) + return true; + return false; + } + + return false; +} /* A C statement (sans semicolon) to update the integer scheduling priority INSN_PRIORITY (INSN). Increase the priority to execute the @@ -13446,8 +13541,9 @@ rs6000_issue_rate (void) case CPU_PPC604E: case CPU_PPC620: case CPU_PPC630: - case CPU_POWER4: return 4; + case CPU_POWER4: + return 5; default: return 1; } @@ -13602,7 +13698,428 @@ rs6000_is_costly_dependence (rtx insn, rtx next, rtx link, int cost, int distanc return false; } +/* Return the next insn after INSN that is found before TAIL is reached, + skipping any "non-active" insns - insns that will not actually occupy + an issue slot. Return NULL_RTX if such an insn is not found. */ + +static rtx +get_next_active_insn (rtx insn, rtx tail) +{ + rtx next_insn; + + if (!insn || insn == tail) + return NULL_RTX; + + next_insn = NEXT_INSN (insn); + + while (next_insn + && next_insn != tail + && (GET_CODE(next_insn) == NOTE + || GET_CODE (PATTERN (next_insn)) == USE + || GET_CODE (PATTERN (next_insn)) == CLOBBER)) + { + next_insn = NEXT_INSN (next_insn); + } + + if (!next_insn || next_insn == tail) + return NULL_RTX; + + return next_insn; +} + +/* Return whether the presence of INSN causes a dispatch group terminatation + of group WHICH_GROUP. + + If WHICH_GROUP == current_group, this function will return true if INSN + causes the termination of the current group (i.e, the dispatch group to + which INSN belongs). This means that INSN will be the last insn in the + group it belongs to. + + If WHICH_GROUP == previous_group, this function will return true if INSN + causes the termination of the previous group (i.e, the dispatch group that + precedes the group to which INSN belongs). This means that INSN will be + the first insn in the group it belongs to). */ + +static bool +insn_terminates_group_p (rtx insn, enum group_termination which_group) +{ + enum attr_type type; + + if (! insn) + return false; + type = get_attr_type (insn); + + if (is_microcoded_insn (insn)) + return true; + + if (which_group == current_group) + { + if (is_branch_slot_insn (insn)) + return true; + return false; + } + else if (which_group == previous_group) + { + if (is_dispatch_slot_restricted (insn)) + return true; + return false; + } + + return false; +} + +/* Return true if it is recommended to keep NEXT_INSN "far" (in a seperate + dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */ + +static bool +is_costly_group (rtx *group_insns, rtx next_insn) +{ + int i; + rtx link; + int cost; + int issue_rate = rs6000_issue_rate (); + + for (i = 0; i < issue_rate; i++) + { + rtx insn = group_insns[i]; + if (!insn) + continue; + for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1)) + { + rtx next = XEXP (link, 0); + if (next == next_insn) + { + cost = insn_cost (insn, link, next_insn); + if (rs6000_is_costly_dependence (insn, next_insn, link, cost, 0)) + return true; + } + } + } + + return false; +} + +/* Utility of the function redefine_groups. + Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS + in the same dispatch group. If so, insert nops before NEXT_INSN, in order + to keep it "far" (in a separate group) from GROUP_INSNS, following + one of the following schemes, depending on the value of the flag + -minsert_sched_nops = X: + (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed + in order to force NEXT_INSN into a seperate group. + (2) X < sched_finish_regroup_exact: insert exactly X nops. + GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop + insertion (has a group just ended, how many vacant issue slots remain in the + last group, and how many dispatch groups were encountered so far). */ + +static int +force_new_group (int sched_verbose, FILE *dump, rtx *group_insns, rtx next_insn, + bool *group_end, int can_issue_more, int *group_count) +{ + rtx nop; + bool force; + int issue_rate = rs6000_issue_rate (); + bool end = *group_end; + int i; + + if (next_insn == NULL_RTX) + return can_issue_more; + + if (rs6000_sched_insert_nops > sched_finish_regroup_exact) + return can_issue_more; + + force = is_costly_group (group_insns, next_insn); + if (!force) + return can_issue_more; + + if (sched_verbose > 6) + fprintf (dump,"force: group count = %d, can_issue_more = %d\n", + *group_count ,can_issue_more); + + if (rs6000_sched_insert_nops == sched_finish_regroup_exact) + { + if (*group_end) + can_issue_more = 0; + + /* Since only a branch can be issued in the last issue_slot, it is + sufficient to insert 'can_issue_more - 1' nops if next_insn is not + a branch. If next_insn is a branch, we insert 'can_issue_more' nops; + in this case the last nop will start a new group and the branch will be + forced to the new group. */ + if (can_issue_more && !is_branch_slot_insn (next_insn)) + can_issue_more--; + + while (can_issue_more > 0) + { + nop = gen_nop(); + emit_insn_before (nop, next_insn); + can_issue_more--; + } + + *group_end = true; + return 0; + } + + if (rs6000_sched_insert_nops < sched_finish_regroup_exact) + { + int n_nops = rs6000_sched_insert_nops; + + /* Nops can't be issued from the branch slot, so the effective + issue_rate for nops is 'issue_rate - 1'. */ + if (can_issue_more == 0) + can_issue_more = issue_rate; + can_issue_more--; + if (can_issue_more == 0) + { + can_issue_more = issue_rate - 1; + (*group_count)++; + end = true; + for (i = 0; i < issue_rate; i++) + { + group_insns[i] = 0; + } + } + + while (n_nops > 0) + { + nop = gen_nop (); + emit_insn_before (nop, next_insn); + if (can_issue_more == issue_rate - 1) /* new group begins */ + end = false; + can_issue_more--; + if (can_issue_more == 0) + { + can_issue_more = issue_rate - 1; + (*group_count)++; + end = true; + for (i = 0; i < issue_rate; i++) + { + group_insns[i] = 0; + } + } + n_nops--; + } + + /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */ + can_issue_more++; + + *group_end = /* Is next_insn going to start a new group? */ + (end + || (can_issue_more == 1 && !is_branch_slot_insn (next_insn)) + || (can_issue_more <= 2 && is_cracked_insn (next_insn)) + || (can_issue_more < issue_rate && + insn_terminates_group_p (next_insn, previous_group))); + if (*group_end && end) + (*group_count)--; + + if (sched_verbose > 6) + fprintf (dump, "done force: group count = %d, can_issue_more = %d\n", + *group_count, can_issue_more); + return can_issue_more; + } + + return can_issue_more; +} + +/* This function tries to synch the dispatch groups that the compiler "sees" + with the dispatch groups that the processor dispatcher is expected to + form in practice. It tries to achieve this synchronization by forcing the + estimated processor grouping on the compiler (as opposed to the function + 'pad_goups' which tries to force the scheduler's grouping on the processor). + + The function scans the insn sequence between PREV_HEAD_INSN and TAIL and + examines the (estimated) dispatch groups that will be formed by the processor + dispatcher. It marks these group boundaries to reflect the estimated + processor grouping, overriding the grouping that the scheduler had marked. + Depending on the value of the flag '-minsert-sched-nops' this function can + force certain insns into separate groups or force a certain distance between + them by inserting nops, for example, if there exists a "costly dependence" + between the insns. + + The function estimates the group boundaries that the processor will form as + folllows: It keeps track of how many vacant issue slots are available after + each insn. A subsequent insn will start a new group if one of the following + 4 cases applies: + - no more vacant issue slots remain in the current dispatch group. + - only the last issue slot, which is the branch slot, is vacant, but the next + insn is not a branch. + - only the last 2 or less issue slots, including the branch slot, are vacant, + which means that a cracked insn (which occupies two issue slots) can't be + issued in this group. + - less than 'issue_rate' slots are vacant, and the next insn always needs to + start a new group. */ + +static int +redefine_groups (FILE *dump, int sched_verbose, rtx prev_head_insn, rtx tail) +{ + rtx insn, next_insn; + int issue_rate; + int can_issue_more; + int slot, i; + bool group_end; + int group_count = 0; + rtx *group_insns; + + /* Initialize. */ + issue_rate = rs6000_issue_rate (); + group_insns = alloca (issue_rate * sizeof (rtx)); + for (i = 0; i < issue_rate; i++) + { + group_insns[i] = 0; + } + can_issue_more = issue_rate; + slot = 0; + insn = get_next_active_insn (prev_head_insn, tail); + group_end = false; + + while (insn != NULL_RTX) + { + slot = (issue_rate - can_issue_more); + group_insns[slot] = insn; + can_issue_more = + rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more); + if (insn_terminates_group_p (insn, current_group)) + can_issue_more = 0; + + next_insn = get_next_active_insn (insn, tail); + if (next_insn == NULL_RTX) + return group_count + 1; + + group_end = /* Is next_insn going to start a new group? */ + (can_issue_more == 0 + || (can_issue_more == 1 && !is_branch_slot_insn (next_insn)) + || (can_issue_more <= 2 && is_cracked_insn (next_insn)) + || (can_issue_more < issue_rate && + insn_terminates_group_p (next_insn, previous_group))); + + can_issue_more = force_new_group (sched_verbose, dump, group_insns, + next_insn, &group_end, can_issue_more, &group_count); + + if (group_end) + { + group_count++; + can_issue_more = 0; + for (i = 0; i < issue_rate; i++) + { + group_insns[i] = 0; + } + } + + if (GET_MODE (next_insn) == TImode && can_issue_more) + PUT_MODE(next_insn, VOIDmode); + else if (!can_issue_more && GET_MODE (next_insn) != TImode) + PUT_MODE (next_insn, TImode); + + insn = next_insn; + if (can_issue_more == 0) + can_issue_more = issue_rate; + } /* while */ + + return group_count; +} + +/* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the + dispatch group boundaries that the scheduler had marked. Pad with nops + any dispatch groups which have vacant issue slots, in order to force the + scheduler's grouping on the processor dispatcher. The function + returns the number of dispatch groups found. */ + +static int +pad_groups (FILE *dump, int sched_verbose, rtx prev_head_insn, rtx tail) +{ + rtx insn, next_insn; + rtx nop; + int issue_rate; + int can_issue_more; + int group_end; + int group_count = 0; + + /* Initialize issue_rate. */ + issue_rate = rs6000_issue_rate (); + can_issue_more = issue_rate; + + insn = get_next_active_insn (prev_head_insn, tail); + next_insn = get_next_active_insn (insn, tail); + + while (insn != NULL_RTX) + { + can_issue_more = + rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more); + + group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode); + + if (next_insn == NULL_RTX) + break; + + if (group_end) + { + /* If the scheduler had marked group termination at this location + (between insn and next_indn), and neither insn nor next_insn will + force group termination, pad the group with nops to force group + termination. */ + if (can_issue_more + && (rs6000_sched_insert_nops == sched_finish_pad_groups) + && !insn_terminates_group_p (insn, current_group) + && !insn_terminates_group_p (next_insn, previous_group)) + { + if (!is_branch_slot_insn(next_insn)) + can_issue_more--; + + while (can_issue_more) + { + nop = gen_nop (); + emit_insn_before (nop, next_insn); + can_issue_more--; + } + } + + can_issue_more = issue_rate; + group_count++; + } + + insn = next_insn; + next_insn = get_next_active_insn (insn, tail); + } + + return group_count; +} + +/* The following function is called at the end of scheduling BB. + After reload, it inserts nops at insn group bundling. */ + +static void +rs6000_sched_finish (dump, sched_verbose) + FILE *dump; + int sched_verbose; +{ + int n_groups; + + if (sched_verbose) + fprintf (dump, "=== Finishing schedule.\n"); + + if (reload_completed && rs6000_cpu == PROCESSOR_POWER4) + { + if (rs6000_sched_insert_nops == sched_finish_none) + return; + + if (rs6000_sched_insert_nops == sched_finish_pad_groups) + n_groups = pad_groups (dump, sched_verbose, + current_sched_info->prev_head, + current_sched_info->next_tail); + else + n_groups = redefine_groups (dump, sched_verbose, + current_sched_info->prev_head, + current_sched_info->next_tail); + + if (sched_verbose >= 6) + { + fprintf (dump, "ngroups = %d\n", n_groups); + print_rtl (dump, current_sched_info->prev_head); + fprintf (dump, "Done finish_sched\n"); + } + } +} /* Length in units of the trampoline for entering a nested function. */ diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 3aa2de3..8ab42b1 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -386,6 +386,21 @@ enum rs6000_dependence_cost store_to_load_dep_costly }; +/* Types of nop insertion schemes in sched target hook sched_finish. */ +enum rs6000_nop_insertion + { + sched_finish_regroup_exact = 1000, + sched_finish_pad_groups, + sched_finish_none + }; + +/* Dispatch group termination caused by an insn. */ +enum group_termination + { + current_group, + previous_group + }; + /* This is meant to be overridden in target specific files. */ #define SUBTARGET_OPTIONS @@ -413,7 +428,9 @@ enum rs6000_dependence_cost N_("Avoid all range limits on call instructions"), 0}, \ {"no-longcall", &rs6000_longcall_switch, "", 0}, \ {"sched-costly-dep=", &rs6000_sched_costly_dep_str, \ - N_("determine which dependences between insns are considered costly"), 0}, \ + N_("Determine which dependences between insns are considered costly"), 0}, \ + {"insert-sched-nops=", &rs6000_sched_insert_nops_str, \ + N_("Specify which post scheduling nop insertion scheme to apply"), 0}, \ {"align-", &rs6000_alignment_string, \ N_("Specify alignment of structure fields default/natural"), 0}, \ {"prioritize-restricted-insns=", &rs6000_sched_restricted_insns_priority_str, \ @@ -475,6 +492,8 @@ extern const char *rs6000_sched_restricted_insns_priority_str; extern int rs6000_sched_restricted_insns_priority; extern const char *rs6000_sched_costly_dep_str; extern enum rs6000_dependence_cost rs6000_sched_costly_dep; +extern const char *rs6000_sched_insert_nops_str; +extern enum rs6000_nop_insertion rs6000_sched_insert_nops; /* Alignment options for fields in structures for sub-targets following AIX-like ABI. @@ -501,6 +520,11 @@ extern enum rs6000_dependence_cost rs6000_sched_costly_dep; /* Define if the target has restricted dispatch slot instructions. */ #define DEFAULT_RESTRICTED_INSNS_PRIORITY (rs6000_cpu == PROCESSOR_POWER4 ? 1 : 0) +/* Set a default value for post scheduling nop insertion scheme + (used by taget hook sched_finish). */ +#define DEFAULT_SCHED_FINISH_NOP_INSERTION_SCHEME \ + (rs6000_cpu == PROCESSOR_POWER4 ? sched_finish_regroup_exact : sched_finish_none) + /* Define TARGET_MFCRF if the target assembler supports the optional field operand for mfcr and the target processor supports the instruction. */ diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index c99af2c..0689af7 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -435,6 +435,7 @@ in the following sections. -mdynamic-no-pic @gol -mprioritize-restricted-insns=@var{priority} @gol -msched-costly-dep=@var{dependence_type} @gol +-minsert-sched-nops=@var{scheme} @gol -mcall-sysv -mcall-netbsd @gol -maix-struct-return -msvr4-struct-return @gol -mabi=altivec -mabi=no-altivec @gol @@ -7590,6 +7591,20 @@ by the target during instruction scheduling. The argument @var{store_to_load}: any dependence from store to load is costly, @var{number}: any dependence which latency >= @var{number} is costly. +@item -minsert-sched-nops=@var{scheme} +@opindex minsert-sched-nops +This option controls which nop insertion scheme will be used during +the second scheduling pass. The argument @var{scheme} takes one of the +following values: +@var{no}: Don't insert nops. +@var{pad}: Pad with nops any dispatch group which has vacant issue slots, +according to the scheduler's grouping. +@var{regroup_exact}: Insert nops to force costly dependent insns into +separate groups. Insert exactly as many nops as needed to force an insn +to a new group, according to the estimatied processor grouping. +@var{number}: Insert nops to force costly dependent insns into +separate groups. Insert @var{number} nops to force an insn to a new group. + @item -mcall-sysv @opindex mcall-sysv On System V.4 and embedded PowerPC systems compile code using calling |