Diffstat (limited to 'gcc/config/rs6000/rs6000.c')
-rw-r--r-- | gcc/config/rs6000/rs6000.c | 945
1 file changed, 822 insertions, 123 deletions
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index e82b6d5..9d784ae 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -145,6 +145,9 @@ static GTY(()) bool rs6000_always_hint;
 /* Schedule instructions for group formation.  */
 static GTY(()) bool rs6000_sched_groups;
 
+/* Align branch targets.  */
+static GTY(()) bool rs6000_align_branch_targets;
+
 /* Support for -msched-costly-dep option.  */
 const char *rs6000_sched_costly_dep_str;
 enum rs6000_dependence_cost rs6000_sched_costly_dep;
@@ -235,6 +238,10 @@ static enum {
 int toc_initialized;
 char toc_label_name[10];
 
+/* Cached value of rs6000_variable_issue. This is cached in
+   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
+static short cached_can_issue_more;
+
 static GTY(()) section *read_only_data_section;
 static GTY(()) section *private_data_section;
 static GTY(()) section *read_only_private_data_section;
@@ -572,6 +579,21 @@ struct processor_costs power4_cost = {
   COSTS_N_INSNS (17),   /* ddiv */
 };
 
+/* Instruction costs on POWER6 processors.  */
+static const
+struct processor_costs power6_cost = {
+  COSTS_N_INSNS (8),    /* mulsi */
+  COSTS_N_INSNS (8),    /* mulsi_const */
+  COSTS_N_INSNS (8),    /* mulsi_const9 */
+  COSTS_N_INSNS (8),    /* muldi */
+  COSTS_N_INSNS (22),   /* divsi */
+  COSTS_N_INSNS (28),   /* divdi */
+  COSTS_N_INSNS (3),    /* fp */
+  COSTS_N_INSNS (3),    /* dmul */
+  COSTS_N_INSNS (13),   /* sdiv */
+  COSTS_N_INSNS (16),   /* ddiv */
+};
+
 
 static bool rs6000_function_ok_for_sibcall (tree, tree);
 static const char *rs6000_invalid_within_doloop (rtx);
@@ -647,20 +669,28 @@ static void rs6000_xcoff_file_end (void);
 static int rs6000_variable_issue (FILE *, int, rtx, int);
 static bool rs6000_rtx_costs (rtx, int, int, int *);
 static int rs6000_adjust_cost (rtx, rtx, rtx, int);
+static void rs6000_sched_init (FILE *, int, int);
 static bool is_microcoded_insn (rtx);
-static int is_dispatch_slot_restricted (rtx);
 static bool is_cracked_insn (rtx);
 static bool is_branch_slot_insn (rtx);
+static bool is_load_insn (rtx);
+static bool is_store_insn (rtx);
+static bool set_to_load_agen (rtx,rtx);
+static bool adjacent_mem_locations (rtx,rtx);
 static int rs6000_adjust_priority (rtx, int);
 static int rs6000_issue_rate (void);
 static bool rs6000_is_costly_dependence (rtx, rtx, rtx, int, int);
 static rtx get_next_active_insn (rtx, rtx);
 static bool insn_terminates_group_p (rtx , enum group_termination);
+static bool insn_must_be_first_in_group (rtx);
+static bool insn_must_be_last_in_group (rtx);
 static bool is_costly_group (rtx *, rtx);
 static int force_new_group (int, FILE *, rtx *, rtx, bool *, int, int *);
 static int redefine_groups (FILE *, int, rtx, rtx);
 static int pad_groups (FILE *, int, rtx, rtx);
 static void rs6000_sched_finish (FILE *, int);
+static int rs6000_sched_reorder (FILE *, int, rtx *, int *, int);
+static int rs6000_sched_reorder2 (FILE *, int, rtx *, int *, int);
 static int rs6000_use_sched_lookahead (void);
 static tree rs6000_builtin_mask_for_load (void);
@@ -908,8 +938,14 @@ static const char alt_reg_names[][8] =
 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
+#undef TARGET_SCHED_INIT
+#define TARGET_SCHED_INIT rs6000_sched_init
 #undef TARGET_SCHED_FINISH
 #define TARGET_SCHED_FINISH rs6000_sched_finish
+#undef TARGET_SCHED_REORDER
+#define TARGET_SCHED_REORDER rs6000_sched_reorder
+#undef TARGET_SCHED_REORDER2
+#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
 
 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
@@ -1194,9 +1230,12 @@ rs6000_override_options (const char *default_cpu)
 	 {"power5+", PROCESSOR_POWER5,
 	  POWERPC_BASE_MASK | MASK_POWERPC64 | MASK_PPC_GFXOPT
 	  | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND},
-	 {"power6", PROCESSOR_POWER5,
+	 {"power6", PROCESSOR_POWER6,
 	  POWERPC_7400_MASK | MASK_POWERPC64 | MASK_MFCRF | MASK_POPCNTB
 	  | MASK_FPRND},
+	 {"power6x", PROCESSOR_POWER6,
+	  POWERPC_7400_MASK | MASK_POWERPC64 | MASK_MFCRF | MASK_POPCNTB
+	  | MASK_FPRND | MASK_MFPGPR},
 	 {"powerpc", PROCESSOR_POWERPC, POWERPC_BASE_MASK},
 	 {"powerpc64", PROCESSOR_POWERPC64,
 	  POWERPC_BASE_MASK | MASK_PPC_GFXOPT | MASK_POWERPC64},
@@ -1223,7 +1262,7 @@ rs6000_override_options (const char *default_cpu)
     POWERPC_MASKS = (POWERPC_BASE_MASK | MASK_PPC_GPOPT | MASK_PPC_GFXOPT
 		     | MASK_POWERPC64 | MASK_ALTIVEC | MASK_MFCRF
 		     | MASK_POPCNTB | MASK_FPRND | MASK_MULHW
-		     | MASK_DLMZB)
+		     | MASK_DLMZB | MASK_MFPGPR)
   };
 
   rs6000_init_hard_regno_mode_ok ();
@@ -1399,9 +1438,13 @@ rs6000_override_options (const char *default_cpu)
     }
 
   rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
-			&& rs6000_cpu != PROCESSOR_POWER5);
+			&& rs6000_cpu != PROCESSOR_POWER5
+			&& rs6000_cpu != PROCESSOR_POWER6);
   rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
 			 || rs6000_cpu == PROCESSOR_POWER5);
+  rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
+				 || rs6000_cpu == PROCESSOR_POWER5
+				 || rs6000_cpu == PROCESSOR_POWER6);
 
   rs6000_sched_restricted_insns_priority
     = (rs6000_sched_groups ? 1 : 0);
@@ -1470,7 +1513,7 @@ rs6000_override_options (const char *default_cpu)
   /* Set branch target alignment, if not optimizing for size.  */
   if (!optimize_size)
     {
-      if (rs6000_sched_groups)
+      if (rs6000_align_branch_targets)
 	{
 	  if (align_functions <= 0)
 	    align_functions = 16;
@@ -1569,6 +1612,10 @@ rs6000_override_options (const char *default_cpu)
 	rs6000_cost = &power4_cost;
 	break;
 
+      case PROCESSOR_POWER6:
+	rs6000_cost = &power6_cost;
+	break;
+
       default:
 	gcc_unreachable ();
       }
@@ -16461,6 +16508,16 @@ output_function_profiler (FILE *file, int labelno)
 }
 
+
+/* The following variable value is the last issued insn.  */
+
+static rtx last_scheduled_insn;
+
+/* The following variable helps to balance issuing of load and
+   store instructions */
+
+static int load_store_pendulum;
+
 /* Power4 load update and store update instructions are cracked into a
    load or store and an integer insn which are executed in the same cycle.
    Branches have their own dispatch slot which does not count against the
@@ -16472,19 +16529,34 @@ rs6000_variable_issue (FILE *stream ATTRIBUTE_UNUSED,
 		       int verbose ATTRIBUTE_UNUSED,
 		       rtx insn, int more)
 {
+  last_scheduled_insn = insn;
   if (GET_CODE (PATTERN (insn)) == USE
       || GET_CODE (PATTERN (insn)) == CLOBBER)
-    return more;
+    {
+      cached_can_issue_more = more;
+      return cached_can_issue_more;
+    }
+
+  if (insn_terminates_group_p (insn, current_group))
+    {
+      cached_can_issue_more = 0;
+      return cached_can_issue_more;
+    }
 
   if (rs6000_sched_groups)
     {
       if (is_microcoded_insn (insn))
-	return 0;
+	cached_can_issue_more = 0;
      else if (is_cracked_insn (insn))
-	return more > 2 ? more - 2 : 0;
+	cached_can_issue_more = more > 2 ? more - 2 : 0;
+      else
+	cached_can_issue_more = more - 1;
+
+      return cached_can_issue_more;
    }
 
-  return more - 1;
+  cached_can_issue_more = more - 1;
+  return cached_can_issue_more;
 }
 
 /* Adjust the cost of a scheduling dependency.  Return the new cost of
@@ -16493,64 +16565,285 @@ rs6000_variable_issue (FILE *stream ATTRIBUTE_UNUSED,
 static int
 rs6000_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
 {
+  enum attr_type attr_type;
+
   if (! recog_memoized (insn))
     return 0;
 
-  if (REG_NOTE_KIND (link) != 0)
-    return 0;
+  switch (REG_NOTE_KIND (link))
+    {
+    case REG_DEP_TRUE:
+      {
+        /* Data dependency; DEP_INSN writes a register that INSN reads
+           some cycles later.  */
+
+        /* Separate a load from a narrower, dependent store.  */
+        if (rs6000_sched_groups
+            && GET_CODE (PATTERN (insn)) == SET
+            && GET_CODE (PATTERN (dep_insn)) == SET
+            && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
+            && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
+            && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
+                > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
+          return cost + 14;
+
+        attr_type = get_attr_type (insn);
+
+        switch (attr_type)
+          {
+          case TYPE_JMPREG:
+            /* Tell the first scheduling pass about the latency between
+               a mtctr and bctr (and mtlr and br/blr).  The first
+               scheduling pass will not know about this latency since
+               the mtctr instruction, which has the latency associated
+               to it, will be generated by reload.  */
+            return TARGET_POWER ? 5 : 4;
+          case TYPE_BRANCH:
+            /* Leave some extra cycles between a compare and its
+               dependent branch, to inhibit expensive mispredicts.  */
+            if ((rs6000_cpu_attr == CPU_PPC603
+                 || rs6000_cpu_attr == CPU_PPC604
+                 || rs6000_cpu_attr == CPU_PPC604E
+                 || rs6000_cpu_attr == CPU_PPC620
+                 || rs6000_cpu_attr == CPU_PPC630
+                 || rs6000_cpu_attr == CPU_PPC750
+                 || rs6000_cpu_attr == CPU_PPC7400
+                 || rs6000_cpu_attr == CPU_PPC7450
+                 || rs6000_cpu_attr == CPU_POWER4
+                 || rs6000_cpu_attr == CPU_POWER5)
+                && recog_memoized (dep_insn)
+                && (INSN_CODE (dep_insn) >= 0))
+
+              switch (get_attr_type (dep_insn))
+                {
+                case TYPE_CMP:
+                case TYPE_COMPARE:
+                case TYPE_DELAYED_COMPARE:
+                case TYPE_IMUL_COMPARE:
+                case TYPE_LMUL_COMPARE:
+                case TYPE_FPCOMPARE:
+                case TYPE_CR_LOGICAL:
+                case TYPE_DELAYED_CR:
+                  return cost + 2;
+                default:
+                  break;
+                }
+            break;
+
+          case TYPE_STORE:
+          case TYPE_STORE_U:
+          case TYPE_STORE_UX:
+          case TYPE_FPSTORE:
+          case TYPE_FPSTORE_U:
+          case TYPE_FPSTORE_UX:
+            if ((rs6000_cpu == PROCESSOR_POWER6)
+                && recog_memoized (dep_insn)
+                && (INSN_CODE (dep_insn) >= 0))
+              {
+
+                if (GET_CODE (PATTERN (insn)) != SET)
+                  /* If this happens, we have to extend this to schedule
+                     optimally.  Return default for now.  */
+                  return cost;
+
+                /* Adjust the cost for the case where the value written
+                   by a fixed point operation is used as the address
+                   gen value on a store.  */
+                switch (get_attr_type (dep_insn))
+                  {
+                  case TYPE_LOAD:
+                  case TYPE_LOAD_U:
+                  case TYPE_LOAD_UX:
+                  case TYPE_CNTLZ:
+                    {
+                      if (! store_data_bypass_p (dep_insn, insn))
+                        return 4;
+                      break;
+                    }
+                  case TYPE_LOAD_EXT:
+                  case TYPE_LOAD_EXT_U:
+                  case TYPE_LOAD_EXT_UX:
+                  case TYPE_VAR_SHIFT_ROTATE:
+                  case TYPE_VAR_DELAYED_COMPARE:
+                    {
+                      if (! store_data_bypass_p (dep_insn, insn))
+                        return 6;
+                      break;
+                    }
+                  case TYPE_INTEGER:
+                  case TYPE_COMPARE:
+                  case TYPE_FAST_COMPARE:
+                  case TYPE_EXTS:
+                  case TYPE_SHIFT:
+                  case TYPE_INSERT_WORD:
+                  case TYPE_INSERT_DWORD:
+                  case TYPE_FPLOAD_U:
+                  case TYPE_FPLOAD_UX:
+                  case TYPE_STORE_U:
+                  case TYPE_STORE_UX:
+                  case TYPE_FPSTORE_U:
+                  case TYPE_FPSTORE_UX:
+                    {
+                      if (! store_data_bypass_p (dep_insn, insn))
+                        return 3;
+                      break;
+                    }
+                  case TYPE_IMUL:
+                  case TYPE_IMUL2:
+                  case TYPE_IMUL3:
+                  case TYPE_LMUL:
+                  case TYPE_IMUL_COMPARE:
+                  case TYPE_LMUL_COMPARE:
+                    {
+                      if (! store_data_bypass_p (dep_insn, insn))
+                        return 17;
+                      break;
+                    }
+                  case TYPE_IDIV:
+                    {
+                      if (! store_data_bypass_p (dep_insn, insn))
+                        return 45;
+                      break;
+                    }
+                  case TYPE_LDIV:
+                    {
+                      if (! store_data_bypass_p (dep_insn, insn))
+                        return 57;
+                      break;
+                    }
+                  default:
+                    break;
+                  }
+              }
+            break;
+
+          case TYPE_LOAD:
+          case TYPE_LOAD_U:
+          case TYPE_LOAD_UX:
+          case TYPE_LOAD_EXT:
+          case TYPE_LOAD_EXT_U:
+          case TYPE_LOAD_EXT_UX:
+            if ((rs6000_cpu == PROCESSOR_POWER6)
+                && recog_memoized (dep_insn)
+                && (INSN_CODE (dep_insn) >= 0))
+              {
+
+                /* Adjust the cost for the case where the value written
+                   by a fixed point instruction is used within the address
+                   gen portion of a subsequent load(u)(x) */
+                switch (get_attr_type (dep_insn))
+                  {
+                  case TYPE_LOAD:
+                  case TYPE_LOAD_U:
+                  case TYPE_LOAD_UX:
+                  case TYPE_CNTLZ:
+                    {
+                      if (set_to_load_agen (dep_insn, insn))
+                        return 4;
+                      break;
+                    }
+                  case TYPE_LOAD_EXT:
+                  case TYPE_LOAD_EXT_U:
+                  case TYPE_LOAD_EXT_UX:
+                  case TYPE_VAR_SHIFT_ROTATE:
+                  case TYPE_VAR_DELAYED_COMPARE:
+                    {
+                      if (set_to_load_agen (dep_insn, insn))
+                        return 6;
+                      break;
+                    }
+                  case TYPE_INTEGER:
+                  case TYPE_COMPARE:
+                  case TYPE_FAST_COMPARE:
+                  case TYPE_EXTS:
+                  case TYPE_SHIFT:
+                  case TYPE_INSERT_WORD:
+                  case TYPE_INSERT_DWORD:
+                  case TYPE_FPLOAD_U:
+                  case TYPE_FPLOAD_UX:
+                  case TYPE_STORE_U:
+                  case TYPE_STORE_UX:
+                  case TYPE_FPSTORE_U:
+                  case TYPE_FPSTORE_UX:
+                    {
+                      if (set_to_load_agen (dep_insn, insn))
+                        return 3;
+                      break;
+                    }
+                  case TYPE_IMUL:
+                  case TYPE_IMUL2:
+                  case TYPE_IMUL3:
+                  case TYPE_LMUL:
+                  case TYPE_IMUL_COMPARE:
+                  case TYPE_LMUL_COMPARE:
+                    {
+                      if (set_to_load_agen (dep_insn, insn))
+                        return 17;
+                      break;
+                    }
+                  case TYPE_IDIV:
+                    {
+                      if (set_to_load_agen (dep_insn, insn))
+                        return 45;
+                      break;
+                    }
+                  case TYPE_LDIV:
+                    {
+                      if (set_to_load_agen (dep_insn, insn))
+                        return 57;
+                      break;
+                    }
+                  default:
+                    break;
+                  }
+              }
+            break;
+
+          case TYPE_FPLOAD:
+            if ((rs6000_cpu == PROCESSOR_POWER6)
+                && recog_memoized (dep_insn)
+                && (INSN_CODE (dep_insn) >= 0)
+                && (get_attr_type (dep_insn) == TYPE_MFFGPR))
+              return 2;
+
+          default:
+            break;
+          }
-  if (REG_NOTE_KIND (link) == 0)
-    {
-      /* Data dependency; DEP_INSN writes a register that INSN reads
-	 some cycles later.  */
-
-      /* Separate a load from a narrower, dependent store.  */
-      if (rs6000_sched_groups
-	  && GET_CODE (PATTERN (insn)) == SET
-	  && GET_CODE (PATTERN (dep_insn)) == SET
-	  && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
-	  && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
-	  && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
-	      > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
-	return cost + 14;
-
-      switch (get_attr_type (insn))
-	{
-	case TYPE_JMPREG:
-	  /* Tell the first scheduling pass about the latency between
-	     a mtctr and bctr (and mtlr and br/blr).  The first
-	     scheduling pass will not know about this latency since
-	     the mtctr instruction, which has the latency associated
-	     to it, will be generated by reload.  */
-	  return TARGET_POWER ? 5 : 4;
-	case TYPE_BRANCH:
-	  /* Leave some extra cycles between a compare and its
-	     dependent branch, to inhibit expensive mispredicts.  */
-	  if ((rs6000_cpu_attr == CPU_PPC603
-	       || rs6000_cpu_attr == CPU_PPC604
-	       || rs6000_cpu_attr == CPU_PPC604E
-	       || rs6000_cpu_attr == CPU_PPC620
-	       || rs6000_cpu_attr == CPU_PPC630
-	       || rs6000_cpu_attr == CPU_PPC750
-	       || rs6000_cpu_attr == CPU_PPC7400
-	       || rs6000_cpu_attr == CPU_PPC7450
-	       || rs6000_cpu_attr == CPU_POWER4
-	       || rs6000_cpu_attr == CPU_POWER5)
-	      && recog_memoized (dep_insn)
-	      && (INSN_CODE (dep_insn) >= 0)
-	      && (get_attr_type (dep_insn) == TYPE_CMP
-		  || get_attr_type (dep_insn) == TYPE_COMPARE
-		  || get_attr_type (dep_insn) == TYPE_DELAYED_COMPARE
-		  || get_attr_type (dep_insn) == TYPE_IMUL_COMPARE
-		  || get_attr_type (dep_insn) == TYPE_LMUL_COMPARE
-		  || get_attr_type (dep_insn) == TYPE_FPCOMPARE
-		  || get_attr_type (dep_insn) == TYPE_CR_LOGICAL
-		  || get_attr_type (dep_insn) == TYPE_DELAYED_CR))
-	    return cost + 2;
-	default:
-	  break;
-	}
       /* Fall out to return default cost.  */
+      }
+      break;
+
+    case REG_DEP_OUTPUT:
+      /* Output dependency; DEP_INSN writes a register that INSN writes some
+         cycles later.  */
+      if ((rs6000_cpu == PROCESSOR_POWER6)
+          && recog_memoized (dep_insn)
+          && (INSN_CODE (dep_insn) >= 0))
+        {
+          attr_type = get_attr_type (insn);
+
+          switch (attr_type)
+            {
+            case TYPE_FP:
+              if (get_attr_type (dep_insn) == TYPE_FP)
+                return 1;
+              break;
+            case TYPE_FPLOAD:
+              if (get_attr_type (dep_insn) == TYPE_MFFGPR)
+                return 2;
+              break;
+            default:
+              break;
+            }
+        }
+    case REG_DEP_ANTI:
+      /* Anti dependency; DEP_INSN reads a register that INSN writes some
+         cycles later.  */
+      return 0;
+
+    default:
+      gcc_unreachable ();
     }
 
   return cost;
@@ -16581,55 +16874,6 @@ is_microcoded_insn (rtx insn)
   return false;
 }
 
-/* The function returns a nonzero value if INSN can be scheduled only
-   as the first insn in a dispatch group ("dispatch-slot restricted").
-   In this case, the returned value indicates how many dispatch slots
-   the insn occupies (at the beginning of the group).
-   Return 0 otherwise.  */
-
-static int
-is_dispatch_slot_restricted (rtx insn)
-{
-  enum attr_type type;
-
-  if (!rs6000_sched_groups)
-    return 0;
-
-  if (!insn
-      || insn == NULL_RTX
-      || GET_CODE (insn) == NOTE
-      || GET_CODE (PATTERN (insn)) == USE
-      || GET_CODE (PATTERN (insn)) == CLOBBER)
-    return 0;
-
-  type = get_attr_type (insn);
-
-  switch (type)
-    {
-    case TYPE_MFCR:
-    case TYPE_MFCRF:
-    case TYPE_MTCR:
-    case TYPE_DELAYED_CR:
-    case TYPE_CR_LOGICAL:
-    case TYPE_MTJMPR:
-    case TYPE_MFJMPR:
-      return 1;
-    case TYPE_IDIV:
-    case TYPE_LDIV:
-      return 2;
-    case TYPE_LOAD_L:
-    case TYPE_STORE_C:
-    case TYPE_ISYNC:
-    case TYPE_SYNC:
-      return 4;
-    default:
-      if (rs6000_cpu == PROCESSOR_POWER5
-	  && is_cracked_insn (insn))
-	return 2;
-      return 0;
-    }
-}
-
 /* The function returns true if INSN is cracked into 2 instructions
    by the processor (and therefore occupies 2 issue slots).  */
 
@@ -16680,6 +16924,74 @@ is_branch_slot_insn (rtx insn)
   return false;
 }
 
+/* The function returns true if out_insn sets a value that is
+   used in the address generation computation of in_insn */
+static bool
+set_to_load_agen (rtx out_insn, rtx in_insn)
+{
+  rtx out_set, in_set;
+
+  /* For performance reasons, only handle the simple case where
+     both loads are a single_set. */
+  out_set = single_set (out_insn);
+  if (out_set)
+    {
+      in_set = single_set (in_insn);
+      if (in_set)
+        return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
+    }
+
+  return false;
+}
+
+/* The function returns true if the target storage location of
+   out_insn is adjacent to the target storage location of in_insn */
+/* Return 1 if memory locations are adjacent.  */
+
+static bool
+adjacent_mem_locations (rtx insn1, rtx insn2)
+{
+
+  rtx a = SET_DEST (PATTERN (insn1));
+  rtx b = SET_DEST (PATTERN (insn2));
+
+  if ((GET_CODE (XEXP (a, 0)) == REG
+       || (GET_CODE (XEXP (a, 0)) == PLUS
+           && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
+      && (GET_CODE (XEXP (b, 0)) == REG
+          || (GET_CODE (XEXP (b, 0)) == PLUS
+              && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
+    {
+      HOST_WIDE_INT val0 = 0, val1 = 0;
+      rtx reg0, reg1;
+      int val_diff;
+
+      if (GET_CODE (XEXP (a, 0)) == PLUS)
+        {
+          reg0 = XEXP (XEXP (a, 0), 0);
+          val0 = INTVAL (XEXP (XEXP (a, 0), 1));
+        }
+      else
+        reg0 = XEXP (a, 0);
+
+      if (GET_CODE (XEXP (b, 0)) == PLUS)
+        {
+          reg1 = XEXP (XEXP (b, 0), 0);
+          val1 = INTVAL (XEXP (XEXP (b, 0), 1));
+        }
+      else
+        reg1 = XEXP (b, 0);
+
+      val_diff = val1 - val0;
+
+      return ((REGNO (reg0) == REGNO (reg1))
+              && (val_diff == INTVAL (MEM_SIZE (a))
+                  || val_diff == -INTVAL (MEM_SIZE (b))));
+    }
+
+  return false;
+}
+
 /* A C statement (sans semicolon) to update the integer scheduling
    priority INSN_PRIORITY (INSN).  Increase the priority to execute the
    INSN earlier, reduce the priority to execute INSN later.  Do not
@@ -16719,7 +17031,7 @@ rs6000_adjust_priority (rtx insn ATTRIBUTE_UNUSED, int priority)
     }
 #endif
 
-  if (is_dispatch_slot_restricted (insn)
+  if (insn_must_be_first_in_group (insn)
       && reload_completed
      && current_sched_info->sched_max_insns_priority
      && rs6000_sched_restricted_insns_priority)
@@ -16739,6 +17051,15 @@ rs6000_adjust_priority (rtx insn ATTRIBUTE_UNUSED, int priority)
 	return (priority + 1);
     }
 
+  if (rs6000_cpu == PROCESSOR_POWER6
+      && ((load_store_pendulum == -2 && is_load_insn (insn))
+          || (load_store_pendulum == 2 && is_store_insn (insn))))
+    /* Attach highest priority to insn if the scheduler has just issued two
+       stores and this instruction is a load, or two loads and this instruction
+       is a store. Power6 wants loads and stores scheduled alternately
+       when possible */
+    return current_sched_info->sched_max_insns_priority;
+
   return priority;
 }
 
@@ -16771,6 +17092,7 @@ rs6000_issue_rate (void)
     return 4;
   case CPU_POWER4:
   case CPU_POWER5:
+  case CPU_POWER6:
     return 5;
   default:
     return 1;
@@ -16955,6 +17277,221 @@ get_next_active_insn (rtx insn, rtx tail)
   return insn;
 }
 
+/* We are about to begin issuing insns for this clock cycle. */
+
+static int
+rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
+                      rtx *ready ATTRIBUTE_UNUSED,
+                      int *pn_ready ATTRIBUTE_UNUSED,
+                      int clock_var ATTRIBUTE_UNUSED)
+{
+  if (sched_verbose)
+    fprintf (dump, "// rs6000_sched_reorder :\n");
+
+  if (rs6000_cpu == PROCESSOR_POWER6)
+    load_store_pendulum = 0;
+
+  return rs6000_issue_rate ();
+}
+
+/* Like rs6000_sched_reorder, but called after issuing each insn.  */
+
+static int
+rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx *ready,
+                       int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
+{
+  if (sched_verbose)
+    fprintf (dump, "// rs6000_sched_reorder2 :\n");
+
+  /* For Power6, we need to handle some special cases to try and keep the
+     store queue from overflowing and triggering expensive flushes.
+
+     This code monitors how load and store instructions are being issued
+     and skews the ready list one way or the other to increase the likelihood
+     that a desired instruction is issued at the proper time.
+
+     A couple of things are done.  First, we maintain a "load_store_pendulum"
+     to track the current state of load/store issue.
+
+       - If the pendulum is at zero, then no loads or stores have been
+         issued in the current cycle so we do nothing.
+
+       - If the pendulum is 1, then a single load has been issued in this
+         cycle and we attempt to locate another load in the ready list to
+         issue with it.
+
+       - If the pendulum is -2, then two stores have already been
+         issued in this cycle, so we increase the priority of the first load
+         in the ready list to increase its likelihood of being chosen first
+         in the next cycle.
+
+       - If the pendulum is -1, then a single store has been issued in this
+         cycle and we attempt to locate another store in the ready list to
+         issue with it, preferring a store to an adjacent memory location to
+         facilitate store pairing in the store queue.
+
+       - If the pendulum is 2, then two loads have already been
+         issued in this cycle, so we increase the priority of the first store
+         in the ready list to increase its likelihood of being chosen first
+         in the next cycle.
+
+       - If the pendulum < -2 or > 2, then do nothing.
+
+     Note: This code covers the most common scenarios.  There exist non
+           load/store instructions which make use of the LSU and which
+           would need to be accounted for to strictly model the behavior
+           of the machine.  Those instructions are currently unaccounted
+           for to help minimize compile time overhead of this code.
+   */
+  if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
+    {
+      int pos;
+      int i;
+      rtx tmp;
+
+      if (is_store_insn (last_scheduled_insn))
+        /* Issuing a store, swing the load_store_pendulum to the left */
+        load_store_pendulum--;
+      else if (is_load_insn (last_scheduled_insn))
+        /* Issuing a load, swing the load_store_pendulum to the right */
+        load_store_pendulum++;
+      else
+        return cached_can_issue_more;
+
+      /* If the pendulum is balanced, or there is only one instruction on
+         the ready list, then all is well, so return. */
+      if ((load_store_pendulum == 0) || (*pn_ready <= 1))
+        return cached_can_issue_more;
+
+      if (load_store_pendulum == 1)
+        {
+          /* A load has been issued in this cycle.  Scan the ready list
+             for another load to issue with it */
+          pos = *pn_ready-1;
+
+          while (pos >= 0)
+            {
+              if (is_load_insn (ready[pos]))
+                {
+                  /* Found a load.  Move it to the head of the ready list,
+                     and adjust its priority so that it is more likely to
+                     stay there */
+                  tmp = ready[pos];
+                  for (i=pos; i<*pn_ready-1; i++)
+                    ready[i] = ready[i + 1];
+                  ready[*pn_ready-1] = tmp;
+                  if INSN_PRIORITY_KNOWN (tmp)
+                    INSN_PRIORITY (tmp)++;
+                  break;
+                }
+              pos--;
+            }
+        }
+      else if (load_store_pendulum == -2)
+        {
+          /* Two stores have been issued in this cycle.  Increase the
+             priority of the first load in the ready list to favor it for
+             issuing in the next cycle. */
+          pos = *pn_ready-1;
+
+          while (pos >= 0)
+            {
+              if (is_load_insn (ready[pos])
+                  && INSN_PRIORITY_KNOWN (ready[pos]))
+                {
+                  INSN_PRIORITY (ready[pos])++;
+
+                  /* Adjust the pendulum to account for the fact that a load
+                     was found and increased in priority.  This is to prevent
+                     increasing the priority of multiple loads */
+                  load_store_pendulum--;
+
+                  break;
+                }
+              pos--;
+            }
+        }
+      else if (load_store_pendulum == -1)
+        {
+          /* A store has been issued in this cycle.  Scan the ready list for
+             another store to issue with it, preferring a store to an adjacent
+             memory location */
+          int first_store_pos = -1;
+
+          pos = *pn_ready-1;
+
+          while (pos >= 0)
+            {
+              if (is_store_insn (ready[pos]))
+                {
+                  /* Maintain the index of the first store found on the
+                     list */
+                  if (first_store_pos == -1)
+                    first_store_pos = pos;
+
+                  if (is_store_insn (last_scheduled_insn)
+                      && adjacent_mem_locations (last_scheduled_insn,ready[pos]))
+                    {
+                      /* Found an adjacent store.  Move it to the head of the
+                         ready list, and adjust its priority so that it is
+                         more likely to stay there */
+                      tmp = ready[pos];
+                      for (i=pos; i<*pn_ready-1; i++)
+                        ready[i] = ready[i + 1];
+                      ready[*pn_ready-1] = tmp;
+                      if INSN_PRIORITY_KNOWN (tmp)
+                        INSN_PRIORITY (tmp)++;
+                      first_store_pos = -1;
+
+                      break;
+                    };
+                }
+              pos--;
+            }
+
+          if (first_store_pos >= 0)
+            {
+              /* An adjacent store wasn't found, but a non-adjacent store was,
+                 so move the non-adjacent store to the front of the ready
+                 list, and adjust its priority so that it is more likely to
+                 stay there. */
+              tmp = ready[first_store_pos];
+              for (i=first_store_pos; i<*pn_ready-1; i++)
+                ready[i] = ready[i + 1];
+              ready[*pn_ready-1] = tmp;
+              if INSN_PRIORITY_KNOWN (tmp)
+                INSN_PRIORITY (tmp)++;
+            }
+        }
+      else if (load_store_pendulum == 2)
+       {
+          /* Two loads have been issued in this cycle.  Increase the priority
+             of the first store in the ready list to favor it for issuing in
+             the next cycle. */
+          pos = *pn_ready-1;
+
+          while (pos >= 0)
+            {
+              if (is_store_insn (ready[pos])
+                  && INSN_PRIORITY_KNOWN (ready[pos]))
+                {
+                  INSN_PRIORITY (ready[pos])++;
+
+                  /* Adjust the pendulum to account for the fact that a store
+                     was found and increased in priority.  This is to prevent
+                     increasing the priority of multiple stores */
+                  load_store_pendulum++;
+
+                  break;
+                }
+              pos--;
+            }
+        }
+    }
+
+  return cached_can_issue_more;
+}
+
 /* Return whether the presence of INSN causes a dispatch
    group termination of group WHICH_GROUP.
 
@@ -16971,28 +17508,179 @@ get_next_active_insn (rtx insn, rtx tail)
 static bool
 insn_terminates_group_p (rtx insn, enum group_termination which_group)
 {
-  enum attr_type type;
+  bool first, last;
 
   if (! insn)
     return false;
 
-  type = get_attr_type (insn);
+  first = insn_must_be_first_in_group (insn);
+  last = insn_must_be_last_in_group (insn);
 
-  if (is_microcoded_insn (insn))
+  if (first && last)
     return true;
 
   if (which_group == current_group)
-    {
-      if (is_branch_slot_insn (insn))
-	return true;
-      return false;
-    }
+    return last;
   else if (which_group == previous_group)
+    return first;
+
+  return false;
+}
+
+
+static bool
+insn_must_be_first_in_group (rtx insn)
+{
+  enum attr_type type;
+
+  if (!insn
+      || insn == NULL_RTX
+      || GET_CODE (insn) == NOTE
+      || GET_CODE (PATTERN (insn)) == USE
+      || GET_CODE (PATTERN (insn)) == CLOBBER)
+    return false;
+
+  switch (rs6000_cpu)
     {
-      if (is_dispatch_slot_restricted (insn))
-	return true;
-      return false;
+    case PROCESSOR_POWER5:
+      if (is_cracked_insn (insn))
+        return true;
+    case PROCESSOR_POWER4:
+      if (is_microcoded_insn (insn))
+        return true;
+
+      if (!rs6000_sched_groups)
+        return false;
+
+      type = get_attr_type (insn);
+
+      switch (type)
+        {
+        case TYPE_MFCR:
+        case TYPE_MFCRF:
+        case TYPE_MTCR:
+        case TYPE_DELAYED_CR:
+        case TYPE_CR_LOGICAL:
+        case TYPE_MTJMPR:
+        case TYPE_MFJMPR:
+        case TYPE_IDIV:
+        case TYPE_LDIV:
+        case TYPE_LOAD_L:
+        case TYPE_STORE_C:
+        case TYPE_ISYNC:
+        case TYPE_SYNC:
+          return true;
+        default:
+          break;
+        }
+      break;
+    case PROCESSOR_POWER6:
+      type = get_attr_type (insn);
+
+      switch (type)
+        {
+        case TYPE_INSERT_DWORD:
+        case TYPE_EXTS:
+        case TYPE_CNTLZ:
+        case TYPE_SHIFT:
+        case TYPE_VAR_SHIFT_ROTATE:
+        case TYPE_TRAP:
+        case TYPE_IMUL:
+        case TYPE_IMUL2:
+        case TYPE_IMUL3:
+        case TYPE_LMUL:
+        case TYPE_IDIV:
+        case TYPE_INSERT_WORD:
+        case TYPE_DELAYED_COMPARE:
+        case TYPE_IMUL_COMPARE:
+        case TYPE_LMUL_COMPARE:
+        case TYPE_FPCOMPARE:
+        case TYPE_MFCR:
+        case TYPE_MTCR:
+        case TYPE_MFJMPR:
+        case TYPE_MTJMPR:
+        case TYPE_ISYNC:
+        case TYPE_SYNC:
+        case TYPE_LOAD_L:
+        case TYPE_STORE_C:
+        case TYPE_LOAD_U:
+        case TYPE_LOAD_UX:
+        case TYPE_LOAD_EXT_UX:
+        case TYPE_STORE_U:
+        case TYPE_STORE_UX:
+        case TYPE_FPLOAD_U:
+        case TYPE_FPLOAD_UX:
+        case TYPE_FPSTORE_U:
+        case TYPE_FPSTORE_UX:
+          return true;
+        default:
+          break;
+        }
+      break;
+    default:
+      break;
+    }
+
+  return false;
+}
+
+static bool
+insn_must_be_last_in_group (rtx insn)
+{
+  enum attr_type type;
+
+  if (!insn
+      || insn == NULL_RTX
+      || GET_CODE (insn) == NOTE
+      || GET_CODE (PATTERN (insn)) == USE
+      || GET_CODE (PATTERN (insn)) == CLOBBER)
+    return false;
+
+  switch (rs6000_cpu) {
+  case PROCESSOR_POWER4:
+  case PROCESSOR_POWER5:
+    if (is_microcoded_insn (insn))
+      return true;
+
+    if (is_branch_slot_insn (insn))
+      return true;
+
+    break;
+  case PROCESSOR_POWER6:
+    type = get_attr_type (insn);
+
+    switch (type)
+      {
+      case TYPE_EXTS:
+      case TYPE_CNTLZ:
+      case TYPE_SHIFT:
+      case TYPE_VAR_SHIFT_ROTATE:
+      case TYPE_TRAP:
+      case TYPE_IMUL:
+      case TYPE_IMUL2:
+      case TYPE_IMUL3:
+      case TYPE_LMUL:
+      case TYPE_IDIV:
+      case TYPE_DELAYED_COMPARE:
+      case TYPE_IMUL_COMPARE:
+      case TYPE_LMUL_COMPARE:
+      case TYPE_FPCOMPARE:
+      case TYPE_MFCR:
+      case TYPE_MTCR:
+      case TYPE_MFJMPR:
+      case TYPE_MTJMPR:
+      case TYPE_ISYNC:
+      case TYPE_SYNC:
+      case TYPE_LOAD_L:
+      case TYPE_STORE_C:
+        return true;
+      default:
+        break;
     }
+    break;
+  default:
+    break;
+  }
 
   return false;
 }
@@ -17317,6 +18005,17 @@ pad_groups (FILE *dump, int sched_verbose, rtx prev_head_insn, rtx tail)
   return group_count;
 }
 
+/* We're beginning a new block.  Initialize data structures as necessary. */
+
+static void
+rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
+                   int sched_verbose ATTRIBUTE_UNUSED,
+                   int max_ready ATTRIBUTE_UNUSED)
+{
+  last_scheduled_insn = NULL_RTX;
+  load_store_pendulum = 0;
+}
+
 /* The following function is called at the end of scheduling BB.
    After reload, it inserts nops at insn group bundling.  */
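
The load/store pendulum that rs6000_sched_reorder2 maintains can be exercised outside the scheduler. The standalone C sketch below is not part of the patch — the opcode enum and the swing/preferred_next helpers are invented for illustration — but it mirrors how the counter swings per issued insn and which kind of insn each pendulum state favors next, per the comment block in the patch.

/* Minimal sketch of the Power6 load/store pendulum heuristic.
   Hypothetical helpers; only the state transitions come from the patch.  */
#include <stdio.h>

enum kind { OTHER, LOAD, STORE };

/* Swing the pendulum for one issued insn: stores swing left (negative),
   loads swing right (positive), anything else leaves it alone.  */
static int swing (int pendulum, enum kind issued)
{
  if (issued == STORE)
    return pendulum - 1;
  if (issued == LOAD)
    return pendulum + 1;
  return pendulum;
}

/* Which kind of insn each state favors, mirroring the four interesting
   pendulum values described in rs6000_sched_reorder2.  */
static enum kind preferred_next (int pendulum)
{
  switch (pendulum)
    {
    case 1:  return LOAD;   /* one load issued: pair it with another load */
    case -1: return STORE;  /* one store issued: pair it, ideally adjacent */
    case 2:  return STORE;  /* two loads issued: favor a store next cycle */
    case -2: return LOAD;   /* two stores issued: favor a load next cycle */
    default: return OTHER;  /* balanced or saturated: no preference */
    }
}

int main (void)
{
  enum kind issued[] = { LOAD, LOAD, STORE, STORE, STORE };
  const char *names[] = { "none", "load", "store" };
  int pendulum = 0;

  for (unsigned i = 0; i < sizeof issued / sizeof *issued; i++)
    {
      pendulum = swing (pendulum, issued[i]);
      printf ("issued %-5s -> pendulum %+d, prefer %s next\n",
              names[issued[i]], pendulum, names[preferred_next (pendulum)]);
    }
  return 0;
}

Compiled with a plain cc, this prints the pendulum trace for a sample issue sequence, showing why the heuristic tends to alternate pairs of loads with pairs of stores.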
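The adjacency test in adjacent_mem_locations reduces to offset arithmetic once both addresses are in REG or REG+CONST form: same base register, and the offsets differ by exactly one access size in either direction. A toy model (hypothetical struct standing in for GCC rtl; the register-number and MEM_SIZE plumbing is elided):

/* Two accesses are adjacent when they share a base register and
   b's offset is a's offset plus a's size (b follows a) or minus
   b's size (b precedes a) -- the same test the patch applies via
   REGNO, INTVAL and MEM_SIZE.  */
#include <stdbool.h>
#include <stdio.h>

struct mem { int base_reg; long offset; long size; };

static bool adjacent (struct mem a, struct mem b)
{
  return a.base_reg == b.base_reg
         && (b.offset - a.offset == a.size
             || b.offset - a.offset == -b.size);
}

int main (void)
{
  struct mem s1 = { 9, 0, 8 };    /* e.g. std rX,0(r9)  */
  struct mem s2 = { 9, 8, 8 };    /* std rY,8(r9): adjacent to s1  */
  struct mem s3 = { 9, 24, 8 };   /* std rZ,24(r9): not adjacent   */
  printf ("%d %d\n", adjacent (s1, s2), adjacent (s1, s3));  /* 1 0 */
  return 0;
}

Pairing such stores back-to-back is what lets the Power6 store queue merge them, which is why the -1 pendulum state scans for an adjacent store before settling for any store.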
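One subtlety in the ready-list manipulation: the patch's "move it to the head of the ready list" assigns the chosen insn to ready[*pn_ready - 1], because in GCC's scheduler the next insn to issue sits at the end of the ready array. The sketch below isolates that rotation loop (plain ints stand in for insn rtxs; this is an illustration, not GCC code):

/* Rotate the element at POS to the last slot, shifting the tail down,
   exactly as the loops in rs6000_sched_reorder2 do.  */
#include <stdio.h>

static void move_to_head (int *ready, int n_ready, int pos)
{
  int tmp = ready[pos];            /* the insn we want issued next */
  for (int i = pos; i < n_ready - 1; i++)
    ready[i] = ready[i + 1];       /* shift the tail down by one */
  ready[n_ready - 1] = tmp;        /* "head" of the list = last slot */
}

int main (void)
{
  int ready[] = { 10, 20, 30, 40 };   /* 40 would issue first */
  move_to_head (ready, 4, 1);         /* promote insn "20" */
  for (int i = 0; i < 4; i++)
    printf ("%d ", ready[i]);         /* prints: 10 30 40 20 */
  printf ("\n");
  return 0;
}

The accompanying INSN_PRIORITY bump keeps the promoted insn near the head if the list is re-sorted before the next issue slot.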