aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorAndreas Krebbel <krebbel1@de.ibm.com>2009-08-20 09:21:13 +0000
committerAndreas Krebbel <krebbel@gcc.gnu.org>2009-08-20 09:21:13 +0000
commitb0f86a7e6ba69a2638b00efa758aec522046760a (patch)
treeafda1be14ded3ba00c068699a37d39589fa35754 /gcc
parentf1149235142d434a76ceec242944c85c9749d3d4 (diff)
downloadgcc-b0f86a7e6ba69a2638b00efa758aec522046760a.zip
gcc-b0f86a7e6ba69a2638b00efa758aec522046760a.tar.gz
gcc-b0f86a7e6ba69a2638b00efa758aec522046760a.tar.bz2
s390.c (Z10_PREDICT_DISTANCE): New macro.
2009-08-20 Andreas Krebbel <krebbel1@de.ibm.com> * config/s390/s390.c (Z10_PREDICT_DISTANCE): New macro. (s390_z10_fix_long_loop_prediction): New function. (s390_z10_optimize_cmp): INSN walk moved to caller - s390_reorg. (s390_reorg): Walk over the INSNs and invoke s390_z10_fix_long_loop_prediction and s390_z10_optimize_cmp. From-SVN: r150955
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog8
-rw-r--r--gcc/config/s390/s390.c231
-rw-r--r--gcc/config/s390/s390.md58
3 files changed, 220 insertions, 77 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index f35ed54..76a3fe4 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,13 @@
2009-08-20 Andreas Krebbel <krebbel1@de.ibm.com>
+ * config/s390/s390.c (Z10_PREDICT_DISTANCE): New macro.
+ (s390_z10_fix_long_loop_prediction): New function.
+ (s390_z10_optimize_cmp): INSN walk moved to caller - s390_reorg.
+ (s390_reorg): Walk over the INSNs and invoke
+ s390_z10_fix_long_loop_prediction and s390_z10_optimize_cmp.
+
+2009-08-20 Andreas Krebbel <krebbel1@de.ibm.com>
+
* config/s390/s390.md ("*brx_stage1_<GPR:mode>", "*brxg_64bit",
"*brx_64bit", "*brx_31bit"): New patterns.
* config/s390/s390.c ('E'): New output modifier.
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 1431bfd..47b939c 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -345,6 +345,10 @@ struct GTY(()) machine_function
#define REGNO_PAIR_OK(REGNO, MODE) \
(HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
+/* That's the read ahead of the dynamic branch prediction unit in
+ bytes on a z10 CPU. */
+#define Z10_PREDICT_DISTANCE 384
+
static enum machine_mode
s390_libgcc_cmp_return_mode (void)
{
@@ -9661,6 +9665,66 @@ s390_optimize_prologue (void)
}
}
+/* On z10 the dynamic branch prediction must see the backward jump in
+ a window of 384 bytes. If not it falls back to the static
+ prediction. This function rearranges the loop backward branch in a
+ way which makes the static prediction always correct. The function
+ returns true if it added an instruction. */
+static bool
+s390_z10_fix_long_loop_prediction (rtx insn)
+{
+ rtx set = single_set (insn);
+ rtx code_label, label_ref, new_label;
+ rtx uncond_jump;
+ rtx cur_insn;
+ rtx tmp;
+ int distance;
+
+ /* This will exclude branch on count and branch on index patterns
+ since these are correctly statically predicted. */
+ if (!set
+ || SET_DEST (set) != pc_rtx
+ || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
+ return false;
+
+ label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
+ XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
+
+ gcc_assert (GET_CODE (label_ref) == LABEL_REF);
+
+ code_label = XEXP (label_ref, 0);
+
+ if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
+ || INSN_ADDRESSES (INSN_UID (insn)) == -1
+ || (INSN_ADDRESSES (INSN_UID (insn))
+ - INSN_ADDRESSES (INSN_UID (code_label)) < Z10_PREDICT_DISTANCE))
+ return false;
+
+ for (distance = 0, cur_insn = PREV_INSN (insn);
+ distance < Z10_PREDICT_DISTANCE - 6;
+ distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
+ if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
+ return false;
+
+ new_label = gen_label_rtx ();
+ uncond_jump = emit_jump_insn_after (
+ gen_rtx_SET (VOIDmode, pc_rtx,
+ gen_rtx_LABEL_REF (VOIDmode, code_label)),
+ insn);
+ emit_label_after (new_label, uncond_jump);
+
+ tmp = XEXP (SET_SRC (set), 1);
+ XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
+ XEXP (SET_SRC (set), 2) = tmp;
+ INSN_CODE (insn) = -1;
+
+ XEXP (label_ref, 0) = new_label;
+ JUMP_LABEL (insn) = new_label;
+ JUMP_LABEL (uncond_jump) = code_label;
+
+ return true;
+}
+
/* Returns 1 if INSN reads the value of REG for purposes not related
to addressing of memory, and 0 otherwise. */
static int
@@ -9743,97 +9807,87 @@ s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx insn)
if that register's value is delivered via a bypass, then the
pipeline recycles, thereby causing significant performance decline.
This function locates such situations and exchanges the two
- operands of the compare. */
-static void
-s390_z10_optimize_cmp (void)
+ operands of the compare. The function returns true whenever it
+ added an insn. */
+static bool
+s390_z10_optimize_cmp (rtx insn)
{
- rtx insn, prev_insn, next_insn;
- int added_NOPs = 0;
+ rtx prev_insn, next_insn;
+ bool insn_added_p = false;
+ rtx cond, *op0, *op1;
- for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ if (GET_CODE (PATTERN (insn)) == PARALLEL)
{
- rtx cond, *op0, *op1;
+ /* Handle compare and branch and branch on count
+ instructions. */
+ rtx pattern = single_set (insn);
- if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
- continue;
-
- if (GET_CODE (PATTERN (insn)) == PARALLEL)
- {
- /* Handle compare and branch and branch on count
- instructions. */
- rtx pattern = single_set (insn);
-
- if (!pattern
- || SET_DEST (pattern) != pc_rtx
- || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
- continue;
+ if (!pattern
+ || SET_DEST (pattern) != pc_rtx
+ || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
+ return false;
- cond = XEXP (SET_SRC (pattern), 0);
- op0 = &XEXP (cond, 0);
- op1 = &XEXP (cond, 1);
- }
- else if (GET_CODE (PATTERN (insn)) == SET)
- {
- rtx src, dest;
+ cond = XEXP (SET_SRC (pattern), 0);
+ op0 = &XEXP (cond, 0);
+ op1 = &XEXP (cond, 1);
+ }
+ else if (GET_CODE (PATTERN (insn)) == SET)
+ {
+ rtx src, dest;
- /* Handle normal compare instructions. */
- src = SET_SRC (PATTERN (insn));
- dest = SET_DEST (PATTERN (insn));
+ /* Handle normal compare instructions. */
+ src = SET_SRC (PATTERN (insn));
+ dest = SET_DEST (PATTERN (insn));
- if (!REG_P (dest)
- || !CC_REGNO_P (REGNO (dest))
- || GET_CODE (src) != COMPARE)
- continue;
+ if (!REG_P (dest)
+ || !CC_REGNO_P (REGNO (dest))
+ || GET_CODE (src) != COMPARE)
+ return false;
- /* s390_swap_cmp will try to find the conditional
- jump when passing NULL_RTX as condition. */
- cond = NULL_RTX;
- op0 = &XEXP (src, 0);
- op1 = &XEXP (src, 1);
- }
- else
- continue;
+ /* s390_swap_cmp will try to find the conditional
+ jump when passing NULL_RTX as condition. */
+ cond = NULL_RTX;
+ op0 = &XEXP (src, 0);
+ op1 = &XEXP (src, 1);
+ }
+ else
+ return false;
- if (!REG_P (*op0) || !REG_P (*op1))
- continue;
+ if (!REG_P (*op0) || !REG_P (*op1))
+ return false;
- /* Swap the COMPARE arguments and its mask if there is a
- conflicting access in the previous insn. */
- prev_insn = PREV_INSN (insn);
+ /* Swap the COMPARE arguments and its mask if there is a
+ conflicting access in the previous insn. */
+ prev_insn = PREV_INSN (insn);
+ if (prev_insn != NULL_RTX && INSN_P (prev_insn)
+ && reg_referenced_p (*op1, PATTERN (prev_insn)))
+ s390_swap_cmp (cond, op0, op1, insn);
+
+ /* Check if there is a conflict with the next insn. If there
+ was no conflict with the previous insn, then swap the
+ COMPARE arguments and its mask. If we already swapped
+ the operands, or if swapping them would cause a conflict
+ with the previous insn, issue a NOP after the COMPARE in
+ order to separate the two instructions. */
+ next_insn = NEXT_INSN (insn);
+ if (next_insn != NULL_RTX && INSN_P (next_insn)
+ && s390_non_addr_reg_read_p (*op1, next_insn))
+ {
if (prev_insn != NULL_RTX && INSN_P (prev_insn)
- && reg_referenced_p (*op1, PATTERN (prev_insn)))
- s390_swap_cmp (cond, op0, op1, insn);
-
- /* Check if there is a conflict with the next insn. If there
- was no conflict with the previous insn, then swap the
- COMPARE arguments and its mask. If we already swapped
- the operands, or if swapping them would cause a conflict
- with the previous insn, issue a NOP after the COMPARE in
- order to separate the two instuctions. */
- next_insn = NEXT_INSN (insn);
- if (next_insn != NULL_RTX && INSN_P (next_insn)
- && s390_non_addr_reg_read_p (*op1, next_insn))
+ && s390_non_addr_reg_read_p (*op0, prev_insn))
{
- if (prev_insn != NULL_RTX && INSN_P (prev_insn)
- && s390_non_addr_reg_read_p (*op0, prev_insn))
- {
- if (REGNO (*op1) == 0)
- emit_insn_after (gen_nop1 (), insn);
- else
- emit_insn_after (gen_nop (), insn);
- added_NOPs = 1;
- }
+ if (REGNO (*op1) == 0)
+ emit_insn_after (gen_nop1 (), insn);
else
- s390_swap_cmp (cond, op0, op1, insn);
+ emit_insn_after (gen_nop (), insn);
+ insn_added_p = true;
}
+ else
+ s390_swap_cmp (cond, op0, op1, insn);
}
-
- /* Adjust branches if we added new instructions. */
- if (added_NOPs)
- shorten_branches (get_insns ());
+ return insn_added_p;
}
-
/* Perform machine-dependent processing. */
static void
@@ -9944,10 +9998,33 @@ s390_reorg (void)
/* Try to optimize prologue and epilogue further. */
s390_optimize_prologue ();
- /* Eliminate z10-specific pipeline recycles related to some compare
- instructions. */
+ /* Walk over the insns and do some z10 specific changes. */
if (s390_tune == PROCESSOR_2097_Z10)
- s390_z10_optimize_cmp ();
+ {
+ rtx insn;
+ bool insn_added_p = false;
+
+ /* The insn lengths and addresses have to be up to date for the
+ following manipulations. */
+ shorten_branches (get_insns ());
+
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
+ continue;
+
+ if (JUMP_P (insn))
+ insn_added_p |= s390_z10_fix_long_loop_prediction (insn);
+
+ if (GET_CODE (PATTERN (insn)) == PARALLEL
+ || GET_CODE (PATTERN (insn)) == SET)
+ insn_added_p |= s390_z10_optimize_cmp (insn);
+ }
+
+ /* Adjust branches if we added new instructions. */
+ if (insn_added_p)
+ shorten_branches (get_insns ());
+ }
}
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 7898dc5..0a4361f 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -1046,6 +1046,64 @@
(const_int 6) (const_int 12)))]) ; 8 byte for clr/jg
; 10 byte for clgr/jg
+; And now the same two patterns as above but with a negated CC mask.
+
+; cij, cgij, crj, cgrj, cfi, cgfi, cr, cgr
+; The following instructions do a complementary access of their second
+; operand (z10 only): crj_c, cgrjc, cr, cgr
+(define_insn "*icmp_and_br_signed_<mode>"
+ [(set (pc)
+ (if_then_else (match_operator 0 "s390_signed_integer_comparison"
+ [(match_operand:GPR 1 "register_operand" "d,d")
+ (match_operand:GPR 2 "nonmemory_operand" "d,C")])
+ (pc)
+ (label_ref (match_operand 3 "" ""))))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_Z10"
+{
+ if (get_attr_length (insn) == 6)
+ return which_alternative ?
+ "c<g>ij%D0\t%1,%c2,%l3" : "c<g>rj%D0\t%1,%2,%l3";
+ else
+ return which_alternative ?
+ "c<g>fi\t%1,%c2\;jg%D0\t%l3" : "c<g>r\t%1,%2\;jg%D0\t%l3";
+}
+ [(set_attr "op_type" "RIE")
+ (set_attr "type" "branch")
+ (set_attr "z10prop" "z10_super_c,z10_super")
+ (set (attr "length")
+ (if_then_else (lt (abs (minus (pc) (match_dup 3))) (const_int 60000))
+ (const_int 6) (const_int 12)))]) ; 8 byte for cr/jg
+ ; 10 byte for cgr/jg
+
+; clij, clgij, clrj, clgrj, clfi, clgfi, clr, clgr
+; The following instructions do a complementary access of their second
+; operand (z10 only): clrj, clgrj, clr, clgr
+(define_insn "*icmp_and_br_unsigned_<mode>"
+ [(set (pc)
+ (if_then_else (match_operator 0 "s390_unsigned_integer_comparison"
+ [(match_operand:GPR 1 "register_operand" "d,d")
+ (match_operand:GPR 2 "nonmemory_operand" "d,I")])
+ (pc)
+ (label_ref (match_operand 3 "" ""))))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_Z10"
+{
+ if (get_attr_length (insn) == 6)
+ return which_alternative ?
+ "cl<g>ij%D0\t%1,%b2,%l3" : "cl<g>rj%D0\t%1,%2,%l3";
+ else
+ return which_alternative ?
+ "cl<g>fi\t%1,%b2\;jg%D0\t%l3" : "cl<g>r\t%1,%2\;jg%D0\t%l3";
+}
+ [(set_attr "op_type" "RIE")
+ (set_attr "type" "branch")
+ (set_attr "z10prop" "z10_super_c,z10_super")
+ (set (attr "length")
+ (if_then_else (lt (abs (minus (pc) (match_dup 3))) (const_int 60000))
+ (const_int 6) (const_int 12)))]) ; 8 byte for clr/jg
+ ; 10 byte for clgr/jg
+
;;
;;- Move instructions.
;;