aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorH.J. Lu <hjl.tools@gmail.com>2022-04-26 11:08:55 -0700
committerliuhongt <hongtao.liu@intel.com>2024-07-08 10:47:09 +0800
commita910c30c7c27cd0f6d2d2694544a09fb11d611b9 (patch)
tree76e7f3869fe95113a50976bacd980d2159366380
parent53eef7915ac1a9b9ad672cc2e80509c15a764ca2 (diff)
downloadgcc-a910c30c7c27cd0f6d2d2694544a09fb11d611b9.zip
gcc-a910c30c7c27cd0f6d2d2694544a09fb11d611b9.tar.gz
gcc-a910c30c7c27cd0f6d2d2694544a09fb11d611b9.tar.bz2
x86: Update branch hint for Redwood Cove.
According to Intel® 64 and IA-32 Architectures Optimization Reference Manual[1], Branch Hint is updated for Redwood Cove. --------cut from [1]------------------------- Starting with the Redwood Cove microarchitecture, if the predictor has no stored information about a branch, the branch has the Intel® SSE2 branch taken hint (i.e., instruction prefix 3EH), When the codec decodes the branch, it flips the branch’s prediction from not-taken to taken. It then flushes the pipeline in front of it and steers this pipeline to fetch the taken path of the branch. --------cut end ----------------------------- Split the tune branch_prediction_hints into branch_prediction_hints_taken and branch_prediction_hints_not_taken. When the corresponding tune is enabled, a branch hint is now always generated for a conditional branch that has a probability note, instead of only when the probability is outside the 45%-55% range and disagrees with the static forward/backward prediction. Both tunes are disabled by default. [1] https://www.intel.com/content/www/us/en/content-details/821612/intel-64-and-ia-32-architectures-optimization-reference-manual-volume-1.html gcc/ * config/i386/i386.cc (ix86_print_operand): Always generate branch hint for conditional branches. * config/i386/i386.h (TARGET_BRANCH_PREDICTION_HINTS): Split into .. (TARGET_BRANCH_PREDICTION_HINTS_TAKEN): .. this, and .. (TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN): .. this. * config/i386/x86-tune.def (X86_TUNE_BRANCH_PREDICTION_HINTS): Split into .. (X86_TUNE_BRANCH_PREDICTION_HINTS_TAKEN): .. this, and .. (X86_TUNE_BRANCH_PREDICTION_HINTS_NOT_TAKEN): .. this.
-rw-r--r--gcc/config/i386/i386.cc29
-rw-r--r--gcc/config/i386/i386.h6
-rw-r--r--gcc/config/i386/x86-tune.def13
3 files changed, 24 insertions, 24 deletions
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index f75250f..17d23bb 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -14057,7 +14057,8 @@ ix86_print_operand (FILE *file, rtx x, int code)
if (!optimize
|| optimize_function_for_size_p (cfun)
- || !TARGET_BRANCH_PREDICTION_HINTS)
+ || (!TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN
+ && !TARGET_BRANCH_PREDICTION_HINTS_TAKEN))
return;
x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
@@ -14066,25 +14067,13 @@ ix86_print_operand (FILE *file, rtx x, int code)
int pred_val = profile_probability::from_reg_br_prob_note
(XINT (x, 0)).to_reg_br_prob_base ();
- if (pred_val < REG_BR_PROB_BASE * 45 / 100
- || pred_val > REG_BR_PROB_BASE * 55 / 100)
- {
- bool taken = pred_val > REG_BR_PROB_BASE / 2;
- bool cputaken
- = final_forward_branch_p (current_output_insn) == 0;
-
- /* Emit hints only in the case default branch prediction
- heuristics would fail. */
- if (taken != cputaken)
- {
- /* We use 3e (DS) prefix for taken branches and
- 2e (CS) prefix for not taken branches. */
- if (taken)
- fputs ("ds ; ", file);
- else
- fputs ("cs ; ", file);
- }
- }
+ bool taken = pred_val > REG_BR_PROB_BASE / 2;
+ /* We use 3e (DS) prefix for taken branches and
+ 2e (CS) prefix for not taken branches. */
+ if (taken && TARGET_BRANCH_PREDICTION_HINTS_TAKEN)
+ fputs ("ds ; ", file);
+ else if (!taken && TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN)
+ fputs ("cs ; ", file);
}
return;
}
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 0c5292e..eabb324 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -309,8 +309,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
#define TARGET_ZERO_EXTEND_WITH_AND \
ix86_tune_features[X86_TUNE_ZERO_EXTEND_WITH_AND]
#define TARGET_UNROLL_STRLEN ix86_tune_features[X86_TUNE_UNROLL_STRLEN]
-#define TARGET_BRANCH_PREDICTION_HINTS \
- ix86_tune_features[X86_TUNE_BRANCH_PREDICTION_HINTS]
+#define TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN \
+ ix86_tune_features[X86_TUNE_BRANCH_PREDICTION_HINTS_NOT_TAKEN]
+#define TARGET_BRANCH_PREDICTION_HINTS_TAKEN \
+ ix86_tune_features[X86_TUNE_BRANCH_PREDICTION_HINTS_TAKEN]
#define TARGET_DOUBLE_WITH_ADD ix86_tune_features[X86_TUNE_DOUBLE_WITH_ADD]
#define TARGET_USE_SAHF ix86_tune_features[X86_TUNE_USE_SAHF]
#define TARGET_MOVX ix86_tune_features[X86_TUNE_MOVX]
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 343c32c..3d29bff 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -683,15 +683,24 @@ DEF_TUNE (X86_TUNE_NOT_VECTORMODE, "not_vectormode", m_K6)
DEF_TUNE (X86_TUNE_AVOID_VECTOR_DECODE, "avoid_vector_decode",
m_K8)
+/* X86_TUNE_BRANCH_PREDICTION_HINTS_TAKEN: Starting with the Redwood Cove
+ microarchitecture, if the predictor has no stored information about a
+ branch and the branch has the Intel® SSE2 branch taken hint (i.e.,
+ instruction prefix 3EH), then when the codec decodes the branch, it flips
+ the branch's prediction from not-taken to taken. It then flushes the
+ pipeline in front of it and steers this pipeline to fetch the taken path
+ of the branch. */
+DEF_TUNE (X86_TUNE_BRANCH_PREDICTION_HINTS_TAKEN, "branch_prediction_hints_taken", m_NONE)
+
/*****************************************************************************/
/* This never worked well before. */
/*****************************************************************************/
-/* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
+/* X86_TUNE_BRANCH_PREDICTION_HINTS_NOT_TAKEN: Branch hints were put in P4 based
on simulation result. But after P4 was made, no performance benefit
was observed with branch hints. It also increases the code size.
As a result, icc never generates branch hints. */
-DEF_TUNE (X86_TUNE_BRANCH_PREDICTION_HINTS, "branch_prediction_hints", m_NONE)
+DEF_TUNE (X86_TUNE_BRANCH_PREDICTION_HINTS_NOT_TAKEN, "branch_prediction_hints_not_taken", m_NONE)
/* X86_TUNE_QIMODE_MATH: Enable use of 8bit arithmetic. */
DEF_TUNE (X86_TUNE_QIMODE_MATH, "qimode_math", m_ALL)