aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorClaudiu Zissulescu <claziss@synopsys.com>2018-10-31 12:27:46 +0100
committerClaudiu Zissulescu <claziss@gcc.gnu.org>2018-10-31 12:27:46 +0100
commit635aeaa20ffdf6e794f180cc8e053e8dc46db760 (patch)
treeca53d051f9bcfe311d00dfe37c8bae70900042e0
parentaac1c11ce4edd9c2e6af7e9ee8abcaba98d8741f (diff)
downloadgcc-635aeaa20ffdf6e794f180cc8e053e8dc46db760.zip
gcc-635aeaa20ffdf6e794f180cc8e053e8dc46db760.tar.gz
gcc-635aeaa20ffdf6e794f180cc8e053e8dc46db760.tar.bz2
[ARC] Handle store cacheline hazard.
Handle store cacheline hazard for A700 cpus by inserting two NOP_S between ST ST LD or their logical equivalent (like ST ST NOP_S NOP_S J_L.D LD) gcc/ xxxx-xx-xx Claudiu Zissulescu <claziss@synopsys.com> * config/arc/arc-arch.h (ARC_TUNE_ARC7XX): New tune value. * config/arc/arc.c (arc_active_insn): New function. (check_store_cacheline_hazard): Likewise. (workaround_arc_anomaly): Use check_store_cacheline_hazard. (arc_override_options): Disable delay slot scheduler for older A7. (arc_store_addr_hazard_p): New implementation, old one renamed to ... (arc_store_addr_hazard_internal_p): Renamed. (arc_reorg): Don't combine into brcc instructions which are part of hardware hazard solution. * config/arc/arc.md (attr tune): Consider new arc7xx tune value. (tune_arc700): Likewise. * config/arc/arc.opt (arc7xx): New tune value. * config/arc/arc700.md: Improve A7 scheduler. From-SVN: r265676
-rw-r--r--gcc/ChangeLog18
-rw-r--r--gcc/config/arc/arc-arch.h1
-rw-r--r--gcc/config/arc/arc.c143
-rw-r--r--gcc/config/arc/arc.md8
-rw-r--r--gcc/config/arc/arc.opt3
-rw-r--r--gcc/config/arc/arc700.md18
6 files changed, 151 insertions, 40 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 051d462..289bb5a 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,23 @@
2018-10-31 Claudiu Zissulescu <claziss@synopsys.com>
+ * config/arc/arc-arch.h (ARC_TUNE_ARC7XX): New tune value.
+ * config/arc/arc.c (arc_active_insn): New function.
+ (check_store_cacheline_hazard): Likewise.
+ (workaround_arc_anomaly): Use check_store_cacheline_hazard.
+ (arc_override_options): Disable delay slot scheduler for older
+ A7.
+ (arc_store_addr_hazard_p): New implementation, old one renamed to
+ ...
+ (arc_store_addr_hazard_internal_p): Renamed.
+ (arc_reorg): Don't combine into brcc instructions which are part
+ of hardware hazard solution.
+ * config/arc/arc.md (attr tune): Consider new arc7xx tune value.
+ (tune_arc700): Likewise.
+ * config/arc/arc.opt (arc7xx): New tune value.
+ * config/arc/arc700.md: Improve A7 scheduler.
+
+2018-10-31 Claudiu Zissulescu <claziss@synopsys.com>
+
* config/arc/arc.c (arc_override_options): Remove
TARGET_COMPACT_CASESI.
* config/arc/arc.h (ASM_OUTPUT_ADDR_DIFF_ELT): Update.
diff --git a/gcc/config/arc/arc-arch.h b/gcc/config/arc/arc-arch.h
index 859af06..ad54060 100644
--- a/gcc/config/arc/arc-arch.h
+++ b/gcc/config/arc/arc-arch.h
@@ -71,6 +71,7 @@ enum arc_tune_attr
{
ARC_TUNE_NONE,
ARC_TUNE_ARC600,
+ ARC_TUNE_ARC7XX,
ARC_TUNE_ARC700_4_2_STD,
ARC_TUNE_ARC700_4_2_XMAC,
ARC_TUNE_CORE_3,
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index 00ec77d..277b546 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -1303,6 +1303,10 @@ arc_override_options (void)
if (!global_options_set.x_g_switch_value && !TARGET_NO_SDATA_SET)
g_switch_value = TARGET_LL64 ? 8 : 4;
+ /* A7 has an issue with delay slots. */
+ if (TARGET_ARC700 && (arc_tune != ARC_TUNE_ARC7XX))
+ flag_delayed_branch = 0;
+
/* These need to be done at start up. It's convenient to do them here. */
arc_init ();
}
@@ -7137,11 +7141,90 @@ arc_invalid_within_doloop (const rtx_insn *insn)
return NULL;
}
+/* Return the next active insn, skiping the inline assembly code. */
+
+static rtx_insn *
+arc_active_insn (rtx_insn *insn)
+{
+ rtx_insn *nxt = next_active_insn (insn);
+
+ if (nxt && GET_CODE (PATTERN (nxt)) == ASM_INPUT)
+ nxt = next_active_insn (nxt);
+ return nxt;
+}
+
+/* Search for a sequence made out of two stores and a given number of
+ loads, insert a nop if required. */
+
+static void
+check_store_cacheline_hazard (void)
+{
+ rtx_insn *insn, *succ0, *insn1;
+ bool found = false;
+
+ for (insn = get_insns (); insn; insn = arc_active_insn (insn))
+ {
+ succ0 = arc_active_insn (insn);
+
+ if (!succ0)
+ return;
+
+ if (!single_set (insn) || !single_set (succ0))
+ continue;
+
+ if ((get_attr_type (insn) != TYPE_STORE)
+ || (get_attr_type (succ0) != TYPE_STORE))
+ continue;
+
+ /* Found at least two consecutive stores. Goto the end of the
+ store sequence. */
+ for (insn1 = succ0; insn1; insn1 = arc_active_insn (insn1))
+ if (!single_set (insn1) || get_attr_type (insn1) != TYPE_STORE)
+ break;
+
+ /* Now, check the next two instructions for the following cases:
+ 1. next instruction is a LD => insert 2 nops between store
+ sequence and load.
+ 2. next-next instruction is a LD => inset 1 nop after the store
+ sequence. */
+ if (insn1 && single_set (insn1)
+ && (get_attr_type (insn1) == TYPE_LOAD))
+ {
+ found = true;
+ emit_insn_before (gen_nopv (), insn1);
+ emit_insn_before (gen_nopv (), insn1);
+ }
+ else
+ {
+ if (insn1 && (get_attr_type (insn1) == TYPE_COMPARE))
+ {
+ /* REG_SAVE_NOTE is used by Haifa scheduler, we are in
+ reorg, so it is safe to reuse it for avoiding the
+ current compare insn to be part of a BRcc
+ optimization. */
+ add_reg_note (insn1, REG_SAVE_NOTE, GEN_INT (3));
+ }
+ insn1 = arc_active_insn (insn1);
+ if (insn1 && single_set (insn1)
+ && (get_attr_type (insn1) == TYPE_LOAD))
+ {
+ found = true;
+ emit_insn_before (gen_nopv (), insn1);
+ }
+ }
+
+ insn = insn1;
+ if (found)
+ found = false;
+ }
+}
+
/* Return true if a load instruction (CONSUMER) uses the same address as a
store instruction (PRODUCER). This function is used to avoid st/ld
address hazard in ARC700 cores. */
-bool
-arc_store_addr_hazard_p (rtx_insn* producer, rtx_insn* consumer)
+
+static bool
+arc_store_addr_hazard_internal_p (rtx_insn* producer, rtx_insn* consumer)
{
rtx in_set, out_set;
rtx out_addr, in_addr;
@@ -7189,6 +7272,16 @@ arc_store_addr_hazard_p (rtx_insn* producer, rtx_insn* consumer)
return false;
}
+/* Return TRUE is we have an store address hazard. */
+
+bool
+arc_store_addr_hazard_p (rtx_insn* producer, rtx_insn* consumer)
+{
+ if (TARGET_ARC700 && (arc_tune != ARC_TUNE_ARC7XX))
+ return true;
+ return arc_store_addr_hazard_internal_p (producer, consumer);
+}
+
/* The same functionality as arc_hazard. It is called in machine
reorg before any other optimization. Hence, the NOP size is taken
into account when doing branch shortening. */
@@ -7197,6 +7290,7 @@ static void
workaround_arc_anomaly (void)
{
rtx_insn *insn, *succ0;
+ rtx_insn *succ1;
/* For any architecture: call arc_hazard here. */
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
@@ -7208,27 +7302,30 @@ workaround_arc_anomaly (void)
}
}
- if (TARGET_ARC700)
- {
- rtx_insn *succ1;
+ if (!TARGET_ARC700)
+ return;
- for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
- {
- succ0 = next_real_insn (insn);
- if (arc_store_addr_hazard_p (insn, succ0))
- {
- emit_insn_after (gen_nopv (), insn);
- emit_insn_after (gen_nopv (), insn);
- continue;
- }
+ /* Old A7 are suffering of a cache hazard, and we need to insert two
+ nops between any sequence of stores and a load. */
+ if (arc_tune != ARC_TUNE_ARC7XX)
+ check_store_cacheline_hazard ();
- /* Avoid adding nops if the instruction between the ST and LD is
- a call or jump. */
- succ1 = next_real_insn (succ0);
- if (succ0 && !JUMP_P (succ0) && !CALL_P (succ0)
- && arc_store_addr_hazard_p (insn, succ1))
- emit_insn_after (gen_nopv (), insn);
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ succ0 = next_real_insn (insn);
+ if (arc_store_addr_hazard_internal_p (insn, succ0))
+ {
+ emit_insn_after (gen_nopv (), insn);
+ emit_insn_after (gen_nopv (), insn);
+ continue;
}
+
+ /* Avoid adding nops if the instruction between the ST and LD is
+ a call or jump. */
+ succ1 = next_real_insn (succ0);
+ if (succ0 && !JUMP_P (succ0) && !CALL_P (succ0)
+ && arc_store_addr_hazard_internal_p (insn, succ1))
+ emit_insn_after (gen_nopv (), insn);
}
}
@@ -7866,11 +7963,15 @@ arc_reorg (void)
if (!link_insn)
continue;
else
- /* Check if this is a data dependency. */
{
+ /* Check if this is a data dependency. */
rtx op, cc_clob_rtx, op0, op1, brcc_insn, note;
rtx cmp0, cmp1;
+ /* Make sure we can use it for brcc insns. */
+ if (find_reg_note (link_insn, REG_SAVE_NOTE, GEN_INT (3)))
+ continue;
+
/* Ok this is the set cc. copy args here. */
op = XEXP (pc_target, 0);
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index 8afc40f..2c9de8d 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -600,11 +600,13 @@
;; somehow modify them to become inelegible for delay slots if a decision
;; is made that makes conditional execution required.
-(define_attr "tune" "none,arc600,arc700_4_2_std,arc700_4_2_xmac, core_3, \
-archs4x, archs4xd, archs4xd_slow"
+(define_attr "tune" "none,arc600,arc7xx,arc700_4_2_std,arc700_4_2_xmac, \
+core_3, archs4x, archs4xd, archs4xd_slow"
(const
(cond [(symbol_ref "arc_tune == TUNE_ARC600")
(const_string "arc600")
+ (symbol_ref "arc_tune == ARC_TUNE_ARC7XX")
+ (const_string "arc7xx")
(symbol_ref "arc_tune == TUNE_ARC700_4_2_STD")
(const_string "arc700_4_2_std")
(symbol_ref "arc_tune == TUNE_ARC700_4_2_XMAC")
@@ -619,7 +621,7 @@ archs4x, archs4xd, archs4xd_slow"
(const_string "none"))))
(define_attr "tune_arc700" "false,true"
- (if_then_else (eq_attr "tune" "arc700_4_2_std, arc700_4_2_xmac")
+ (if_then_else (eq_attr "tune" "arc7xx, arc700_4_2_std, arc700_4_2_xmac")
(const_string "true")
(const_string "false")))
diff --git a/gcc/config/arc/arc.opt b/gcc/config/arc/arc.opt
index ad72d04..d3c283e 100644
--- a/gcc/config/arc/arc.opt
+++ b/gcc/config/arc/arc.opt
@@ -263,6 +263,9 @@ EnumValue
Enum(arc_tune_attr) String(arc601) Value(ARC_TUNE_ARC600)
EnumValue
+Enum(arc_tune_attr) String(arc7xx) Value(ARC_TUNE_ARC7XX)
+
+EnumValue
Enum(arc_tune_attr) String(arc700) Value(ARC_TUNE_ARC700_4_2_STD)
EnumValue
diff --git a/gcc/config/arc/arc700.md b/gcc/config/arc/arc700.md
index a0f9f74..cbb868d 100644
--- a/gcc/config/arc/arc700.md
+++ b/gcc/config/arc/arc700.md
@@ -145,28 +145,14 @@
; no functional unit runs when blockage is reserved
(exclusion_set "blockage" "core, multiplier")
-(define_insn_reservation "data_load_DI" 4
- (and (eq_attr "tune_arc700" "true")
- (eq_attr "type" "load")
- (match_operand:DI 0 "" ""))
- "issue+dmp, issue+dmp, dmp_write_port, dmp_write_port")
-
(define_insn_reservation "data_load" 3
(and (eq_attr "tune_arc700" "true")
- (eq_attr "type" "load")
- (not (match_operand:DI 0 "" "")))
+ (eq_attr "type" "load"))
"issue+dmp, nothing, dmp_write_port")
-(define_insn_reservation "data_store_DI" 2
- (and (eq_attr "tune_arc700" "true")
- (eq_attr "type" "store")
- (match_operand:DI 0 "" ""))
- "issue+dmp_write_port, issue+dmp_write_port")
-
(define_insn_reservation "data_store" 1
(and (eq_attr "tune_arc700" "true")
- (eq_attr "type" "store")
- (not (match_operand:DI 0 "" "")))
+ (eq_attr "type" "store"))
"issue+dmp_write_port")
(define_bypass 3 "data_store" "data_load" "arc_store_addr_hazard_p")