aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorTerry Guo <terry.guo@arm.com>2013-11-25 06:41:20 +0000
committerXuepeng Guo <xguo@gcc.gnu.org>2013-11-25 06:41:20 +0000
commit02231c13506fcbd63be93bba1215211bcbd1024c (patch)
treeb045e1d8d7d9e61ea7b8d4e8262360a69c3e0632 /gcc
parent36ef4e9d585a929f0423adc30c2076d6bb445c4a (diff)
downloadgcc-02231c13506fcbd63be93bba1215211bcbd1024c.zip
gcc-02231c13506fcbd63be93bba1215211bcbd1024c.tar.gz
gcc-02231c13506fcbd63be93bba1215211bcbd1024c.tar.bz2
invoke.texi (-mslow-flash-data): Document new option.
gcc/ChangeLog 2013-11-25 Terry Guo <terry.guo@arm.com> * doc/invoke.texi (-mslow-flash-data): Document new option. * config/arm/arm.opt (mslow-flash-data): New option. * config/arm/arm-protos.h (arm_max_const_double_inline_cost): Declare it. * config/arm/arm.h (TARGET_USE_MOVT): Always true when literal pools are disabled. (arm_disable_literal_pool): Declare it. * config/arm/arm.c (arm_disable_literal_pool): New variable. (arm_option_override): Handle new option. (thumb2_legitimate_address_p): Don't allow symbol references when literal pools are disabled. (arm_max_const_double_inline_cost): New function. * config/arm/arm.md (types.md): Include it before ... (use_literal_pool): New attribute. (enabled): Use new attribute. (split pattern): Replace symbol+offset with MOVW/MOVT. gcc/testsuite/ChangeLog 2013-11-25 Terry Guo <terry.guo@arm.com> * gcc.target/arm/thumb2-slow-flash-data.c: New. From-SVN: r205342
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog19
-rw-r--r--gcc/config/arm/arm-protos.h1
-rw-r--r--gcc/config/arm/arm.c45
-rw-r--r--gcc/config/arm/arm.h9
-rw-r--r--gcc/config/arm/arm.md59
-rw-r--r--gcc/config/arm/arm.opt4
-rw-r--r--gcc/doc/invoke.texi8
-rw-r--r--gcc/testsuite/ChangeLog4
-rw-r--r--gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data.c74
9 files changed, 218 insertions, 5 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 1fd4f3f..cbb27ce 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,22 @@
+2013-11-25 Terry Guo <terry.guo@arm.com>
+
+ * doc/invoke.texi (-mslow-flash-data): Document new option.
+ * config/arm/arm.opt (mslow-flash-data): New option.
+ * config/arm/arm-protos.h (arm_max_const_double_inline_cost): Declare
+ it.
+ * config/arm/arm.h (TARGET_USE_MOVT): Always true when literal pools
+ are disabled.
+ (arm_disable_literal_pool): Declare it.
+ * config/arm/arm.c (arm_disable_literal_pool): New variable.
+ (arm_option_override): Handle new option.
+ (thumb2_legitimate_address_p): Don't allow symbol references when
+ literal pools are disabled.
+ (arm_max_const_double_inline_cost): New function.
+ * config/arm/arm.md (types.md): Include it before ...
+ (use_literal_pool): New attribute.
+ (enabled): Use new attribute.
+ (split pattern): Replace symbol+offset with MOVW/MOVT.
+
2013-11-24 Steven Bosscher <steven@gcc.gnu.org>
PR bootstrap/59279
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 944cf10..c5b16da 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -121,6 +121,7 @@ extern rtx arm_gen_compare_reg (RTX_CODE, rtx, rtx, rtx);
extern rtx arm_gen_return_addr_mask (void);
extern void arm_reload_in_hi (rtx *);
extern void arm_reload_out_hi (rtx *);
+extern int arm_max_const_double_inline_cost (void);
extern int arm_const_double_inline_cost (rtx);
extern bool arm_const_double_by_parts (rtx);
extern bool arm_const_double_by_immediates (rtx);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 0d68f01..dc3dbdb 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -869,6 +869,9 @@ int arm_arch_thumb_hwdiv;
than core registers. */
int prefer_neon_for_64bits = 0;
+/* Nonzero if we shouldn't use literal pools. */
+bool arm_disable_literal_pool = false;
+
/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
we must report the mode of the memory reference from
TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
@@ -2573,6 +2576,16 @@ arm_option_override (void)
if (TARGET_APCS_FRAME)
flag_shrink_wrap = false;
+ /* We only support -mslow-flash-data on armv7-m targets. */
+ if (target_slow_flash_data
+ && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
+ || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
+ error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
+
+ /* Currently, for slow flash data, we just disable literal pools. */
+ if (target_slow_flash_data)
+ arm_disable_literal_pool = true;
+
/* Register global variables with the garbage collector. */
arm_add_gc_roots ();
}
@@ -6417,6 +6430,25 @@ thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
&& thumb2_legitimate_index_p (mode, xop0, strict_p)));
}
+ /* Normally we can assign constant values to target registers without
+ the help of the constant pool. But there are cases where we have to
+ use the constant pool, like:
+ 1) assigning a label to a register.
+ 2) sign-extending an 8-bit value to 32 bits and then assigning it to
+ a register.
+
+ A constant pool access in the format:
+ (set (reg r0) (mem (symbol_ref (".LC0"))))
+ will cause the use of a literal pool (later in function arm_reorg).
+ So here we mark such a format as invalid, and the compiler will then
+ adjust it into:
+ (set (reg r0) (symbol_ref (".LC0")))
+ (set (reg r0) (mem (reg r0))).
+ No extra register is required, and (mem (reg r0)) won't cause the use
+ of literal pools. */
+ else if (arm_disable_literal_pool && code == SYMBOL_REF
+ && CONSTANT_POOL_ADDRESS_P (x))
+ return 0;
+
else if (GET_MODE_CLASS (mode) != MODE_FLOAT
&& code == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x)
@@ -16222,6 +16254,19 @@ push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
minipool_fix_tail = fix;
}
+/* Return the maximum allowed cost of synthesizing a 64-bit constant inline.
+ Returns the maximum number of insns allowed, or 99 if we always want to
+ synthesize the value (i.e. when literal pools are disabled). */
+int
+arm_max_const_double_inline_cost ()
+{
+ /* Let the value get synthesized to avoid the use of literal pools. */
+ if (arm_disable_literal_pool)
+ return 99;
+
+ return ((optimize_size || arm_ld_sched) ? 3 : 4);
+}
+
/* Return the cost of synthesizing a 64-bit constant VAL inline.
Returns the number of insns needed, or 99 if we don't know how to
do it. */
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index dbd841e..a816ec1 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -329,7 +329,9 @@ extern void (*arm_lang_output_object_attributes_hook)(void);
/* Should MOVW/MOVT be used in preference to a constant pool. */
#define TARGET_USE_MOVT \
- (arm_arch_thumb2 && !optimize_size && !current_tune->prefer_constant_pool)
+ (arm_arch_thumb2 \
+ && (arm_disable_literal_pool \
+ || (!optimize_size && !current_tune->prefer_constant_pool)))
/* We could use unified syntax for arm mode, but for now we just use it
for Thumb-2. */
@@ -554,6 +556,11 @@ extern int arm_arch_thumb_hwdiv;
than core registers. */
extern int prefer_neon_for_64bits;
+/* Nonzero if we shouldn't use literal pools. */
+#ifndef USED_FOR_TARGET
+extern bool arm_disable_literal_pool;
+#endif
+
#ifndef TARGET_DEFAULT
#define TARGET_DEFAULT (MASK_APCS_FRAME)
#endif
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 8e52003..16095fa 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -82,6 +82,9 @@
;; Processor type. This is created automatically from arm-cores.def.
(include "arm-tune.md")
+;; Instruction classification types
+(include "types.md")
+
; IS_THUMB is set to 'yes' when we are generating Thumb code, and 'no' when
; generating ARM code. This is used to control the length of some insn
; patterns that share the same RTL in both ARM and Thumb code.
@@ -191,6 +194,12 @@
(const_string "yes")]
(const_string "no")))
+(define_attr "use_literal_pool" "no,yes"
+ (cond [(and (eq_attr "type" "f_loads,f_loadd")
+ (match_test "CONSTANT_P (operands[1])"))
+ (const_string "yes")]
+ (const_string "no")))
+
; Allows an insn to disable certain alternatives for reasons other than
; arch support.
(define_attr "insn_enabled" "no,yes"
@@ -210,6 +219,10 @@
(match_test "arm_restrict_it"))
(const_string "no")
+ (and (eq_attr "use_literal_pool" "yes")
+ (match_test "arm_disable_literal_pool"))
+ (const_string "no")
+
(eq_attr "arch_enabled" "no")
(const_string "no")
@@ -245,9 +258,6 @@
(set_attr "length" "4")
(set_attr "pool_range" "250")])
-;; Instruction classification types
-(include "types.md")
-
; Load scheduling, set from the arm_ld_sched variable
; initialized by arm_option_override()
(define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched")))
@@ -6049,7 +6059,7 @@
"TARGET_32BIT
&& reload_completed
&& (arm_const_double_inline_cost (operands[1])
- <= ((optimize_size || arm_ld_sched) ? 3 : 4))"
+ <= arm_max_const_double_inline_cost ())"
[(const_int 0)]
"
arm_split_constant (SET, SImode, curr_insn,
@@ -6312,6 +6322,47 @@
"
)
+;; A normal way to do (symbol + offset) requires three instructions at least
+;; (depends on how big the offset is) as below:
+;; movw r0, #:lower16:g
+;; movt r0, #:upper16:g
+;; adds r0, #4
+;;
+;; A better way would be:
+;; movw r0, #:lower16:g+4
+;; movt r0, #:upper16:g+4
+;;
+;; The limitation of this way is that the offset must be a 16-bit signed
+;; value, because the current assembler only supports REL type relocations for
+;; such a case. If the more powerful RELA type is supported in future, we should
+;; update this pattern to always go with the better way.
+(define_split
+ [(set (match_operand:SI 0 "arm_general_register_operand" "")
+ (const:SI (plus:SI (match_operand:SI 1 "general_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))))]
+ "TARGET_THUMB2
+ && arm_disable_literal_pool
+ && reload_completed
+ && GET_CODE (operands[1]) == SYMBOL_REF"
+ [(clobber (const_int 0))]
+ "
+ int offset = INTVAL (operands[2]);
+
+ if (offset < -0x8000 || offset > 0x7fff)
+ {
+ arm_emit_movpair (operands[0], operands[1]);
+ emit_insn (gen_rtx_SET (SImode, operands[0],
+ gen_rtx_PLUS (SImode, operands[0], operands[2])));
+ }
+ else
+ {
+ rtx op = gen_rtx_CONST (SImode,
+ gen_rtx_PLUS (SImode, operands[1], operands[2]));
+ arm_emit_movpair (operands[0], op);
+ }
+ "
+)
+
;; Split symbol_refs at the later stage (after cprop), instead of generating
;; movt/movw pair directly at expand. Otherwise corresponding high_sum
;; and lo_sum would be merged back into memory load at cprop. However,
diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt
index fa0839a..24e5b06 100644
--- a/gcc/config/arm/arm.opt
+++ b/gcc/config/arm/arm.opt
@@ -271,3 +271,7 @@ Enable unaligned word and halfword accesses to packed data.
mneon-for-64bits
Target Report RejectNegative Var(use_neon_for_64bits) Init(0)
Use Neon to perform 64-bits operations rather than core registers.
+
+mslow-flash-data
+Target Report Var(target_slow_flash_data) Init(0)
+Assume loading data from flash is slower than fetching instructions.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 568c90d..501d080 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -533,6 +533,7 @@ Objective-C and Objective-C++ Dialects}.
-mfix-cortex-m3-ldrd @gol
-munaligned-access @gol
-mneon-for-64bits @gol
+-mslow-flash-data @gol
-mrestrict-it}
@emph{AVR Options}
@@ -12345,6 +12346,13 @@ Enables using Neon to handle scalar 64-bits operations. This is
disabled by default since the cost of moving data from core registers
to Neon is high.
+@item -mslow-flash-data
+@opindex mslow-flash-data
+Assume loading data from flash is slower than fetching instructions.
+Therefore literal loads are minimized for better performance.
+This option is only supported when compiling for ARMv7 M-profile and
+is off by default.
+
@item -mrestrict-it
@opindex mrestrict-it
Restricts generation of IT blocks to conform to the rules of ARMv8.
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 2f28cb4..a2e144f 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2013-11-25 Terry Guo <terry.guo@arm.com>
+
+ * gcc.target/arm/thumb2-slow-flash-data.c: New.
+
2013-11-23 Uros Bizjak <ubizjak@gmail.com>
* gcc.dg/float-exact-1.c: Use dg-add-options ieee.
diff --git a/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data.c b/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data.c
new file mode 100644
index 0000000..9852ea5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data.c
@@ -0,0 +1,74 @@
+/* The option -mslow-flash-data is just for performance tuning; it
+ doesn't totally disable the use of literal pools. But for the simple
+ cases below, the use of the literal pool should be replaced by
+ movw/movt or a read-only constant pool. */
+
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_cortex_m } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+/* { dg-options "-O2 -mthumb -mslow-flash-data" } */
+
+float sf;
+double df;
+long long l;
+static char *p = "Hello World";
+
+float
+testsf (float *p)
+{
+ if (*p > 1.1234f)
+ return 2.1234f;
+ else
+ return 3.1234f;
+}
+
+double
+testdf (double *p)
+{
+ if (*p > 4.1234)
+ return 2.1234;
+ else
+ return 3.1234;
+}
+
+long long
+testll (long long *p)
+{
+ if (*p > 0x123456789ABCDEFll)
+ return 0x111111111ll;
+ else
+ return 0x222222222ll;
+}
+
+char *
+testchar ()
+{
+ return p + 4;
+}
+
+int
+foo (int a, int b)
+{
+ int i;
+ volatile *labelref = &&label1;
+
+ if (a > b)
+ {
+ while (i < b)
+ {
+ a += *labelref;
+ i += 1;
+ }
+ goto *labelref;
+ }
+ else
+ b = b + 3;
+
+ a = a * b;
+
+label1:
+ return a + b;
+}
+
+/* { dg-final { scan-assembler-times "movt" 13 } } */
+/* { dg-final { scan-assembler-times "movt.*LC0\\+4" 1 } } */