diff options
author | Terry Guo <terry.guo@arm.com> | 2013-11-25 06:41:20 +0000 |
---|---|---|
committer | Xuepeng Guo <xguo@gcc.gnu.org> | 2013-11-25 06:41:20 +0000 |
commit | 02231c13506fcbd63be93bba1215211bcbd1024c (patch) | |
tree | b045e1d8d7d9e61ea7b8d4e8262360a69c3e0632 | |
parent | 36ef4e9d585a929f0423adc30c2076d6bb445c4a (diff) | |
download | gcc-02231c13506fcbd63be93bba1215211bcbd1024c.zip gcc-02231c13506fcbd63be93bba1215211bcbd1024c.tar.gz gcc-02231c13506fcbd63be93bba1215211bcbd1024c.tar.bz2 |
invoke.texi (-mslow-flash-data): Document new option.
gcc/ChangeLog
2013-11-25 Terry Guo <terry.guo@arm.com>
* doc/invoke.texi (-mslow-flash-data): Document new option.
* config/arm/arm.opt (mslow-flash-data): New option.
* config/arm/arm-protos.h (arm_max_const_double_inline_cost): Declare
it.
* config/arm/arm.h (TARGET_USE_MOVT): Always true when literal pools
are disabled.
(arm_disable_literal_pool): Declare it.
* config/arm/arm.c (arm_disable_literal_pool): New variable.
(arm_option_override): Handle new option.
(thumb2_legitimate_address_p): Don't allow symbol references when
literal pools are disabled.
(arm_max_const_double_inline_cost): New function.
* config/arm/arm.md (types.md): Include it before ...
(use_literal_pool): New attribute.
(enabled): Use new attribute.
(split pattern): Replace symbol+offset with MOVW/MOVT.
gcc/testsuite/ChangeLog
2013-11-25 Terry Guo <terry.guo@arm.com>
* gcc.target/arm/thumb2-slow-flash-data.c: New.
From-SVN: r205342
-rw-r--r-- | gcc/ChangeLog | 19 | ||||
-rw-r--r-- | gcc/config/arm/arm-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/arm/arm.c | 45 | ||||
-rw-r--r-- | gcc/config/arm/arm.h | 9 | ||||
-rw-r--r-- | gcc/config/arm/arm.md | 59 | ||||
-rw-r--r-- | gcc/config/arm/arm.opt | 4 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 8 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data.c | 74 |
9 files changed, 218 insertions, 5 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 1fd4f3f..cbb27ce 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,22 @@ +2013-11-25 Terry Guo <terry.guo@arm.com> + + * doc/invoke.texi (-mslow-flash-data): Document new option. + * config/arm/arm.opt (mslow-flash-data): New option. + * config/arm/arm-protos.h (arm_max_const_double_inline_cost): Declare + it. + * config/arm/arm.h (TARGET_USE_MOVT): Always true when literal pools + are disabled. + (arm_disable_literal_pool): Declare it. + * config/arm/arm.c (arm_disable_literal_pool): New variable. + (arm_option_override): Handle new option. + (thumb2_legitimate_address_p): Don't allow symbol references when + literal pools are disabled. + (arm_max_const_double_inline_cost): New function. + * config/arm/arm.md (types.md): Include it before ... + (use_literal_pool): New attribute. + (enabled): Use new attribute. + (split pattern): Replace symbol+offset with MOVW/MOVT. + 2013-11-24 Steven Bosscher <steven@gcc.gnu.org> PR bootstrap/59279 diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 944cf10..c5b16da 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -121,6 +121,7 @@ extern rtx arm_gen_compare_reg (RTX_CODE, rtx, rtx, rtx); extern rtx arm_gen_return_addr_mask (void); extern void arm_reload_in_hi (rtx *); extern void arm_reload_out_hi (rtx *); +extern int arm_max_const_double_inline_cost (void); extern int arm_const_double_inline_cost (rtx); extern bool arm_const_double_by_parts (rtx); extern bool arm_const_double_by_immediates (rtx); diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 0d68f01..dc3dbdb 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -869,6 +869,9 @@ int arm_arch_thumb_hwdiv; than core registers. */ int prefer_neon_for_64bits = 0; +/* Nonzero if we shouldn't use literal pools. 
*/ +bool arm_disable_literal_pool = false; + /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we must report the mode of the memory reference from TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */ @@ -2573,6 +2576,16 @@ arm_option_override (void) if (TARGET_APCS_FRAME) flag_shrink_wrap = false; + /* We only support -mslow-flash-data on armv7-m targets. */ + if (target_slow_flash_data + && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em) + || (TARGET_THUMB1 || flag_pic || TARGET_NEON))) + error ("-mslow-flash-data only supports non-pic code on armv7-m targets"); + + /* Currently, for slow flash data, we just disable literal pools. */ + if (target_slow_flash_data) + arm_disable_literal_pool = true; + /* Register global variables with the garbage collector. */ arm_add_gc_roots (); } @@ -6417,6 +6430,25 @@ thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p) && thumb2_legitimate_index_p (mode, xop0, strict_p))); } + /* Normally we can assign constant values to target registers without + the help of constant pool. But there are cases we have to use constant + pool like: + 1) assign a label to register. + 2) sign-extend a 8bit value to 32bit and then assign to register. + + Constant pool access in format: + (set (reg r0) (mem (symbol_ref (".LC0")))) + will cause the use of literal pool (later in function arm_reorg). + So here we mark such format as an invalid format, then the compiler + will adjust it into: + (set (reg r0) (symbol_ref (".LC0"))) + (set (reg r0) (mem (reg r0))). + No extra register is required, and (mem (reg r0)) won't cause the use + of literal pools. 
*/ + else if (arm_disable_literal_pool && code == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (x)) + return 0; + else if (GET_MODE_CLASS (mode) != MODE_FLOAT && code == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x) @@ -16222,6 +16254,19 @@ push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc, minipool_fix_tail = fix; } +/* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline. + Returns the number of insns needed, or 99 if we always want to synthesize + the value. */ +int +arm_max_const_double_inline_cost () +{ + /* Let the value get synthesized to avoid the use of literal pools. */ + if (arm_disable_literal_pool) + return 99; + + return ((optimize_size || arm_ld_sched) ? 3 : 4); +} + /* Return the cost of synthesizing a 64-bit constant VAL inline. Returns the number of insns needed, or 99 if we don't know how to do it. */ diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index dbd841e..a816ec1 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -329,7 +329,9 @@ extern void (*arm_lang_output_object_attributes_hook)(void); /* Should MOVW/MOVT be used in preference to a constant pool. */ #define TARGET_USE_MOVT \ - (arm_arch_thumb2 && !optimize_size && !current_tune->prefer_constant_pool) + (arm_arch_thumb2 \ + && (arm_disable_literal_pool \ + || (!optimize_size && !current_tune->prefer_constant_pool))) /* We could use unified syntax for arm mode, but for now we just use it for Thumb-2. */ @@ -554,6 +556,11 @@ extern int arm_arch_thumb_hwdiv; than core registers. */ extern int prefer_neon_for_64bits; +/* Nonzero if we shouldn't use literal pools. */ +#ifndef USED_FOR_TARGET +extern bool arm_disable_literal_pool; +#endif + #ifndef TARGET_DEFAULT #define TARGET_DEFAULT (MASK_APCS_FRAME) #endif diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 8e52003..16095fa 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -82,6 +82,9 @@ ;; Processor type. This is created automatically from arm-cores.def. 
(include "arm-tune.md") +;; Instruction classification types +(include "types.md") + ; IS_THUMB is set to 'yes' when we are generating Thumb code, and 'no' when ; generating ARM code. This is used to control the length of some insn ; patterns that share the same RTL in both ARM and Thumb code. @@ -191,6 +194,12 @@ (const_string "yes")] (const_string "no"))) +(define_attr "use_literal_pool" "no,yes" + (cond [(and (eq_attr "type" "f_loads,f_loadd") + (match_test "CONSTANT_P (operands[1])")) + (const_string "yes")] + (const_string "no"))) + ; Allows an insn to disable certain alternatives for reasons other than ; arch support. (define_attr "insn_enabled" "no,yes" @@ -210,6 +219,10 @@ (match_test "arm_restrict_it")) (const_string "no") + (and (eq_attr "use_literal_pool" "yes") + (match_test "arm_disable_literal_pool")) + (const_string "no") + (eq_attr "arch_enabled" "no") (const_string "no") @@ -245,9 +258,6 @@ (set_attr "length" "4") (set_attr "pool_range" "250")]) -;; Instruction classification types -(include "types.md") - ; Load scheduling, set from the arm_ld_sched variable ; initialized by arm_option_override() (define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched"))) @@ -6049,7 +6059,7 @@ "TARGET_32BIT && reload_completed && (arm_const_double_inline_cost (operands[1]) - <= ((optimize_size || arm_ld_sched) ? 3 : 4))" + <= arm_max_const_double_inline_cost ())" [(const_int 0)] " arm_split_constant (SET, SImode, curr_insn, @@ -6312,6 +6322,47 @@ " ) +;; A normal way to do (symbol + offset) requires three instructions at least +;; (depends on how big the offset is) as below: +;; movw r0, #:lower16:g +;; movw r0, #:upper16:g +;; adds r0, #4 +;; +;; A better way would be: +;; movw r0, #:lower16:g+4 +;; movw r0, #:upper16:g+4 +;; +;; The limitation of this way is that the length of offset should be a 16-bit +;; signed value, because current assembler only supports REL type relocation for +;; such case. 
If the more powerful RELA type is supported in future, we should +;; update this pattern to go with better way. +(define_split + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (const:SI (plus:SI (match_operand:SI 1 "general_operand" "") + (match_operand:SI 2 "const_int_operand" ""))))] + "TARGET_THUMB2 + && arm_disable_literal_pool + && reload_completed + && GET_CODE (operands[1]) == SYMBOL_REF" + [(clobber (const_int 0))] + " + int offset = INTVAL (operands[2]); + + if (offset < -0x8000 || offset > 0x7fff) + { + arm_emit_movpair (operands[0], operands[1]); + emit_insn (gen_rtx_SET (SImode, operands[0], + gen_rtx_PLUS (SImode, operands[0], operands[2]))); + } + else + { + rtx op = gen_rtx_CONST (SImode, + gen_rtx_PLUS (SImode, operands[1], operands[2])); + arm_emit_movpair (operands[0], op); + } + " +) + ;; Split symbol_refs at the later stage (after cprop), instead of generating ;; movt/movw pair directly at expand. Otherwise corresponding high_sum ;; and lo_sum would be merged back into memory load at cprop. However, diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt index fa0839a..24e5b06 100644 --- a/gcc/config/arm/arm.opt +++ b/gcc/config/arm/arm.opt @@ -271,3 +271,7 @@ Enable unaligned word and halfword accesses to packed data. mneon-for-64bits Target Report RejectNegative Var(use_neon_for_64bits) Init(0) Use Neon to perform 64-bits operations rather than core registers. + +mslow-flash-data +Target Report Var(target_slow_flash_data) Init(0) +Assume loading data from flash is slower than fetching instructions. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 568c90d..501d080 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -533,6 +533,7 @@ Objective-C and Objective-C++ Dialects}. -mfix-cortex-m3-ldrd @gol -munaligned-access @gol -mneon-for-64bits @gol +-mslow-flash-data @gol -mrestrict-it} @emph{AVR Options} @@ -12345,6 +12346,13 @@ Enables using Neon to handle scalar 64-bits operations. 
This is disabled by default since the cost of moving data from core registers to Neon is high. +@item -mslow-flash-data +@opindex mslow-flash-data +Assume loading data from flash is slower than fetching instructions. +Therefore literal load is minimized for better performance. +This option is only supported when compiling for ARMv7 M-profile and +is off by default. + @item -mrestrict-it @opindex mrestrict-it Restricts generation of IT blocks to conform to the rules of ARMv8. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 2f28cb4..a2e144f 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2013-11-25 Terry Guo <terry.guo@arm.com> + + * gcc.target/arm/thumb2-slow-flash-data.c: New. + 2013-11-23 Uros Bizjak <ubizjak@gmail.com> * gcc.dg/float-exact-1.c: Use dg-add-options ieee. diff --git a/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data.c b/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data.c new file mode 100644 index 0000000..9852ea5 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data.c @@ -0,0 +1,74 @@ +/* The option -mslow-flash-data is just for performance tuning, it + doesn't totally disable the use of literal pools. But for below + simple cases, the use of literal pool should be replaced by + movw/movt or read-only constant pool. 
*/ + +/* { dg-do compile } */ +/* { dg-require-effective-target arm_cortex_m } */ +/* { dg-require-effective-target arm_thumb2_ok } */ +/* { dg-options "-O2 -mthumb -mslow-flash-data" } */ + +float sf; +double df; +long long l; +static char *p = "Hello World"; + +float +testsf (float *p) +{ + if (*p > 1.1234f) + return 2.1234f; + else + return 3.1234f; +} + +double +testdf (double *p) +{ + if (*p > 4.1234) + return 2.1234; + else + return 3.1234; +} + +long long +testll (long long *p) +{ + if (*p > 0x123456789ABCDEFll) + return 0x111111111ll; + else + return 0x222222222ll; +} + +char * +testchar () +{ + return p + 4; +} + +int +foo (int a, int b) +{ + int i; + volatile *labelref = &&label1; + + if (a > b) + { + while (i < b) + { + a += *labelref; + i += 1; + } + goto *labelref; + } + else + b = b + 3; + + a = a * b; + +label1: + return a + b; +} + +/* { dg-final { scan-assembler-times "movt" 13 } } */ +/* { dg-final { scan-assembler-times "movt.*LC0\\+4" 1 } } */ |