aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorJan Hubicka <hubicka@ucw.cz>2015-01-20 00:58:19 +0100
committerJan Hubicka <hubicka@gcc.gnu.org>2015-01-19 23:58:19 +0000
commita1aff58fb98dcba080629c73933b51850d1f0e1e (patch)
tree72fbc4d3f5794930073c7c41b7d2842308a510c6 /gcc
parent0c84d521a9fe232a30c7fe638a4af21bc7c10c91 (diff)
downloadgcc-a1aff58fb98dcba080629c73933b51850d1f0e1e.zip
gcc-a1aff58fb98dcba080629c73933b51850d1f0e1e.tar.gz
gcc-a1aff58fb98dcba080629c73933b51850d1f0e1e.tar.bz2
re PR lto/45375 ([meta-bug] Issues with building Mozilla (i.e. Firefox) with LTO)
PR lto/45375 * i386.c (gate): Check flag_expensive_optimizations and optimize_size. (ix86_option_override_internal): Drop optimize_size condition on MASK_ACCUMULATE_OUTGOING_ARGS, MASK_VZEROUPPER, MASK_AVX256_SPLIT_UNALIGNED_LOAD, MASK_AVX256_SPLIT_UNALIGNED_STORE, MASK_PREFER_AVX128. (ix86_avx256_split_vector_move_misalign): Check optimize_insn_for_speed_p. * sse.md (all uses of TARGET_PREFER_AVX128): Add optimize_insn_for_speed_p check. From-SVN: r219871
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog14
-rw-r--r--gcc/config/i386/i386.c48
-rw-r--r--gcc/config/i386/sse.md10
3 files changed, 42 insertions, 30 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 7e182f3..9a76dd7 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,17 @@
+2015-01-19 Jan Hubicka <hubicka@ucw.cz>
+
+ PR lto/45375
+ * i386.c (gate): Check flag_expensive_optimizations and
+ optimize_size.
+ (ix86_option_override_internal): Drop optimize_size condition
+ on MASK_ACCUMULATE_OUTGOING_ARGS, MASK_VZEROUPPER,
+ MASK_AVX256_SPLIT_UNALIGNED_LOAD, MASK_AVX256_SPLIT_UNALIGNED_STORE,
+ MASK_PREFER_AVX128.
+ (ix86_avx256_split_vector_move_misalign): Check
+ optimize_insn_for_speed_p.
+ * sse.md (all uses of TARGET_PREFER_AVX128): Add
+ optimize_insn_for_speed_p check.
+
2015-01-19 Matthew Fortune <matthew.fortune@imgtec.com>
* config/mips/mips.h (FP_ASM_SPEC): New define.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 3fa7842..ef1b288 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2578,7 +2578,9 @@ public:
/* opt_pass methods: */
virtual bool gate (function *)
{
- return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
+ return TARGET_AVX && !TARGET_AVX512F
+ && TARGET_VZEROUPPER && flag_expensive_optimizations
+ && !optimize_size;
}
virtual unsigned int execute (function *)
@@ -3874,6 +3876,8 @@ ix86_option_override_internal (bool main_args_p,
}
ix86_tune_cost = processor_target_table[ix86_tune].cost;
+ /* TODO: ix86_cost should be chosen at instruction or function granularity
+ so for cold code we use size_cost even in !optimize_size compilation. */
if (opts->x_optimize_size)
ix86_cost = &ix86_size_cost;
else
@@ -4113,8 +4117,7 @@ ix86_option_override_internal (bool main_args_p,
}
if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
- && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
- && !opts->x_optimize_size)
+ && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
/* If stack probes are required, the space used for large function
@@ -4244,26 +4247,19 @@ ix86_option_override_internal (bool main_args_p,
#endif
}
- /* When not opts->x_optimize for size, enable vzeroupper optimization for
- TARGET_AVX with -fexpensive-optimizations and split 32-byte
- AVX unaligned load/store. */
- if (!opts->x_optimize_size)
- {
- if (flag_expensive_optimizations
- && !(opts_set->x_target_flags & MASK_VZEROUPPER))
- opts->x_target_flags |= MASK_VZEROUPPER;
- if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
- && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
- opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
- if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
- && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
- opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
- /* Enable 128-bit AVX instruction generation
- for the auto-vectorizer. */
- if (TARGET_AVX128_OPTIMAL
- && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
- opts->x_target_flags |= MASK_PREFER_AVX128;
- }
+ if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
+ opts->x_target_flags |= MASK_VZEROUPPER;
+ if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
+ && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
+ opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
+ if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
+ && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
+ opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
+ /* Enable 128-bit AVX instruction generation
+ for the auto-vectorizer. */
+ if (TARGET_AVX128_OPTIMAL
+ && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
+ opts->x_target_flags |= MASK_PREFER_AVX128;
if (opts->x_ix86_recip_name)
{
@@ -17469,7 +17465,8 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
if (MEM_P (op1))
{
- if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
+ if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
+ && optimize_insn_for_speed_p ())
{
rtx r = gen_reg_rtx (mode);
m = adjust_address (op1, mode, 0);
@@ -17489,7 +17486,8 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
}
else if (MEM_P (op0))
{
- if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
+ if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
+ && optimize_insn_for_speed_p ())
{
m = adjust_address (op0, mode, 0);
emit_insn (extract (m, op1, const0_rtx));
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 41de832..ee2d93b 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -5434,7 +5434,7 @@
{
rtx tmp0, tmp1;
- if (TARGET_AVX && !TARGET_PREFER_AVX128)
+ if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
{
tmp0 = gen_reg_rtx (V4DFmode);
tmp1 = force_reg (V2DFmode, operands[1]);
@@ -5496,7 +5496,7 @@
{
rtx tmp0, tmp1, tmp2;
- if (TARGET_AVX && !TARGET_PREFER_AVX128)
+ if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
{
tmp0 = gen_reg_rtx (V4DFmode);
tmp1 = force_reg (V2DFmode, operands[1]);
@@ -5593,7 +5593,7 @@
{
rtx tmp0, tmp1, tmp2;
- if (TARGET_AVX && !TARGET_PREFER_AVX128)
+ if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
{
tmp0 = gen_reg_rtx (V4DFmode);
tmp1 = force_reg (V2DFmode, operands[1]);
@@ -14472,7 +14472,7 @@
rtx tmp0, tmp1;
if (<MODE>mode == V2DFmode
- && TARGET_AVX && !TARGET_PREFER_AVX128)
+ && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
{
rtx tmp2 = gen_reg_rtx (V4DFmode);
@@ -14579,7 +14579,7 @@
rtx tmp0, tmp1;
if (<MODE>mode == V2DFmode
- && TARGET_AVX && !TARGET_PREFER_AVX128)
+ && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
{
rtx tmp2 = gen_reg_rtx (V4DFmode);