diff options
Diffstat (limited to 'gcc/config/i386')
-rw-r--r-- | gcc/config/i386/i386-expand.cc | 45 | ||||
-rw-r--r-- | gcc/config/i386/i386-features.cc | 189 | ||||
-rw-r--r-- | gcc/config/i386/i386-features.h | 11 | ||||
-rw-r--r-- | gcc/config/i386/i386.cc | 28 |
4 files changed, 133 insertions, 140 deletions
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 7f0fdb6..ae817d8 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -3396,8 +3396,7 @@ ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop) too common scenario. */ start_sequence (); compare_op = ix86_expand_fp_compare (code, op0, op1); - compare_seq = get_insns (); - end_sequence (); + compare_seq = end_sequence (); if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode) code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op)); @@ -3561,8 +3560,7 @@ ix86_expand_int_movcc (rtx operands[]) start_sequence (); compare_op = ix86_expand_compare (code, op0, op1); - compare_seq = get_insns (); - end_sequence (); + compare_seq = end_sequence (); compare_code = GET_CODE (compare_op); @@ -16403,8 +16401,7 @@ ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val) if (GET_MODE (reg) != innermode) reg = gen_lowpart (innermode, reg); SET_SRC (PATTERN (insn)) = gen_vec_duplicate (mode, reg); - seq = get_insns (); - end_sequence (); + seq = end_sequence (); if (seq) emit_insn_before (seq, insn); @@ -22099,8 +22096,7 @@ expand_vec_perm_interleave2 (struct expand_vec_perm_d *d) V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */ start_sequence (); ok = expand_vec_perm_1 (&dfinal); - seq = get_insns (); - end_sequence (); + seq = end_sequence (); if (!ok) return false; @@ -22436,8 +22432,7 @@ expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d) start_sequence (); ok = expand_vec_perm_1 (&dfirst); - seq = get_insns (); - end_sequence (); + seq = end_sequence (); if (!ok) return false; @@ -22545,8 +22540,7 @@ expand_vec_perm_2perm_interleave (struct expand_vec_perm_d *d, bool two_insn) { start_sequence (); ok = expand_vec_perm_1 (&dfirst); - seq1 = get_insns (); - end_sequence (); + seq1 = end_sequence (); if (!ok) return false; @@ -22556,8 +22550,7 @@ expand_vec_perm_2perm_interleave (struct expand_vec_perm_d *d, bool two_insn) { start_sequence (); ok = expand_vec_perm_1 (&dsecond); - seq2 = get_insns (); - end_sequence (); + seq2 = end_sequence (); if (!ok) return false; @@ -22671,8 +22664,7 @@ expand_vec_perm_2perm_pblendv (struct expand_vec_perm_d *d, bool two_insn) { start_sequence (); ok = expand_vec_perm_1 (&dfirst); - seq1 = get_insns (); - end_sequence (); + seq1 = end_sequence (); if (!ok) return false; @@ -22682,8 +22674,7 @@ expand_vec_perm_2perm_pblendv (struct expand_vec_perm_d *d, bool two_insn) { start_sequence (); ok = expand_vec_perm_1 (&dsecond); - seq2 = get_insns (); - end_sequence (); + seq2 = end_sequence (); if (!ok) return false; @@ -22877,8 +22868,7 @@ expand_vec_perm2_vperm2f128_vblend (struct expand_vec_perm_d *d) canonicalize_perm (&dfirst); start_sequence (); ok = ix86_expand_vec_perm_const_1 (&dfirst); - seq1 = get_insns (); - end_sequence (); + seq1 = end_sequence (); if (!ok) return false; @@ -22886,8 +22876,7 @@ expand_vec_perm2_vperm2f128_vblend (struct expand_vec_perm_d *d) canonicalize_perm (&dsecond); start_sequence (); ok = ix86_expand_vec_perm_const_1 (&dsecond); - seq2 = get_insns (); - end_sequence (); + seq2 = end_sequence (); if (!ok) return false; @@ -26114,8 +26103,7 @@ ix86_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq, } } - *prep_seq = get_insns (); - end_sequence (); + *prep_seq = end_sequence (); start_sequence (); @@ -26126,8 +26114,7 @@ ix86_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq, end_sequence (); return NULL_RTX; } - *gen_seq = get_insns (); - end_sequence (); + *gen_seq = end_sequence (); return res; } @@ -26170,8 +26157,7 @@ ix86_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev, return NULL_RTX; } - *prep_seq = get_insns (); - end_sequence (); + *prep_seq = end_sequence (); target = gen_rtx_REG (cc_mode, FLAGS_REG); dfv = ix86_get_flags_cc ((rtx_code) cmp_code); @@ -26202,8 +26188,7 @@ ix86_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev, return NULL_RTX; } - *gen_seq = get_insns (); - end_sequence (); + *gen_seq = end_sequence (); return gen_rtx_fmt_ee ((rtx_code) cmp_code, VOIDmode, target, const0_rtx); } diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc index cc8313b..b1682c2 100644 --- a/gcc/config/i386/i386-features.cc +++ b/gcc/config/i386/i386-features.cc @@ -296,9 +296,8 @@ scalar_chain::scalar_chain (enum machine_mode smode_, enum machine_mode vmode_) insns_conv = BITMAP_ALLOC (NULL); queue = NULL; - n_sse_to_integer = 0; - n_integer_to_sse = 0; - + cost_sse_integer = 0; + weighted_cost_sse_integer = 0 ; max_visits = x86_stv_max_visits; } @@ -337,20 +336,52 @@ scalar_chain::mark_dual_mode_def (df_ref def) /* Record the def/insn pair so we can later efficiently iterate over the defs to convert on insns not in the chain. */ bool reg_new = bitmap_set_bit (defs_conv, DF_REF_REGNO (def)); + basic_block bb = BLOCK_FOR_INSN (DF_REF_INSN (def)); + profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; + bool speed_p = optimize_bb_for_speed_p (bb); + int cost = 0; + if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def))) { if (!bitmap_set_bit (insns_conv, DF_REF_INSN_UID (def)) && !reg_new) return; - n_integer_to_sse++; + + /* Cost integer to sse moves. */ + if (speed_p) + cost = COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2; + else if (TARGET_64BIT || smode == SImode) + cost = COSTS_N_BYTES (4); + /* vmovd (4 bytes) + vpinsrd (6 bytes). */ + else if (TARGET_SSE4_1) + cost = COSTS_N_BYTES (10); + /* movd (4 bytes) + movd (4 bytes) + unpckldq (4 bytes). */ + else + cost = COSTS_N_BYTES (12); } else { if (!reg_new) return; - n_sse_to_integer++; + + /* Cost sse to integer moves. */ + if (speed_p) + cost = COSTS_N_INSNS (ix86_cost->sse_to_integer) / 2; + else if (TARGET_64BIT || smode == SImode) + cost = COSTS_N_BYTES (4); + /* vmovd (4 bytes) + vpextrd (6 bytes). */ + else if (TARGET_SSE4_1) + cost = COSTS_N_BYTES (10); + /* movd (4 bytes) + psrlq (5 bytes) + movd (4 bytes). */ + else + cost = COSTS_N_BYTES (13); } + if (speed_p) + weighted_cost_sse_integer += bb->count.to_sreal_scale (entry_count) * cost; + + cost_sse_integer += cost; + if (dump_file) fprintf (dump_file, " Mark r%d def in insn %d as requiring both modes in chain #%d\n", @@ -531,15 +562,15 @@ general_scalar_chain::vector_const_cost (rtx exp, basic_block bb) return COSTS_N_INSNS (ix86_cost->sse_load[smode == DImode ? 1 : 0]) / 2; } -/* Compute a gain for chain conversion. */ +/* Return true if it's cost profitable for chain conversion. */ -int +bool general_scalar_chain::compute_convert_gain () { bitmap_iterator bi; unsigned insn_uid; int gain = 0; - int cost = 0; + sreal weighted_gain = 0; if (dump_file) fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id); @@ -559,10 +590,13 @@ general_scalar_chain::compute_convert_gain () rtx dst = SET_DEST (def_set); basic_block bb = BLOCK_FOR_INSN (insn); int igain = 0; + profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; + bool speed_p = optimize_bb_for_speed_p (bb); + sreal bb_freq = bb->count.to_sreal_scale (entry_count); if (REG_P (src) && REG_P (dst)) { - if (optimize_bb_for_size_p (bb)) + if (!speed_p) /* reg-reg move is 2 bytes, while SSE 3. */ igain += COSTS_N_BYTES (2 * m - 3); else @@ -571,7 +605,7 @@ general_scalar_chain::compute_convert_gain () } else if (REG_P (src) && MEM_P (dst)) { - if (optimize_bb_for_size_p (bb)) + if (!speed_p) /* Integer load/store is 3+ bytes and SSE 4+. */ igain += COSTS_N_BYTES (3 * m - 4); else @@ -581,7 +615,7 @@ general_scalar_chain::compute_convert_gain () } else if (MEM_P (src) && REG_P (dst)) { - if (optimize_bb_for_size_p (bb)) + if (!speed_p) igain += COSTS_N_BYTES (3 * m - 4); else igain += COSTS_N_INSNS (m * ix86_cost->int_load[2] @@ -593,7 +627,7 @@ general_scalar_chain::compute_convert_gain () of explicit load and store instructions. */ if (MEM_P (dst)) { - if (optimize_bb_for_size_p (bb)) + if (!speed_p) /* ??? This probably should account size difference of SSE and integer load rather than full SSE load. */ igain -= COSTS_N_BYTES (8); @@ -667,7 +701,7 @@ general_scalar_chain::compute_convert_gain () igain -= vector_const_cost (XEXP (src, 1), bb); if (MEM_P (XEXP (src, 1))) { - if (optimize_bb_for_size_p (bb)) + if (!speed_p) igain -= COSTS_N_BYTES (m == 2 ? 3 : 5); else igain += COSTS_N_INSNS @@ -730,7 +764,7 @@ general_scalar_chain::compute_convert_gain () case CONST_INT: if (REG_P (dst)) { - if (optimize_bb_for_size_p (bb)) + if (!speed_p) { /* xor (2 bytes) vs. xorps (3 bytes). */ if (src == const0_rtx) @@ -769,14 +803,14 @@ general_scalar_chain::compute_convert_gain () if (XVECEXP (XEXP (src, 1), 0, 0) == const0_rtx) { // movd (4 bytes) replaced with movdqa (4 bytes). - if (!optimize_bb_for_size_p (bb)) + if (!!speed_p) igain += COSTS_N_INSNS (ix86_cost->sse_to_integer - ix86_cost->xmm_move) / 2; } else { // pshufd; movd replaced with pshufd. - if (optimize_bb_for_size_p (bb)) + if (!speed_p) igain += COSTS_N_BYTES (4); else igain += ix86_cost->sse_to_integer; @@ -788,55 +822,34 @@ general_scalar_chain::compute_convert_gain () } } + if (speed_p) + weighted_gain += bb_freq * igain; + gain += igain; + if (igain != 0 && dump_file) { - fprintf (dump_file, " Instruction gain %d for ", igain); + fprintf (dump_file, " Instruction gain %d with bb_freq %.2f for", + igain, bb_freq.to_double ()); dump_insn_slim (dump_file, insn); } - gain += igain; } if (dump_file) - fprintf (dump_file, " Instruction conversion gain: %d\n", gain); - - /* Cost the integer to sse and sse to integer moves. */ - if (!optimize_function_for_size_p (cfun)) { - cost += n_sse_to_integer * COSTS_N_INSNS (ix86_cost->sse_to_integer) / 2; - /* ??? integer_to_sse but we only have that in the RA cost table. - Assume sse_to_integer/integer_to_sse are the same which they - are at the moment. */ - cost += n_integer_to_sse * COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2; + fprintf (dump_file, " Instruction conversion gain: %d, \n", + gain); + fprintf (dump_file, " Registers conversion cost: %d\n", + cost_sse_integer); + fprintf (dump_file, " Weighted instruction conversion gain: %.2f, \n", + weighted_gain.to_double ()); + fprintf (dump_file, " Weighted registers conversion cost: %.2f\n", + weighted_cost_sse_integer.to_double ()); } - else if (TARGET_64BIT || smode == SImode) - { - cost += n_sse_to_integer * COSTS_N_BYTES (4); - cost += n_integer_to_sse * COSTS_N_BYTES (4); - } - else if (TARGET_SSE4_1) - { - /* vmovd (4 bytes) + vpextrd (6 bytes). */ - cost += n_sse_to_integer * COSTS_N_BYTES (10); - /* vmovd (4 bytes) + vpinsrd (6 bytes). */ - cost += n_integer_to_sse * COSTS_N_BYTES (10); - } - else - { - /* movd (4 bytes) + psrlq (5 bytes) + movd (4 bytes). */ - cost += n_sse_to_integer * COSTS_N_BYTES (13); - /* movd (4 bytes) + movd (4 bytes) + unpckldq (4 bytes). */ - cost += n_integer_to_sse * COSTS_N_BYTES (12); - } - - if (dump_file) - fprintf (dump_file, " Registers conversion cost: %d\n", cost); - - gain -= cost; - if (dump_file) - fprintf (dump_file, " Total gain: %d\n", gain); - - return gain; + if (weighted_gain != weighted_cost_sse_integer) + return weighted_gain > weighted_cost_sse_integer; + else + return gain > cost_sse_integer;; } /* Insert generated conversion instruction sequence INSNS @@ -935,8 +948,7 @@ scalar_chain::make_vector_copies (rtx_insn *insn, rtx reg) else emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0), gen_gpr_to_xmm_move_src (vmode, reg))); - rtx_insn *seq = get_insns (); - end_sequence (); + rtx_insn *seq = end_sequence (); emit_conversion_insns (seq, insn); if (dump_file) @@ -1003,8 +1015,7 @@ scalar_chain::convert_reg (rtx_insn *insn, rtx dst, rtx src) else emit_move_insn (dst, src); - rtx_insn *seq = get_insns (); - end_sequence (); + rtx_insn *seq = end_sequence (); emit_conversion_insns (seq, insn); if (dump_file) @@ -1099,8 +1110,7 @@ scalar_chain::convert_op (rtx *op, rtx_insn *insn) { start_sequence (); vec_cst = validize_mem (force_const_mem (vmode, vec_cst)); - rtx_insn *seq = get_insns (); - end_sequence (); + rtx_insn *seq = end_sequence (); emit_insn_before (seq, insn); } @@ -1553,21 +1563,22 @@ timode_immed_const_gain (rtx cst, basic_block bb) return 0; } -/* Compute a gain for chain conversion. */ +/* Return true it's cost profitable for for chain conversion. */ -int +bool timode_scalar_chain::compute_convert_gain () { /* Assume that if we have to move TImode values between units, then transforming this chain isn't worth it. */ - if (n_sse_to_integer || n_integer_to_sse) - return -1; + if (cost_sse_integer) + return false; bitmap_iterator bi; unsigned insn_uid; /* Split ties to prefer V1TImode when not optimizing for size. */ int gain = optimize_size ? 0 : 1; + sreal weighted_gain = 0; if (dump_file) fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id); @@ -1582,32 +1593,33 @@ timode_scalar_chain::compute_convert_gain () basic_block bb = BLOCK_FOR_INSN (insn); int scost, vcost; int igain = 0; + profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; + bool speed_p = optimize_bb_for_speed_p (bb); + sreal bb_freq = bb->count.to_sreal_scale (entry_count); switch (GET_CODE (src)) { case REG: - if (optimize_bb_for_size_p (bb)) + if (!speed_p) igain = MEM_P (dst) ? COSTS_N_BYTES (6) : COSTS_N_BYTES (3); else igain = COSTS_N_INSNS (1); break; case MEM: - igain = optimize_bb_for_size_p (bb) ? COSTS_N_BYTES (7) - : COSTS_N_INSNS (1); + igain = !speed_p ? COSTS_N_BYTES (7) : COSTS_N_INSNS (1); break; case CONST_INT: if (MEM_P (dst) && standard_sse_constant_p (src, V1TImode)) - igain = optimize_bb_for_size_p (bb) ? COSTS_N_BYTES (11) : 1; + igain = !speed_p ? COSTS_N_BYTES (11) : 1; break; case CONST_WIDE_INT: /* 2 x mov vs. vmovdqa. */ if (MEM_P (dst)) - igain = optimize_bb_for_size_p (bb) ? COSTS_N_BYTES (3) - : COSTS_N_INSNS (1); + igain = !speed_p ? COSTS_N_BYTES (3) : COSTS_N_INSNS (1); break; case NOT: @@ -1628,7 +1640,7 @@ timode_scalar_chain::compute_convert_gain () case LSHIFTRT: /* See ix86_expand_v1ti_shift. */ op1val = INTVAL (XEXP (src, 1)); - if (optimize_bb_for_size_p (bb)) + if (!speed_p) { if (op1val == 64 || op1val == 65) scost = COSTS_N_BYTES (5); @@ -1662,7 +1674,7 @@ timode_scalar_chain::compute_convert_gain () case ASHIFTRT: /* See ix86_expand_v1ti_ashiftrt. */ op1val = INTVAL (XEXP (src, 1)); - if (optimize_bb_for_size_p (bb)) + if (!speed_p) { if (op1val == 64 || op1val == 127) scost = COSTS_N_BYTES (7); @@ -1740,7 +1752,7 @@ timode_scalar_chain::compute_convert_gain () case ROTATERT: /* See ix86_expand_v1ti_rotate. */ op1val = INTVAL (XEXP (src, 1)); - if (optimize_bb_for_size_p (bb)) + if (!speed_p) { scost = COSTS_N_BYTES (13); if ((op1val & 31) == 0) @@ -1772,34 +1784,40 @@ timode_scalar_chain::compute_convert_gain () { if (GET_CODE (XEXP (src, 0)) == AND) /* and;and;or (9 bytes) vs. ptest (5 bytes). */ - igain = optimize_bb_for_size_p (bb) ? COSTS_N_BYTES (4) - : COSTS_N_INSNS (2); + igain = !speed_p ? COSTS_N_BYTES (4) : COSTS_N_INSNS (2); /* or (3 bytes) vs. ptest (5 bytes). */ - else if (optimize_bb_for_size_p (bb)) + else if (!speed_p) igain = -COSTS_N_BYTES (2); } else if (XEXP (src, 1) == const1_rtx) /* and;cmp -1 (7 bytes) vs. pcmpeqd;pxor;ptest (13 bytes). */ - igain = optimize_bb_for_size_p (bb) ? -COSTS_N_BYTES (6) - : -COSTS_N_INSNS (1); + igain = !speed_p ? -COSTS_N_BYTES (6) : -COSTS_N_INSNS (1); break; default: break; } + gain += igain; + if (speed_p) + weighted_gain += bb_freq * igain; + if (igain != 0 && dump_file) { - fprintf (dump_file, " Instruction gain %d for ", igain); + fprintf (dump_file, " Instruction gain %d with bb_freq %.2f for ", + igain, bb_freq.to_double ()); dump_insn_slim (dump_file, insn); } - gain += igain; } if (dump_file) - fprintf (dump_file, " Total gain: %d\n", gain); + fprintf (dump_file, " Total gain: %d, weighted gain %.2f\n", + gain, weighted_gain.to_double ()); - return gain; + if (weighted_gain > (sreal) 0) + return true; + else + return gain > 0; } /* Fix uses of converted REG in debug insns. */ @@ -1908,8 +1926,7 @@ timode_scalar_chain::convert_insn (rtx_insn *insn) src = validize_mem (force_const_mem (V1TImode, src)); use_move = MEM_P (dst); } - rtx_insn *seq = get_insns (); - end_sequence (); + rtx_insn *seq = end_sequence (); if (seq) emit_insn_before (seq, insn); if (use_move) @@ -2595,7 +2612,7 @@ convert_scalars_to_vector (bool timode_p) conversions. */ if (chain->build (&candidates[i], uid, disallowed)) { - if (chain->compute_convert_gain () > 0) + if (chain->compute_convert_gain ()) converted_insns += chain->convert (); else if (dump_file) fprintf (dump_file, "Chain #%d conversion is not profitable\n", diff --git a/gcc/config/i386/i386-features.h b/gcc/config/i386/i386-features.h index 7f7c0f7..e3719b3 100644 --- a/gcc/config/i386/i386-features.h +++ b/gcc/config/i386/i386-features.h @@ -153,12 +153,13 @@ class scalar_chain bitmap insns_conv; hash_map<rtx, rtx> defs_map; - unsigned n_sse_to_integer; - unsigned n_integer_to_sse; + /* Cost of inserted conversion between ineteger and sse. */ + int cost_sse_integer; + sreal weighted_cost_sse_integer; auto_vec<rtx_insn *> control_flow_insns; bool build (bitmap candidates, unsigned insn_uid, bitmap disallowed); - virtual int compute_convert_gain () = 0; + virtual bool compute_convert_gain () = 0; int convert (); protected: @@ -184,7 +185,7 @@ class general_scalar_chain : public scalar_chain public: general_scalar_chain (enum machine_mode smode_, enum machine_mode vmode_) : scalar_chain (smode_, vmode_) {} - int compute_convert_gain () final override; + bool compute_convert_gain () final override; private: void convert_insn (rtx_insn *insn) final override; @@ -196,7 +197,7 @@ class timode_scalar_chain : public scalar_chain { public: timode_scalar_chain () : scalar_chain (TImode, V1TImode) {} - int compute_convert_gain () final override; + bool compute_convert_gain () final override; private: void fix_debug_reg_uses (rtx reg); diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 3d629b0..7f61041 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -1795,8 +1795,7 @@ ix86_init_pic_reg (void) add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX); } - seq = get_insns (); - end_sequence (); + seq = end_sequence (); entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)); insert_insn_on_edge (seq, entry_edge); @@ -4739,8 +4738,7 @@ ix86_va_start (tree valist, rtx nextarg) start_sequence (); emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno)); - seq = get_insns (); - end_sequence (); + seq = end_sequence (); push_topmost_sequence (); emit_insn_after (seq, entry_of_function ()); @@ -7981,8 +7979,7 @@ ix86_get_drap_rtx (void) start_sequence (); drap_vreg = copy_to_reg (arg_ptr); - seq = get_insns (); - end_sequence (); + seq = end_sequence (); insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ())); if (!optimize) @@ -12474,8 +12471,7 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) start_sequence (); emit_call_insn (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr)); - insns = get_insns (); - end_sequence (); + insns = end_sequence (); if (GET_MODE (x) != Pmode) x = gen_rtx_ZERO_EXTEND (Pmode, x); @@ -12529,8 +12525,7 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) start_sequence (); emit_call_insn (gen_tls_local_dynamic_base_64 (Pmode, rax, caddr)); - insns = get_insns (); - end_sequence (); + insns = end_sequence (); /* Attach a unique REG_EQUAL, to allow the RTL optimizers to share the LD_BASE result with other LD model accesses. */ @@ -24823,8 +24818,7 @@ ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/, } } - rtx_insn *seq = get_insns (); - end_sequence (); + rtx_insn *seq = end_sequence (); if (saw_asm_flag) return seq; @@ -26156,14 +26150,10 @@ ix86_vector_costs::finish_cost (const vector_costs *scalar_costs) /* When X86_TUNE_AVX512_TWO_EPILOGUES is enabled arrange for both a AVX2 and a SSE epilogue for AVX512 vectorized loops. */ if (loop_vinfo + && LOOP_VINFO_EPILOGUE_P (loop_vinfo) + && GET_MODE_SIZE (loop_vinfo->vector_mode) == 32 && ix86_tune_features[X86_TUNE_AVX512_TWO_EPILOGUES]) - { - if (GET_MODE_SIZE (loop_vinfo->vector_mode) == 64) - m_suggested_epilogue_mode = V32QImode; - else if (LOOP_VINFO_EPILOGUE_P (loop_vinfo) - && GET_MODE_SIZE (loop_vinfo->vector_mode) == 32) - m_suggested_epilogue_mode = V16QImode; - } + m_suggested_epilogue_mode = V16QImode; /* When a 128bit SSE vectorized epilogue still has a VF of 16 or larger enable a 64bit SSE epilogue. */ if (loop_vinfo |