Diffstat (limited to 'gcc/config/i386/i386.cc')
-rw-r--r--  gcc/config/i386/i386.cc  617
1 file changed, 459 insertions, 158 deletions
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 3d629b0..9657c6a 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -335,6 +335,14 @@ static int const x86_64_ms_abi_int_parameter_registers[4] =
CX_REG, DX_REG, R8_REG, R9_REG
};
+/* Similar to Clang's preserve_none function parameter passing.
+ NB: Use DI_REG and SI_REG, see ix86_function_value_regno_p. */
+
+static int const x86_64_preserve_none_int_parameter_registers[6] =
+{
+ R12_REG, R13_REG, R14_REG, R15_REG, DI_REG, SI_REG
+};
+
static int const x86_64_int_return_registers[4] =
{
AX_REG, DX_REG, DI_REG, SI_REG
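As context for the x86_64_preserve_none_int_parameter_registers table added above, here is a minimal usage sketch. This is hypothetical illustration code, not part of the patch; it assumes the attribute is spelled preserve_none, as in the attribute lookups added later in this diff. Under that convention the first six integer arguments arrive in R12, R13, R14, R15, RDI and RSI instead of the usual RDI, RSI, RDX, RCX, R8 and R9.

/* Hypothetical 64-bit callee using the preserve_none convention.
   Arguments a..f are expected in r12, r13, r14, r15, rdi and rsi,
   matching the register order of the table above, and the callee
   preserves no call-saved registers for its caller.  */
__attribute__((preserve_none)) long
sum6 (long a, long b, long c, long d, long e, long f)
{
  return a + b + c + d + e + f;
}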
@@ -460,7 +468,8 @@ int ix86_arch_specified;
red-zone.
NB: Don't use red-zone for functions with no_caller_saved_registers
- and 32 GPRs since 128-byte red-zone is too small for 31 GPRs.
+ and 32 GPRs or 16 XMM registers since 128-byte red-zone is too small
+ for 31 GPRs or 15 GPRs + 16 XMM registers.
TODO: If we can reserve the first 2 WORDs, for PUSH and, another
for CALL, in red-zone, we can allow local indirect jumps with
@@ -471,7 +480,7 @@ ix86_using_red_zone (void)
{
return (TARGET_RED_ZONE
&& !TARGET_64BIT_MS_ABI
- && (!TARGET_APX_EGPR
+ && ((!TARGET_APX_EGPR && !TARGET_SSE)
|| (cfun->machine->call_saved_registers
!= TYPE_NO_CALLER_SAVED_REGISTERS))
&& (!cfun->machine->has_local_indirect_jump
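For the red-zone comment updated above, the underlying arithmetic (assuming 8-byte GPR and 16-byte XMM save slots) is: 31 GPRs * 8 = 248 bytes, while 15 GPRs * 8 + 16 XMM * 16 = 376 bytes; both exceed the 128-byte red zone, which is why ix86_using_red_zone now also refuses the red zone for no_caller_saved_registers functions when SSE registers must be saved.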
@@ -898,6 +907,18 @@ x86_64_elf_unique_section (tree decl, int reloc)
default_unique_section (decl, reloc);
}
+/* Return true if TYPE has the no_callee_saved_registers or preserve_none
+ attribute. */
+
+bool
+ix86_type_no_callee_saved_registers_p (const_tree type)
+{
+ return (lookup_attribute ("no_callee_saved_registers",
+ TYPE_ATTRIBUTES (type)) != NULL
+ || lookup_attribute ("preserve_none",
+ TYPE_ATTRIBUTES (type)) != NULL);
+}
+
#ifdef COMMON_ASM_OP
#ifndef LARGECOMM_SECTION_ASM_OP
@@ -1019,11 +1040,10 @@ ix86_function_ok_for_sibcall (tree decl, tree exp)
/* Sibling call isn't OK if callee has no callee-saved registers
and the calling function has callee-saved registers. */
- if (cfun->machine->call_saved_registers != TYPE_NO_CALLEE_SAVED_REGISTERS
- && (cfun->machine->call_saved_registers
- != TYPE_NO_CALLEE_SAVED_REGISTERS_EXCEPT_BP)
- && lookup_attribute ("no_callee_saved_registers",
- TYPE_ATTRIBUTES (type)))
+ if ((cfun->machine->call_saved_registers
+ != TYPE_NO_CALLEE_SAVED_REGISTERS)
+ && cfun->machine->call_saved_registers != TYPE_PRESERVE_NONE
+ && ix86_type_no_callee_saved_registers_p (type))
return false;
/* If outgoing reg parm stack space changes, we cannot do sibcall. */
@@ -1188,10 +1208,16 @@ ix86_comp_type_attributes (const_tree type1, const_tree type2)
!= ix86_function_regparm (type2, NULL))
return 0;
- if (lookup_attribute ("no_callee_saved_registers",
- TYPE_ATTRIBUTES (type1))
- != lookup_attribute ("no_callee_saved_registers",
- TYPE_ATTRIBUTES (type2)))
+ if (ix86_type_no_callee_saved_registers_p (type1)
+ != ix86_type_no_callee_saved_registers_p (type2))
+ return 0;
+
+ /* The preserve_none attribute uses a different calling convention and
+ is only available for 64-bit. */
+ if (TARGET_64BIT
+ && (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (type1))
+ != lookup_attribute ("preserve_none",
+ TYPE_ATTRIBUTES (type2))))
return 0;
return 1;
@@ -1553,7 +1579,10 @@ ix86_function_arg_regno_p (int regno)
if (call_abi == SYSV_ABI && regno == AX_REG)
return true;
- if (call_abi == MS_ABI)
+ if (cfun
+ && cfun->machine->call_saved_registers == TYPE_PRESERVE_NONE)
+ parm_regs = x86_64_preserve_none_int_parameter_registers;
+ else if (call_abi == MS_ABI)
parm_regs = x86_64_ms_abi_int_parameter_registers;
else
parm_regs = x86_64_int_parameter_registers;
@@ -1716,6 +1745,19 @@ ix86_asm_output_function_label (FILE *out_file, const char *fname,
}
}
+/* Output a user-defined label. In AT&T syntax, registers are prefixed
+ with %, so labels require no punctuation. In Intel syntax, registers
+ are unprefixed, so labels may clash with registers or other operators,
+ and require quoting. */
+void
+ix86_asm_output_labelref (FILE *file, const char *prefix, const char *label)
+{
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ fprintf (file, "%s%s", prefix, label);
+ else
+ fprintf (file, "\"%s%s\"", prefix, label);
+}
+
/* Implementation of call abi switching target hook. Specific to FNDECL
the specific call register sets are set. See also
ix86_conditional_register_usage for more details. */
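To illustrate the quoting added by the ix86_asm_output_labelref hook above (a hypothetical example, not taken from the patch): a C object may legally share its name with a register, and in the unprefixed Intel dialect the bare name would be read as that register, so the hook emits the symbol quoted; the AT&T dialect needs no quoting because registers carry a % prefix there.

/* Hypothetical: a global whose name collides with a register name.
   With -masm=att a reference is written as eax(%rip); with -masm=intel
   the symbol has to be emitted in quotes, which is what the second
   branch of ix86_asm_output_labelref produces.  */
int eax = 42;

int
get_eax (void)
{
  return eax;
}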
@@ -1795,8 +1837,7 @@ ix86_init_pic_reg (void)
add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
}
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
insert_insn_on_edge (seq, entry_edge);
@@ -1823,6 +1864,7 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
memset (cum, 0, sizeof (*cum));
+ tree preserve_none_type;
if (fndecl)
{
target = cgraph_node::get (fndecl);
@@ -1831,12 +1873,24 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
target = target->function_symbol ();
local_info_node = cgraph_node::local_info_node (target->decl);
cum->call_abi = ix86_function_abi (target->decl);
+ preserve_none_type = TREE_TYPE (target->decl);
}
else
- cum->call_abi = ix86_function_abi (fndecl);
+ {
+ cum->call_abi = ix86_function_abi (fndecl);
+ preserve_none_type = TREE_TYPE (fndecl);
+ }
}
else
- cum->call_abi = ix86_function_type_abi (fntype);
+ {
+ cum->call_abi = ix86_function_type_abi (fntype);
+ preserve_none_type = fntype;
+ }
+ cum->preserve_none_abi
+ = (preserve_none_type
+ && (lookup_attribute ("preserve_none",
+ TYPE_ATTRIBUTES (preserve_none_type))
+ != nullptr));
cum->caller = caller;
@@ -1998,8 +2052,7 @@ type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
&& GET_MODE_INNER (mode) == innermode)
{
- if (size == 64 && (!TARGET_AVX512F || !TARGET_EVEX512)
- && !TARGET_IAMCU)
+ if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
{
static bool warnedavx512f;
static bool warnedavx512f_ret;
@@ -3410,9 +3463,15 @@ function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
break;
}
+ const int *parm_regs;
+ if (cum->preserve_none_abi)
+ parm_regs = x86_64_preserve_none_int_parameter_registers;
+ else
+ parm_regs = x86_64_int_parameter_registers;
+
return construct_container (mode, orig_mode, type, 0, cum->nregs,
cum->sse_nregs,
- &x86_64_int_parameter_registers [cum->regno],
+ &parm_regs[cum->regno],
cum->sse_regno);
}
@@ -4422,7 +4481,7 @@ ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
/* AVX512F values are returned in ZMM0 if available. */
if (size == 64)
- return !TARGET_AVX512F || !TARGET_EVEX512;
+ return !TARGET_AVX512F;
}
if (mode == XFmode)
@@ -4577,6 +4636,12 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
if (max > X86_64_REGPARM_MAX)
max = X86_64_REGPARM_MAX;
+ const int *parm_regs;
+ if (cum->preserve_none_abi)
+ parm_regs = x86_64_preserve_none_int_parameter_registers;
+ else
+ parm_regs = x86_64_int_parameter_registers;
+
for (i = cum->regno; i < max; i++)
{
mem = gen_rtx_MEM (word_mode,
@@ -4584,8 +4649,7 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
MEM_NOTRAP_P (mem) = 1;
set_mem_alias_set (mem, set);
emit_move_insn (mem,
- gen_rtx_REG (word_mode,
- x86_64_int_parameter_registers[i]));
+ gen_rtx_REG (word_mode, parm_regs[i]));
}
if (ix86_varargs_fpr_size)
@@ -4739,8 +4803,7 @@ ix86_va_start (tree valist, rtx nextarg)
start_sequence ();
emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
push_topmost_sequence ();
emit_insn_after (seq, entry_of_function ());
@@ -5377,7 +5440,7 @@ standard_sse_constant_p (rtx x, machine_mode pred_mode)
switch (GET_MODE_SIZE (mode))
{
case 64:
- if (TARGET_AVX512F && TARGET_EVEX512)
+ if (TARGET_AVX512F)
return 2;
break;
case 32:
@@ -5430,10 +5493,8 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
{
if (TARGET_AVX512VL)
return "vpxord\t%x0, %x0, %x0";
- else if (TARGET_EVEX512)
- return "vpxord\t%g0, %g0, %g0";
else
- gcc_unreachable ();
+ return "vpxord\t%g0, %g0, %g0";
}
return "vpxor\t%x0, %x0, %x0";
@@ -5449,19 +5510,15 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
{
if (TARGET_AVX512VL)
return "vxorpd\t%x0, %x0, %x0";
- else if (TARGET_EVEX512)
- return "vxorpd\t%g0, %g0, %g0";
else
- gcc_unreachable ();
+ return "vxorpd\t%g0, %g0, %g0";
}
else
{
if (TARGET_AVX512VL)
return "vpxorq\t%x0, %x0, %x0";
- else if (TARGET_EVEX512)
- return "vpxorq\t%g0, %g0, %g0";
else
- gcc_unreachable ();
+ return "vpxorq\t%g0, %g0, %g0";
}
}
return "vxorpd\t%x0, %x0, %x0";
@@ -5478,19 +5535,15 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
{
if (TARGET_AVX512VL)
return "vxorps\t%x0, %x0, %x0";
- else if (TARGET_EVEX512)
- return "vxorps\t%g0, %g0, %g0";
else
- gcc_unreachable ();
+ return "vxorps\t%g0, %g0, %g0";
}
else
{
if (TARGET_AVX512VL)
return "vpxord\t%x0, %x0, %x0";
- else if (TARGET_EVEX512)
- return "vpxord\t%g0, %g0, %g0";
else
- gcc_unreachable ();
+ return "vpxord\t%g0, %g0, %g0";
}
}
return "vxorps\t%x0, %x0, %x0";
@@ -5511,7 +5564,7 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
case MODE_XI:
case MODE_V8DF:
case MODE_V16SF:
- gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
+ gcc_assert (TARGET_AVX512F);
return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
case MODE_OI:
@@ -5527,10 +5580,8 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
{
if (TARGET_AVX512VL)
return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
- else if (TARGET_EVEX512)
- return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
else
- gcc_unreachable ();
+ return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
}
return (TARGET_AVX
? "vpcmpeqd\t%0, %0, %0"
@@ -5544,7 +5595,7 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
{
if (GET_MODE_SIZE (mode) == 64)
{
- gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
+ gcc_assert (TARGET_AVX512F);
return "vpcmpeqd\t%t0, %t0, %t0";
}
else if (GET_MODE_SIZE (mode) == 32)
@@ -5556,7 +5607,7 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
}
else if (vector_all_ones_zero_extend_quarter_operand (x, mode))
{
- gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
+ gcc_assert (TARGET_AVX512F);
return "vpcmpeqd\t%x0, %x0, %x0";
}
@@ -5667,8 +5718,6 @@ ix86_get_ssemov (rtx *operands, unsigned size,
|| memory_operand (operands[1], mode))
gcc_unreachable ();
size = 64;
- /* We need TARGET_EVEX512 to move into zmm register. */
- gcc_assert (TARGET_EVEX512);
switch (type)
{
case opcode_int:
@@ -5707,7 +5756,7 @@ ix86_get_ssemov (rtx *operands, unsigned size,
: "%vmovaps");
else
opcode = (misaligned_p
- ? (TARGET_AVX512BW
+ ? (TARGET_AVX512BW && evex_reg_p
? "vmovdqu16"
: "%vmovdqu")
: "%vmovdqa");
@@ -5749,7 +5798,7 @@ ix86_get_ssemov (rtx *operands, unsigned size,
: "%vmovaps");
else
opcode = (misaligned_p
- ? (TARGET_AVX512BW
+ ? (TARGET_AVX512BW && evex_reg_p
? "vmovdqu8"
: "%vmovdqu")
: "%vmovdqa");
@@ -5769,7 +5818,7 @@ ix86_get_ssemov (rtx *operands, unsigned size,
: "%vmovaps");
else
opcode = (misaligned_p
- ? (TARGET_AVX512BW
+ ? (TARGET_AVX512BW && evex_reg_p
? "vmovdqu16"
: "%vmovdqu")
: "%vmovdqa");
@@ -6722,9 +6771,7 @@ ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
|| !frame_pointer_needed));
case TYPE_NO_CALLEE_SAVED_REGISTERS:
- return false;
-
- case TYPE_NO_CALLEE_SAVED_REGISTERS_EXCEPT_BP:
+ case TYPE_PRESERVE_NONE:
if (regno != HARD_FRAME_POINTER_REGNUM)
return false;
break;
@@ -6801,7 +6848,9 @@ ix86_nsaved_sseregs (void)
int nregs = 0;
int regno;
- if (!TARGET_64BIT_MS_ABI)
+ if (!TARGET_64BIT_MS_ABI
+ && (cfun->machine->call_saved_registers
+ != TYPE_NO_CALLER_SAVED_REGISTERS))
return 0;
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
@@ -6909,6 +6958,26 @@ ix86_pro_and_epilogue_can_use_push2pop2 (int nregs)
&& (nregs + aligned) >= 3;
}
+/* Check if push/pop should be used to save/restore registers. */
+static bool
+save_regs_using_push_pop (HOST_WIDE_INT to_allocate)
+{
+ return ((!to_allocate && cfun->machine->frame.nregs <= 1)
+ || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
+ /* If static stack checking is enabled and done with probes,
+ the registers need to be saved before allocating the frame. */
+ || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
+ /* If stack clash probing needs a loop, then it needs a
+ scratch register. But the returned register is only guaranteed
+ to be safe to use after register saves are complete. So if
+ stack clash protections are enabled and the allocated frame is
+ larger than the probe interval, then use pushes to save
+ callee saved registers. */
+ || (flag_stack_clash_protection
+ && !ix86_target_stack_probe ()
+ && to_allocate > get_probe_interval ()));
+}
+
/* Fill structure ix86_frame about frame of currently computed function. */
static void
@@ -6989,12 +7058,18 @@ ix86_compute_frame_layout (void)
gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
gcc_assert (preferred_alignment <= stack_alignment_needed);
- /* The only ABI saving SSE regs should be 64-bit ms_abi. */
- gcc_assert (TARGET_64BIT || !frame->nsseregs);
+ /* The only ABI saving SSE regs should be 64-bit ms_abi or functions
+ with the no_caller_saved_registers attribute. */
+ gcc_assert (TARGET_64BIT
+ || (cfun->machine->call_saved_registers
+ == TYPE_NO_CALLER_SAVED_REGISTERS)
+ || !frame->nsseregs);
if (TARGET_64BIT && m->call_ms2sysv)
{
gcc_assert (stack_alignment_needed >= 16);
- gcc_assert (!frame->nsseregs);
+ gcc_assert ((cfun->machine->call_saved_registers
+ == TYPE_NO_CALLER_SAVED_REGISTERS)
+ || !frame->nsseregs);
}
/* For SEH we have to limit the amount of code movement into the prologue.
@@ -7193,20 +7268,7 @@ ix86_compute_frame_layout (void)
/* Size prologue needs to allocate. */
to_allocate = offset - frame->sse_reg_save_offset;
- if ((!to_allocate && frame->nregs <= 1)
- || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
- /* If static stack checking is enabled and done with probes,
- the registers need to be saved before allocating the frame. */
- || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
- /* If stack clash probing needs a loop, then it needs a
- scratch register. But the returned register is only guaranteed
- to be safe to use after register saves are complete. So if
- stack clash protections are enabled and the allocated frame is
- larger than the probe interval, then use pushes to save
- callee saved registers. */
- || (flag_stack_clash_protection
- && !ix86_target_stack_probe ()
- && to_allocate > get_probe_interval ()))
+ if (save_regs_using_push_pop (to_allocate))
frame->save_regs_using_mov = false;
if (ix86_using_red_zone ()
@@ -7664,7 +7726,9 @@ ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
{
- ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
+ /* Skip registers already handled by separate shrink-wrapping. */
+ if (!cfun->machine->reg_is_wrapped_separately[regno])
+ ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
cfa_offset -= UNITS_PER_WORD;
}
}
@@ -7757,8 +7821,15 @@ pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
add_frame_related_expr = true;
}
- insn = emit_insn (gen_pro_epilogue_adjust_stack_add
- (Pmode, dest, src, addend));
+ /* Separate shrink-wrapping may insert prologue insns between TEST and JMP.
+ To avoid changing EFLAGS, emit the add without clobbering the flags. */
+ if (crtl->shrink_wrapped_separate)
+ insn = emit_insn (gen_pro_epilogue_adjust_stack_add_nocc
+ (Pmode, dest, src, addend));
+ else
+ insn = emit_insn (gen_pro_epilogue_adjust_stack_add
+ (Pmode, dest, src, addend));
+
if (style >= 0)
ix86_add_queued_cfa_restore_notes (insn);
@@ -7981,8 +8052,7 @@ ix86_get_drap_rtx (void)
start_sequence ();
drap_vreg = copy_to_reg (arg_ptr);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
if (!optimize)
@@ -9224,11 +9294,22 @@ ix86_expand_prologue (void)
doing this if we have to probe the stack; at least on x86_64 the
stack probe can turn into a call that clobbers a red zone location. */
else if (ix86_using_red_zone ()
- && (! TARGET_STACK_PROBE
- || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
+ && (! TARGET_STACK_PROBE
+ || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
{
+ HOST_WIDE_INT allocate_offset;
+ if (crtl->shrink_wrapped_separate)
+ {
+ allocate_offset = m->fs.sp_offset - frame.stack_pointer_offset;
+
+ /* Adjust the total offset at the beginning of the function. */
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (allocate_offset), -1,
+ m->fs.cfa_reg == stack_pointer_rtx);
+ m->fs.sp_offset = cfun->machine->frame.stack_pointer_offset;
+ }
+
ix86_emit_save_regs_using_mov (frame.reg_save_offset);
- cfun->machine->red_zone_used = true;
int_registers_saved = true;
}
}
@@ -9806,30 +9887,35 @@ ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
{
- rtx reg = gen_rtx_REG (word_mode, regno);
- rtx mem;
- rtx_insn *insn;
-
- mem = choose_baseaddr (cfa_offset, NULL);
- mem = gen_frame_mem (word_mode, mem);
- insn = emit_move_insn (reg, mem);
- if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
+ /* Skip registers already handled by separate shrink-wrapping. */
+ if (!cfun->machine->reg_is_wrapped_separately[regno])
{
- /* Previously we'd represented the CFA as an expression
- like *(%ebp - 8). We've just popped that value from
- the stack, which means we need to reset the CFA to
- the drap register. This will remain until we restore
- the stack pointer. */
- add_reg_note (insn, REG_CFA_DEF_CFA, reg);
- RTX_FRAME_RELATED_P (insn) = 1;
+ rtx reg = gen_rtx_REG (word_mode, regno);
+ rtx mem;
+ rtx_insn *insn;
- /* This means that the DRAP register is valid for addressing. */
- m->fs.drap_valid = true;
- }
- else
- ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
+ mem = choose_baseaddr (cfa_offset, NULL);
+ mem = gen_frame_mem (word_mode, mem);
+ insn = emit_move_insn (reg, mem);
+ if (m->fs.cfa_reg == crtl->drap_reg
+ && regno == REGNO (crtl->drap_reg))
+ {
+ /* Previously we'd represented the CFA as an expression
+ like *(%ebp - 8). We've just popped that value from
+ the stack, which means we need to reset the CFA to
+ the drap register. This will remain until we restore
+ the stack pointer. */
+ add_reg_note (insn, REG_CFA_DEF_CFA, reg);
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ /* DRAP register is valid for addressing. */
+ m->fs.drap_valid = true;
+ }
+ else
+ ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
+ }
cfa_offset -= UNITS_PER_WORD;
}
}
@@ -10108,10 +10194,11 @@ ix86_expand_epilogue (int style)
less work than reloading sp and popping the register. */
else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
restore_regs_via_mov = true;
- else if (TARGET_EPILOGUE_USING_MOVE
- && cfun->machine->use_fast_prologue_epilogue
- && (frame.nregs > 1
- || m->fs.sp_offset != reg_save_offset))
+ else if (crtl->shrink_wrapped_separate
+ || (TARGET_EPILOGUE_USING_MOVE
+ && cfun->machine->use_fast_prologue_epilogue
+ && (frame.nregs > 1
+ || m->fs.sp_offset != reg_save_offset)))
restore_regs_via_mov = true;
else if (frame_pointer_needed
&& !frame.nregs
@@ -10125,6 +10212,9 @@ ix86_expand_epilogue (int style)
else
restore_regs_via_mov = false;
+ if (crtl->shrink_wrapped_separate)
+ gcc_assert (restore_regs_via_mov);
+
if (restore_regs_via_mov || frame.nsseregs)
{
/* Ensure that the entire register save area is addressable via
@@ -10177,6 +10267,7 @@ ix86_expand_epilogue (int style)
gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
gcc_assert (!crtl->drap_reg);
gcc_assert (!frame.nregs);
+ gcc_assert (!crtl->shrink_wrapped_separate);
}
else if (restore_regs_via_mov)
{
@@ -10191,6 +10282,8 @@ ix86_expand_epilogue (int style)
rtx sa = EH_RETURN_STACKADJ_RTX;
rtx_insn *insn;
+ gcc_assert (!crtl->shrink_wrapped_separate);
+
/* Stack realignment doesn't work with eh_return. */
if (crtl->stack_realign_needed)
sorry ("Stack realignment not supported with "
@@ -11422,7 +11515,7 @@ ix86_legitimate_constant_p (machine_mode mode, rtx x)
case E_OImode:
case E_XImode:
if (!standard_sse_constant_p (x, mode)
- && GET_MODE_SIZE (TARGET_AVX512F && TARGET_EVEX512
+ && GET_MODE_SIZE (TARGET_AVX512F
? XImode
: (TARGET_AVX
? OImode
@@ -12469,13 +12562,13 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
if (TARGET_64BIT)
{
rtx rax = gen_rtx_REG (Pmode, AX_REG);
+ rtx rdi = gen_rtx_REG (Pmode, DI_REG);
rtx_insn *insns;
start_sequence ();
emit_call_insn
- (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr));
- insns = get_insns ();
- end_sequence ();
+ (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr, rdi));
+ insns = end_sequence ();
if (GET_MODE (x) != Pmode)
x = gen_rtx_ZERO_EXTEND (Pmode, x);
@@ -12529,8 +12622,7 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
start_sequence ();
emit_call_insn
(gen_tls_local_dynamic_base_64 (Pmode, rax, caddr));
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
/* Attach a unique REG_EQUAL, to allow the RTL optimizers to
share the LD_BASE result with other LD model accesses. */
@@ -20321,14 +20413,10 @@ ix86_vectorize_builtin_scatter (const_tree vectype,
{
bool si;
enum ix86_builtins code;
- const machine_mode mode = TYPE_MODE (TREE_TYPE (vectype));
if (!TARGET_AVX512F)
return NULL_TREE;
- if (!TARGET_EVEX512 && GET_MODE_SIZE (mode) == 64)
- return NULL_TREE;
-
if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 2u)
? !TARGET_USE_SCATTER_2PARTS
: (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
@@ -21450,7 +21538,7 @@ ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
- any of 512-bit wide vector mode
- any scalar mode. */
if (TARGET_AVX512F
- && ((VALID_AVX512F_REG_OR_XI_MODE (mode) && TARGET_EVEX512)
+ && ((VALID_AVX512F_REG_OR_XI_MODE (mode))
|| VALID_AVX512F_SCALAR_MODE (mode)))
return true;
@@ -21692,7 +21780,7 @@ ix86_set_reg_reg_cost (machine_mode mode)
case MODE_VECTOR_INT:
case MODE_VECTOR_FLOAT:
- if ((TARGET_AVX512F && TARGET_EVEX512 && VALID_AVX512F_REG_MODE (mode))
+ if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
|| (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
|| (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
|| (TARGET_SSE && VALID_SSE_REG_MODE (mode))
@@ -22148,9 +22236,9 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
/* Handling different vternlog variants. */
if ((GET_MODE_SIZE (mode) == 64
- ? (TARGET_AVX512F && TARGET_EVEX512)
+ ? TARGET_AVX512F
: (TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)))
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256)))
&& GET_MODE_SIZE (mode) >= 16
&& outer_code_i == SET
&& ternlog_operand (x, mode))
@@ -22499,8 +22587,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
{
/* (ior (not ...) ...) can be a single insn in AVX512. */
if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F
- && ((TARGET_EVEX512
- && GET_MODE_SIZE (mode) == 64)
+ && (GET_MODE_SIZE (mode) == 64
|| (TARGET_AVX512VL
&& (GET_MODE_SIZE (mode) == 32
|| GET_MODE_SIZE (mode) == 16))))
@@ -22591,8 +22678,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
/* (and (not ...) (not ...)) can be a single insn in AVX512. */
if (GET_CODE (right) == NOT && TARGET_AVX512F
- && ((TARGET_EVEX512
- && GET_MODE_SIZE (mode) == 64)
+ && (GET_MODE_SIZE (mode) == 64
|| (TARGET_AVX512VL
&& (GET_MODE_SIZE (mode) == 32
|| GET_MODE_SIZE (mode) == 16))))
@@ -22662,8 +22748,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
{
/* (not (xor ...)) can be a single insn in AVX512. */
if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F
- && ((TARGET_EVEX512
- && GET_MODE_SIZE (mode) == 64)
+ && (GET_MODE_SIZE (mode) == 64
|| (TARGET_AVX512VL
&& (GET_MODE_SIZE (mode) == 32
|| GET_MODE_SIZE (mode) == 16))))
@@ -22952,7 +23037,17 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
}
/* This is masked instruction, assume the same cost,
as nonmasked variant. */
- else if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
+ else if (TARGET_AVX512F
+ && (register_operand (mask, GET_MODE (mask))
+ /* Redundant cleanup of the high bits for a kmask with VL=2/4,
+ i.e. (vec_merge op0, op1, (and op3 15)). */
+ || (GET_CODE (mask) == AND
+ && register_operand (XEXP (mask, 0), GET_MODE (mask))
+ && CONST_INT_P (XEXP (mask, 1))
+ && ((INTVAL (XEXP (mask, 1)) == 3
+ && GET_MODE_NUNITS (mode) == 2)
+ || (INTVAL (XEXP (mask, 1)) == 15
+ && GET_MODE_NUNITS (mode) == 4)))))
{
*total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
+ rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
@@ -23262,7 +23357,9 @@ x86_this_parameter (tree function)
{
const int *parm_regs;
- if (ix86_function_type_abi (type) == MS_ABI)
+ if (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (type)))
+ parm_regs = x86_64_preserve_none_int_parameter_registers;
+ else if (ix86_function_type_abi (type) == MS_ABI)
parm_regs = x86_64_ms_abi_int_parameter_registers;
else
parm_regs = x86_64_int_parameter_registers;
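A side effect worth noting for the thunk handling above (inferred from the new register table rather than stated in the patch): for a preserve_none method, the implicit this pointer selected by x86_this_parameter comes from x86_64_preserve_none_int_parameter_registers, so it lands in R12 (or R13 when a hidden return pointer occupies the first slot) instead of RDI/RSI under the SysV ABI or RCX/RDX under the MS ABI.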
@@ -24575,7 +24672,7 @@ ix86_vector_mode_supported_p (machine_mode mode)
return true;
if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
return true;
- if (TARGET_AVX512F && TARGET_EVEX512 && VALID_AVX512F_REG_MODE (mode))
+ if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
return true;
if ((TARGET_MMX || TARGET_MMX_WITH_SSE)
&& VALID_MMX_REG_MODE (mode))
@@ -24823,8 +24920,7 @@ ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
}
}
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
if (saw_asm_flag)
return seq;
@@ -25200,12 +25296,18 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
/* One vinserti128 for combining two SSE vectors for AVX256. */
else if (GET_MODE_BITSIZE (mode) == 256)
return ((n - 2) * ix86_cost->sse_op
- + ix86_vec_cost (mode, ix86_cost->addss));
+ + ix86_vec_cost (mode, ix86_cost->sse_op));
/* One vinserti64x4 and two vinserti128 for combining SSE
and AVX256 vectors to AVX512. */
else if (GET_MODE_BITSIZE (mode) == 512)
- return ((n - 4) * ix86_cost->sse_op
- + 3 * ix86_vec_cost (mode, ix86_cost->addss));
+ {
+ machine_mode half_mode
+ = mode_for_vector (GET_MODE_INNER (mode),
+ GET_MODE_NUNITS (mode) / 2).require ();
+ return ((n - 4) * ix86_cost->sse_op
+ + 2 * ix86_vec_cost (half_mode, ix86_cost->sse_op)
+ + ix86_vec_cost (mode, ix86_cost->sse_op));
+ }
gcc_unreachable ();
}
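As a worked example of the updated 512-bit formula (taking a construction from n = 16 scalars): the cost is (16 - 4) scalar inserts charged at sse_op, plus two inserts charged at the 256-bit half mode and one insert charged at the full 512-bit mode, which matches the "one vinserti64x4 and two vinserti128" description in the comment above; the previous code charged all three combining steps at addss cost in the full mode instead.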
@@ -25373,7 +25475,7 @@ ix86_preferred_simd_mode (scalar_mode mode)
switch (mode)
{
case E_QImode:
- if (TARGET_AVX512BW && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
return V64QImode;
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V32QImode;
@@ -25381,7 +25483,7 @@ ix86_preferred_simd_mode (scalar_mode mode)
return V16QImode;
case E_HImode:
- if (TARGET_AVX512BW && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
return V32HImode;
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V16HImode;
@@ -25389,7 +25491,7 @@ ix86_preferred_simd_mode (scalar_mode mode)
return V8HImode;
case E_SImode:
- if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
return V16SImode;
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V8SImode;
@@ -25397,7 +25499,7 @@ ix86_preferred_simd_mode (scalar_mode mode)
return V4SImode;
case E_DImode:
- if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
return V8DImode;
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V4DImode;
@@ -25411,16 +25513,15 @@ ix86_preferred_simd_mode (scalar_mode mode)
{
if (TARGET_PREFER_AVX128)
return V8HFmode;
- else if (TARGET_PREFER_AVX256 || !TARGET_EVEX512)
+ else if (TARGET_PREFER_AVX256)
return V16HFmode;
}
- if (TARGET_EVEX512)
- return V32HFmode;
+ return V32HFmode;
}
return word_mode;
case E_BFmode:
- if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
return V32BFmode;
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V16BFmode;
@@ -25428,7 +25529,7 @@ ix86_preferred_simd_mode (scalar_mode mode)
return V8BFmode;
case E_SFmode:
- if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
return V16SFmode;
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V8SFmode;
@@ -25436,7 +25537,7 @@ ix86_preferred_simd_mode (scalar_mode mode)
return V4SFmode;
case E_DFmode:
- if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
return V8DFmode;
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V4DFmode;
@@ -25456,13 +25557,13 @@ ix86_preferred_simd_mode (scalar_mode mode)
static unsigned int
ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
{
- if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
{
modes->safe_push (V64QImode);
modes->safe_push (V32QImode);
modes->safe_push (V16QImode);
}
- else if (TARGET_AVX512F && TARGET_EVEX512 && all)
+ else if (TARGET_AVX512F && all)
{
modes->safe_push (V32QImode);
modes->safe_push (V16QImode);
@@ -25500,7 +25601,7 @@ ix86_get_mask_mode (machine_mode data_mode)
unsigned elem_size = vector_size / nunits;
/* Scalar mask case. */
- if ((TARGET_AVX512F && TARGET_EVEX512 && vector_size == 64)
+ if ((TARGET_AVX512F && vector_size == 64)
|| (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))
/* AVX512FP16 only supports vector comparison
to kmask for _Float16. */
@@ -26064,7 +26165,22 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
else
{
m_num_gpr_needed[where]++;
- stmt_cost += COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2;
+
+ int cost = COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2;
+
+ /* For integer construction, the number of actual GPR -> XMM
+ moves will be somewhere between 0 and n.
+ We do not have a very good idea of the actual number, since
+ the source may be a constant, memory or a chain of
+ instructions that will later be converted by the
+ scalar-to-vector pass. */
+ if (kind == vec_construct
+ && GET_MODE_BITSIZE (mode) == 256)
+ cost *= 2;
+ else if (kind == vec_construct
+ && GET_MODE_BITSIZE (mode) == 512)
+ cost *= 3;
+ stmt_cost += cost;
}
}
}
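Put differently, the GPR-to-XMM component added here is COSTS_N_INSNS (integer_to_sse) / 2 per scalar for a 128-bit construction and is scaled by 2 for 256-bit and by 3 for 512-bit constructions, a rough proxy for the wider spread of possible move counts described in the comment.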
@@ -26156,14 +26272,10 @@ ix86_vector_costs::finish_cost (const vector_costs *scalar_costs)
/* When X86_TUNE_AVX512_TWO_EPILOGUES is enabled arrange for both
an AVX2 and an SSE epilogue for AVX512 vectorized loops. */
if (loop_vinfo
+ && LOOP_VINFO_EPILOGUE_P (loop_vinfo)
+ && GET_MODE_SIZE (loop_vinfo->vector_mode) == 32
&& ix86_tune_features[X86_TUNE_AVX512_TWO_EPILOGUES])
- {
- if (GET_MODE_SIZE (loop_vinfo->vector_mode) == 64)
- m_suggested_epilogue_mode = V32QImode;
- else if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)
- && GET_MODE_SIZE (loop_vinfo->vector_mode) == 32)
- m_suggested_epilogue_mode = V16QImode;
- }
+ m_suggested_epilogue_mode = V16QImode;
/* When a 128bit SSE vectorized epilogue still has a VF of 16 or larger
enable a 64bit SSE epilogue. */
if (loop_vinfo
@@ -26291,7 +26403,7 @@ ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
{
/* If the function isn't exported, we can pick up just one ISA
for the clones. */
- if (TARGET_AVX512F && TARGET_EVEX512)
+ if (TARGET_AVX512F)
clonei->vecsize_mangle = 'e';
else if (TARGET_AVX2)
clonei->vecsize_mangle = 'd';
@@ -26383,17 +26495,17 @@ ix86_simd_clone_usable (struct cgraph_node *node, machine_mode)
return -1;
if (!TARGET_AVX)
return 0;
- return (TARGET_AVX512F && TARGET_EVEX512) ? 3 : TARGET_AVX2 ? 2 : 1;
+ return TARGET_AVX512F ? 3 : TARGET_AVX2 ? 2 : 1;
case 'c':
if (!TARGET_AVX)
return -1;
- return (TARGET_AVX512F && TARGET_EVEX512) ? 2 : TARGET_AVX2 ? 1 : 0;
+ return TARGET_AVX512F ? 2 : TARGET_AVX2 ? 1 : 0;
case 'd':
if (!TARGET_AVX2)
return -1;
- return (TARGET_AVX512F && TARGET_EVEX512) ? 1 : 0;
+ return TARGET_AVX512F ? 1 : 0;
case 'e':
- if (!TARGET_AVX512F || !TARGET_EVEX512)
+ if (!TARGET_AVX512F)
return -1;
return 0;
default:
@@ -28065,6 +28177,195 @@ ix86_cannot_copy_insn_p (rtx_insn *insn)
#undef TARGET_DOCUMENTATION_NAME
#define TARGET_DOCUMENTATION_NAME "x86"
+/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
+sbitmap
+ix86_get_separate_components (void)
+{
+ HOST_WIDE_INT offset, to_allocate;
+ sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
+ bitmap_clear (components);
+ struct machine_function *m = cfun->machine;
+
+ offset = m->frame.stack_pointer_offset;
+ to_allocate = offset - m->frame.sse_reg_save_offset;
+
+ /* Shrink wrap separate uses MOV, which means APX PPX cannot be used.
+ Experiments show that APX PPX can speed up the prologue. If the function
+ does not exit early during actual execution, then using APX PPX is faster.
+ If the function always exits early during actual execution, then shrink
+ wrap separate reduces the number of MOV (PUSH/POP) instructions actually
+ executed, thus speeding up execution.
+ foo:
+ movl $1, %eax
+ testq %rdi, %rdi
+ jne .L60
+ ret ---> early return.
+ .L60:
+ subq $88, %rsp ---> belongs to the prologue.
+ xorl %eax, %eax
+ movq %rbx, 40(%rsp) ---> belongs to the prologue.
+ movq 8(%rdi), %rbx
+ movq %rbp, 48(%rsp) ---> belongs to the prologue.
+ movq %rdi, %rbp
+ testq %rbx, %rbx
+ jne .L61
+ movq 40(%rsp), %rbx
+ movq 48(%rsp), %rbp
+ addq $88, %rsp
+ ret
+ .L61:
+ movq %r12, 56(%rsp) ---> belongs to the prologue.
+ movq %r13, 64(%rsp) ---> belongs to the prologue.
+ movq %r14, 72(%rsp) ---> belongs to the prologue.
+ ... ...
+
+ Disable shrink wrap separate when PPX is enabled. */
+ if ((TARGET_APX_PPX && !crtl->calls_eh_return)
+ || cfun->machine->func_type != TYPE_NORMAL
+ || TARGET_SEH
+ || crtl->stack_realign_needed
+ || m->call_ms2sysv)
+ return components;
+
+ /* Since separate shrink-wrapping uses MOV instead of PUSH/POP, disable
+ it when MOV saves must not be used. */
+ if (save_regs_using_push_pop (to_allocate))
+ return components;
+
+ for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
+ {
+ /* Skip registers with large offsets, where a pseudo may be needed. */
+ if (IN_RANGE (offset, -0x8000, 0x7fff))
+ bitmap_set_bit (components, regno);
+ offset += UNITS_PER_WORD;
+ }
+
+ /* Don't mess with the following registers. */
+ if (frame_pointer_needed)
+ bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
+
+ if (crtl->drap_reg)
+ bitmap_clear_bit (components, REGNO (crtl->drap_reg));
+
+ if (pic_offset_table_rtx)
+ bitmap_clear_bit (components, REAL_PIC_OFFSET_TABLE_REGNUM);
+
+ return components;
+}
+
+/* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
+sbitmap
+ix86_components_for_bb (basic_block bb)
+{
+ bitmap in = DF_LIVE_IN (bb);
+ bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
+ bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
+
+ sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
+ bitmap_clear (components);
+
+ function_abi_aggregator callee_abis;
+ rtx_insn *insn;
+ FOR_BB_INSNS (bb, insn)
+ if (CALL_P (insn))
+ callee_abis.note_callee_abi (insn_callee_abi (insn));
+ HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi);
+
+ /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
+ for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (!fixed_regs[regno]
+ && (TEST_HARD_REG_BIT (extra_caller_saves, regno)
+ || bitmap_bit_p (in, regno)
+ || bitmap_bit_p (gen, regno)
+ || bitmap_bit_p (kill, regno)))
+ bitmap_set_bit (components, regno);
+
+ return components;
+}
+
+/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
+void
+ix86_disqualify_components (sbitmap, edge, sbitmap, bool)
+{
+ /* Nothing to do for x86. */
+}
+
+/* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
+void
+ix86_emit_prologue_components (sbitmap components)
+{
+ HOST_WIDE_INT cfa_offset;
+ struct machine_function *m = cfun->machine;
+
+ cfa_offset = m->frame.reg_save_offset + m->fs.sp_offset
+ - m->frame.stack_pointer_offset;
+ for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
+ {
+ if (bitmap_bit_p (components, regno))
+ ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
+ cfa_offset -= UNITS_PER_WORD;
+ }
+}
+
+/* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
+void
+ix86_emit_epilogue_components (sbitmap components)
+{
+ HOST_WIDE_INT cfa_offset;
+ struct machine_function *m = cfun->machine;
+ cfa_offset = m->frame.reg_save_offset + m->fs.sp_offset
+ - m->frame.stack_pointer_offset;
+
+ for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
+ {
+ if (bitmap_bit_p (components, regno))
+ {
+ rtx reg = gen_rtx_REG (word_mode, regno);
+ rtx mem;
+ rtx_insn *insn;
+
+ mem = choose_baseaddr (cfa_offset, NULL);
+ mem = gen_frame_mem (word_mode, mem);
+ insn = emit_move_insn (reg, mem);
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_CFA_RESTORE, reg);
+ }
+ cfa_offset -= UNITS_PER_WORD;
+ }
+}
+
+/* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
+void
+ix86_set_handled_components (sbitmap components)
+{
+ for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (bitmap_bit_p (components, regno))
+ {
+ cfun->machine->reg_is_wrapped_separately[regno] = true;
+ cfun->machine->use_fast_prologue_epilogue = true;
+ cfun->machine->frame.save_regs_using_mov = true;
+ }
+}
+
+#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
+#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS ix86_get_separate_components
+#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
+#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB ix86_components_for_bb
+#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
+#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS ix86_disqualify_components
+#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
+#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
+ ix86_emit_prologue_components
+#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
+#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
+ ix86_emit_epilogue_components
+#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
+#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS ix86_set_handled_components
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-i386.h"