aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/i386/i386.cc
diff options
context:
space:
mode:
authorHongyu Wang <hongyu.wang@intel.com>2023-11-16 15:18:07 +0800
committerHongyu Wang <hongyu.wang@intel.com>2023-11-21 16:00:53 +0800
commit7ad308bd4cca871e7509a5eeaef83ee68678820f (patch)
tree4ded873e6bb1fcef57e3440493040b69ade7f521 /gcc/config/i386/i386.cc
parentf48244fad2afba7a0064cca2d979566cf0518554 (diff)
downloadgcc-7ad308bd4cca871e7509a5eeaef83ee68678820f.zip
gcc-7ad308bd4cca871e7509a5eeaef83ee68678820f.tar.gz
gcc-7ad308bd4cca871e7509a5eeaef83ee68678820f.tar.bz2
[APX PPX] Support Intel APX PPX
PPX stands for Push-Pop Acceleration. PUSH/PUSH2 and its corresponding POP can be marked with a 1-bit hint to indicate that the POP reads the value written by the PUSH from the stack. The processor tracks these marked instructions internally and fast-forwards register data between matching PUSH and POP instructions, without going through memory or through the training loop of the Fast Store Forwarding Predictor (FSFP). This feature can also be applied to PUSH2/POP2. For GCC, we emit an explicit suffix 'p' (paired) to indicate that the push/pop pair is marked with the PPX hint. To separate them from the original push/pop, we add an UNSPEC on top of those PUSH/POP patterns. In the first implementation we only emit them in the prologue/epilogue when saving/restoring callee-saved registers, to make sure the push/pop are paired. So an extra flag was added to check whether PPX insns can be emitted for those register save/restore interfaces. The PPX hint is purely a performance hint. If the 'p' suffix is not emitted for a paired push/pop, the PPX optimization will be disabled, while program semantics will not be affected at all. gcc/ChangeLog: * config/i386/i386-expand.h (gen_push): Add default bool parameter. (gen_pop): Likewise. * config/i386/i386-opts.h (enum apx_features): Add apx_ppx, add it to apx_all. * config/i386/i386.cc (ix86_emit_restore_reg_using_pop): Add ppx_p parameter for function declaration. (gen_push2): Add ppx_p parameter, emit push2p if ppx_p is true. (gen_push): Likewise. (ix86_emit_restore_reg_using_pop2): Likewise for pop2p. (ix86_emit_save_regs): Emit pushp/push2p under TARGET_APX_PPX. (ix86_emit_restore_reg_using_pop): Add ppx_p, emit popp insn and adjust cfi when ppx_p is true. (ix86_emit_restore_reg_using_pop2): Add ppx_p and pass it to its callee. (ix86_emit_restore_regs_using_pop2): Likewise. (ix86_expand_epilogue): Pass TARGET_APX_PPX to ix86_emit_restore_reg_using_pop. * config/i386/i386.h (TARGET_APX_PPX): New. * config/i386/i386.md (UNSPEC_APX_PPX): New unspec. 
(pushp_di): New define_insn. (popp_di): Likewise. (push2p_di): Likewise. (pop2p_di): Likewise. * config/i386/i386.opt: Add apx_ppx enum. gcc/testsuite/ChangeLog: * gcc.target/i386/apx-interrupt-1.c: Adjust option to restrict them under certain subfeatures. * gcc.target/i386/apx-push2pop2-1.c: Likewise. * gcc.target/i386/apx-push2pop2_force_drap-1.c: Likewise. * gcc.target/i386/apx-push2pop2_interrupt-1.c: Likewise. * gcc.target/i386/apx-ppx-1.c: New test.
Diffstat (limited to 'gcc/config/i386/i386.cc')
-rw-r--r--gcc/config/i386/i386.cc70
1 files changed, 42 insertions, 28 deletions
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 683ac64..bd34058 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -105,7 +105,7 @@ along with GCC; see the file COPYING3. If not see
static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
-static void ix86_emit_restore_reg_using_pop (rtx);
+static void ix86_emit_restore_reg_using_pop (rtx, bool = false);
#ifndef CHECK_STACK_LIMIT
@@ -6448,7 +6448,7 @@ output_set_got (rtx dest, rtx label)
/* Generate an "push" pattern for input ARG. */
rtx
-gen_push (rtx arg)
+gen_push (rtx arg, bool ppx_p)
{
struct machine_function *m = cfun->machine;
@@ -6459,10 +6459,10 @@ gen_push (rtx arg)
if (REG_P (arg) && GET_MODE (arg) != word_mode)
arg = gen_rtx_REG (word_mode, REGNO (arg));
- return gen_rtx_SET (gen_rtx_MEM (word_mode,
- gen_rtx_PRE_DEC (Pmode,
- stack_pointer_rtx)),
- arg);
+ rtx stack = gen_rtx_MEM (word_mode,
+ gen_rtx_PRE_DEC (Pmode,
+ stack_pointer_rtx));
+ return ppx_p ? gen_pushp_di (stack, arg) : gen_rtx_SET (stack, arg);
}
rtx
@@ -6486,15 +6486,16 @@ gen_pushfl (void)
/* Generate an "pop" pattern for input ARG. */
rtx
-gen_pop (rtx arg)
+gen_pop (rtx arg, bool ppx_p)
{
if (REG_P (arg) && GET_MODE (arg) != word_mode)
arg = gen_rtx_REG (word_mode, REGNO (arg));
- return gen_rtx_SET (arg,
- gen_rtx_MEM (word_mode,
- gen_rtx_POST_INC (Pmode,
- stack_pointer_rtx)));
+ rtx stack = gen_rtx_MEM (word_mode,
+ gen_rtx_POST_INC (Pmode,
+ stack_pointer_rtx));
+
+ return ppx_p ? gen_popp_di (arg, stack) : gen_rtx_SET (arg, stack);
}
rtx
@@ -6512,7 +6513,7 @@ gen_popfl (void)
/* Generate a "push2" pattern for input ARG. */
rtx
-gen_push2 (rtx mem, rtx reg1, rtx reg2)
+gen_push2 (rtx mem, rtx reg1, rtx reg2, bool ppx_p = false)
{
struct machine_function *m = cfun->machine;
const int offset = UNITS_PER_WORD * 2;
@@ -6527,7 +6528,8 @@ gen_push2 (rtx mem, rtx reg1, rtx reg2)
if (REG_P (reg2) && GET_MODE (reg2) != word_mode)
reg2 = gen_rtx_REG (word_mode, REGNO (reg2));
- return gen_push2_di (mem, reg1, reg2);
+ return ppx_p ? gen_push2p_di (mem, reg1, reg2):
+ gen_push2_di (mem, reg1, reg2);
}
/* Return >= 0 if there is an unused call-clobbered register available
@@ -7369,7 +7371,8 @@ ix86_emit_save_regs (void)
for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
{
- insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
+ insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
+ TARGET_APX_PPX));
RTX_FRAME_RELATED_P (insn) = 1;
}
}
@@ -7399,7 +7402,8 @@ ix86_emit_save_regs (void)
gen_rtx_REG (word_mode,
regno_list[0]),
gen_rtx_REG (word_mode,
- regno_list[1])));
+ regno_list[1]),
+ TARGET_APX_PPX));
RTX_FRAME_RELATED_P (insn) = 1;
rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));
@@ -7431,7 +7435,8 @@ ix86_emit_save_regs (void)
}
else
{
- insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
+ insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
+ TARGET_APX_PPX));
RTX_FRAME_RELATED_P (insn) = 1;
aligned = true;
}
@@ -7439,7 +7444,8 @@ ix86_emit_save_regs (void)
if (loaded_regnum == 1)
{
insn = emit_insn (gen_push (gen_rtx_REG (word_mode,
- regno_list[0])));
+ regno_list[0]),
+ TARGET_APX_PPX));
RTX_FRAME_RELATED_P (insn) = 1;
}
}
@@ -9268,13 +9274,13 @@ ix86_expand_prologue (void)
emit_insn (gen_prologue_use (stack_pointer_rtx));
}
-/* Emit code to restore REG using a POP insn. */
+/* Emit code to restore REG using a POP or POPP insn. */
static void
-ix86_emit_restore_reg_using_pop (rtx reg)
+ix86_emit_restore_reg_using_pop (rtx reg, bool ppx_p)
{
struct machine_function *m = cfun->machine;
- rtx_insn *insn = emit_insn (gen_pop (reg));
+ rtx_insn *insn = emit_insn (gen_pop (reg, ppx_p));
ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
m->fs.sp_offset -= UNITS_PER_WORD;
@@ -9328,14 +9334,19 @@ ix86_emit_restore_reg_using_pop (rtx reg)
/* Emit code to restore REG using a POP2 insn. */
static void
-ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2)
+ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2, bool ppx_p = false)
{
struct machine_function *m = cfun->machine;
const int offset = UNITS_PER_WORD * 2;
+ rtx_insn *insn;
rtx mem = gen_rtx_MEM (TImode, gen_rtx_POST_INC (Pmode,
stack_pointer_rtx));
- rtx_insn *insn = emit_insn (gen_pop2_di (reg1, mem, reg2));
+
+ if (ppx_p)
+ insn = emit_insn (gen_pop2p_di (reg1, mem, reg2));
+ else
+ insn = emit_insn (gen_pop2_di (reg1, mem, reg2));
RTX_FRAME_RELATED_P (insn) = 1;
@@ -9397,13 +9408,13 @@ ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2)
/* Emit code to restore saved registers using POP insns. */
static void
-ix86_emit_restore_regs_using_pop (void)
+ix86_emit_restore_regs_using_pop (bool ppx_p)
{
unsigned int regno;
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
- ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
+ ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno), ppx_p);
}
/* Emit code to restore saved registers using POP2 insns. */
@@ -9432,20 +9443,23 @@ ix86_emit_restore_regs_using_pop2 (void)
ix86_emit_restore_reg_using_pop2 (gen_rtx_REG (word_mode,
regno_list[0]),
gen_rtx_REG (word_mode,
- regno_list[1]));
+ regno_list[1]),
+ TARGET_APX_PPX);
loaded_regnum = 0;
regno_list[0] = regno_list[1] = -1;
}
}
else
{
- ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
+ ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno),
+ TARGET_APX_PPX);
aligned = true;
}
}
if (loaded_regnum == 1)
- ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno_list[0]));
+ ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno_list[0]),
+ TARGET_APX_PPX);
}
/* Emit code and notes for the LEAVE instruction. If insn is non-null,
@@ -9990,7 +10004,7 @@ ix86_expand_epilogue (int style)
if (TARGET_APX_PUSH2POP2 && m->func_type == TYPE_NORMAL)
ix86_emit_restore_regs_using_pop2 ();
else
- ix86_emit_restore_regs_using_pop ();
+ ix86_emit_restore_regs_using_pop (TARGET_APX_PPX);
}
/* If we used a stack pointer and haven't already got rid of it,