aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorH.J. Lu <hongjiu.lu@intel.com>2010-09-17 21:07:09 +0000
committerH.J. Lu <hjl@gcc.gnu.org>2010-09-17 14:07:09 -0700
commite7ed95a24d005410ee3fb772c97189f43dadaee7 (patch)
treede5a48ea5e9cadf018a942934426371b6d2cf5ab
parent8cab7c1398a6275c0b8e4132f3c2d1e22181c00b (diff)
downloadgcc-e7ed95a24d005410ee3fb772c97189f43dadaee7.zip
gcc-e7ed95a24d005410ee3fb772c97189f43dadaee7.tar.gz
gcc-e7ed95a24d005410ee3fb772c97189f43dadaee7.tar.bz2
Pad short functions with NOPs for Atom.
gcc/ 2010-09-17 H.J. Lu <hongjiu.lu@intel.com> Richard Henderson <rth@redhat.com> * config/i386/i386.c (initial_ix86_tune_features): Add X86_TUNE_PAD_SHORT_FUNCTION. (ix86_code_end): Pad with 8 NOPs for TARGET_PAD_SHORT_FUNCTION. (ix86_count_insn): New. (ix86_pad_short_function): Likewise. (ix86_reorg): Support TARGET_PAD_SHORT_FUNCTION. * config/i386/i386.h (ix86_tune_indices): Add X86_TUNE_PAD_SHORT_FUNCTION. (TARGET_PAD_SHORT_FUNCTION): New. * config/i386/i386.md (UNSPEC_NOPS): New. (nops): Likewise. gcc/testsuite/ 2010-09-17 H.J. Lu <hongjiu.lu@intel.com> * gcc.target/i386/pad-1.c: New. * gcc.target/i386/pad-2.c: Likewise. * gcc.target/i386/pad-3.c: Likewise. * gcc.target/i386/pad-4.c: Likewise. * gcc.target/i386/pad-5a.c: Likewise. * gcc.target/i386/pad-5b.c: Likewise. * gcc.target/i386/pad-6a.c: Likewise. * gcc.target/i386/pad-6b.c: Likewise. * gcc.target/i386/pad-7.c: Likewise. * gcc.target/i386/pad-8.c: Likewise. * gcc.target/i386/pad-9.c: Likewise. * gcc.target/i386/pad-10.c: Likewise. Co-Authored-By: Richard Henderson <rth@redhat.com> From-SVN: r164379
-rw-r--r--gcc/ChangeLog17
-rw-r--r--gcc/config/i386/i386.c126
-rw-r--r--gcc/config/i386/i386.h3
-rw-r--r--gcc/config/i386/i386.md34
-rw-r--r--gcc/testsuite/ChangeLog15
-rw-r--r--gcc/testsuite/gcc.target/i386/pad-1.c9
-rw-r--r--gcc/testsuite/gcc.target/i386/pad-10.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/pad-2.c9
-rw-r--r--gcc/testsuite/gcc.target/i386/pad-3.c15
-rw-r--r--gcc/testsuite/gcc.target/i386/pad-4.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/pad-5a.c12
-rw-r--r--gcc/testsuite/gcc.target/i386/pad-5b.c12
-rw-r--r--gcc/testsuite/gcc.target/i386/pad-6a.c12
-rw-r--r--gcc/testsuite/gcc.target/i386/pad-6b.c12
-rw-r--r--gcc/testsuite/gcc.target/i386/pad-7.c11
-rw-r--r--gcc/testsuite/gcc.target/i386/pad-8.c11
-rw-r--r--gcc/testsuite/gcc.target/i386/pad-9.c15
17 files changed, 343 insertions, 1 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 5301e8d..dcfc7e6 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,4 +1,21 @@
2010-09-17 H.J. Lu <hongjiu.lu@intel.com>
+ Richard Henderson <rth@redhat.com>
+
+ * config/i386/i386.c (initial_ix86_tune_features): Add
+ X86_TUNE_PAD_SHORT_FUNCTION.
+ (ix86_code_end): Pad with 8 NOPs for TARGET_PAD_SHORT_FUNCTION.
+ (ix86_count_insn): New.
+ (ix86_pad_short_function): Likewise.
+ (ix86_reorg): Support TARGET_PAD_SHORT_FUNCTION.
+
+ * config/i386/i386.h (ix86_tune_indices): Add
+ X86_TUNE_PAD_SHORT_FUNCTION.
+ (TARGET_PAD_SHORT_FUNCTION): New.
+
+ * config/i386/i386.md (UNSPEC_NOPS): New.
+ (nops): Likewise.
+
+2010-09-17 H.J. Lu <hongjiu.lu@intel.com>
PR middle-end/45234
* calls.c (expand_call): Make sure that all variable sized
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index abec057..d9f9237 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -1576,6 +1576,9 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
/* X86_TUNE_PAD_RETURNS */
m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
+ /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short function. */
+ m_ATOM,
+
/* X86_TUNE_EXT_80387_CONSTANTS */
m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
| m_CORE2 | m_GENERIC,
@@ -8021,6 +8024,11 @@ ix86_code_end (void)
xops[0] = gen_rtx_REG (Pmode, regno);
xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
+ /* Pad stack IP move with 4 instructions. 2 NOPs count as 1
+ instruction. */
+ if (TARGET_PAD_SHORT_FUNCTION)
+ output_asm_insn ("nop; nop; nop; nop; nop; nop; nop; nop",
+ xops);
output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
output_asm_insn ("ret", xops);
final_end_function ();
@@ -27882,6 +27890,120 @@ ix86_pad_returns (void)
}
}
+/* Count the instructions in BB that precede its RETURN jump, counting only
+ non-debug insns whose pattern is neither USE nor CLOBBER. Stops at 4. */
+
+static int
+ix86_count_insn_bb (basic_block bb)
+{
+ rtx insn;
+ int insn_count = 0;
+
+ /* Walk the insns of this block. Return early as soon as the count
+ reaches 4; otherwise return the number counted so far. */
+ FOR_BB_INSNS (bb, insn)
+ {
+ /* A RETURN jump only happens in exit blocks; stop counting there. */
+ if (JUMP_P (insn)
+ && GET_CODE (PATTERN (insn)) == RETURN)
+ break;
+
+ if (NONDEBUG_INSN_P (insn)
+ && GET_CODE (PATTERN (insn)) != USE
+ && GET_CODE (PATTERN (insn)) != CLOBBER)
+ {
+ insn_count++;
+ if (insn_count >= 4)
+ return insn_count;
+ }
+ }
+
+ return insn_count;
+}
+
+
+/* Count the minimum number of instructions on a code path ending in BB.
+ A result >= 4 means at least 4 instructions, or a path too long to count. */
+
+static int
+ix86_count_insn (basic_block bb)
+{
+ edge e;
+ edge_iterator ei;
+ int min_prev_count;
+
+ /* Only bother counting instructions along paths with no
+ more than 2 basic blocks between entry and exit. Given
+ that BB has an edge to exit, determine if a predecessor
+ of BB has an edge from entry. If so, compute the number
+ of instructions in the predecessor block; with multiple such
+ blocks, take the minimum. If BB itself follows entry, use 0. */
+ min_prev_count = 4;
+ FOR_EACH_EDGE (e, ei, bb->preds)
+ {
+ edge prev_e;
+ edge_iterator prev_ei;
+
+ if (e->src == ENTRY_BLOCK_PTR)
+ {
+ min_prev_count = 0;
+ break;
+ }
+ FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
+ {
+ if (prev_e->src == ENTRY_BLOCK_PTR)
+ {
+ int count = ix86_count_insn_bb (e->src);
+ if (count < min_prev_count)
+ min_prev_count = count;
+ break;
+ }
+ }
+ }
+
+ if (min_prev_count < 4)
+ min_prev_count += ix86_count_insn_bb (bb);
+
+ return min_prev_count;
+}
+
+/* Pad short return paths with NOPs so that each has 4 instructions. */
+
+static void
+ix86_pad_short_function (void)
+{
+ edge e;
+ edge_iterator ei;
+
+ FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
+ {
+ rtx ret = BB_END (e->src);
+ if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
+ {
+ int insn_count = ix86_count_insn (e->src);
+
+ /* Fewer than 4 instructions lead to this return: pad it. */
+ if (insn_count < 4)
+ {
+ rtx insn = ret;
+
+ /* Walk back to the NOTE_INSN_EPILOGUE_BEG note; else use RET. */
+ while (insn
+ && (!NOTE_P (insn)
+ || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
+ insn = PREV_INSN (insn);
+
+ if (!insn)
+ insn = ret;
+
+ /* Two NOPs are counted as one instruction, so emit 2 per missing one. */
+ insn_count = 2 * (4 - insn_count);
+ emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
+ }
+ }
+ }
+}
+
/* Implement machine specific optimizations. We implement padding of returns
for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
static void
@@ -27889,7 +28011,9 @@ ix86_reorg (void)
{
if (optimize && optimize_function_for_speed_p (cfun))
{
- if (TARGET_PAD_RETURNS)
+ if (TARGET_PAD_SHORT_FUNCTION)
+ ix86_pad_short_function ();
+ else if (TARGET_PAD_RETURNS)
ix86_pad_returns ();
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
if (TARGET_FOUR_JUMP_LIMIT)
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 22dd02b..aa246c6 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -299,6 +299,7 @@ enum ix86_tune_indices {
X86_TUNE_USE_BT,
X86_TUNE_USE_INCDEC,
X86_TUNE_PAD_RETURNS,
+ X86_TUNE_PAD_SHORT_FUNCTION,
X86_TUNE_EXT_80387_CONSTANTS,
X86_TUNE_SHORTEN_X87_SSE,
X86_TUNE_AVOID_VECTOR_DECODE,
@@ -385,6 +386,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
#define TARGET_USE_BT ix86_tune_features[X86_TUNE_USE_BT]
#define TARGET_USE_INCDEC ix86_tune_features[X86_TUNE_USE_INCDEC]
#define TARGET_PAD_RETURNS ix86_tune_features[X86_TUNE_PAD_RETURNS]
+#define TARGET_PAD_SHORT_FUNCTION \
+ ix86_tune_features[X86_TUNE_PAD_SHORT_FUNCTION]
#define TARGET_EXT_80387_CONSTANTS \
ix86_tune_features[X86_TUNE_EXT_80387_CONSTANTS]
#define TARGET_SHORTEN_X87_SSE ix86_tune_features[X86_TUNE_SHORTEN_X87_SSE]
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index ec43793..4ccd932 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -104,6 +104,7 @@
UNSPEC_LD_MPIC ; load_macho_picbase
UNSPEC_TRUNC_NOOP
UNSPEC_DIV_ALREADY_SPLIT
+ UNSPEC_NOPS
;; For SSE/MMX support:
UNSPEC_FIX_NOTRUNC
@@ -11465,6 +11466,39 @@
(set_attr "length_immediate" "0")
(set_attr "modrm" "0")])
+;; Generate nops. Operand 0 is the number of nops, from 1 to 8.
+(define_insn "nops"
+ [(unspec [(match_operand 0 "const_int_operand" "")]
+ UNSPEC_NOPS)]
+ "reload_completed"
+{
+ switch (INTVAL (operands[0]))
+ {
+ case 1:
+ return "nop";
+ case 2:
+ return "nop; nop";
+ case 3:
+ return "nop; nop; nop";
+ case 4:
+ return "nop; nop; nop; nop";
+ case 5:
+ return "nop; nop; nop; nop; nop";
+ case 6:
+ return "nop; nop; nop; nop; nop; nop";
+ case 7:
+ return "nop; nop; nop; nop; nop; nop; nop";
+ case 8:
+ return "nop; nop; nop; nop; nop; nop; nop; nop";
+ default:
+ gcc_unreachable ();
+ break;
+ }
+}
+ [(set (attr "length") (symbol_ref "INTVAL (operands[0])"))
+ (set_attr "length_immediate" "0")
+ (set_attr "modrm" "0")])
+
;; Pad to 16-byte boundary, max skip in op0. Used to avoid
;; branch prediction penalty for the third jump in a 16-byte
;; block on K8.
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 6bec743..f1193f9 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,20 @@
2010-09-17 H.J. Lu <hongjiu.lu@intel.com>
+ * gcc.target/i386/pad-1.c: New.
+ * gcc.target/i386/pad-2.c: Likewise.
+ * gcc.target/i386/pad-3.c: Likewise.
+ * gcc.target/i386/pad-4.c: Likewise.
+ * gcc.target/i386/pad-5a.c: Likewise.
+ * gcc.target/i386/pad-5b.c: Likewise.
+ * gcc.target/i386/pad-6a.c: Likewise.
+ * gcc.target/i386/pad-6b.c: Likewise.
+ * gcc.target/i386/pad-7.c: Likewise.
+ * gcc.target/i386/pad-8.c: Likewise.
+ * gcc.target/i386/pad-9.c: Likewise.
+ * gcc.target/i386/pad-10.c: Likewise.
+
+2010-09-17 H.J. Lu <hongjiu.lu@intel.com>
+
PR middle-end/45234
* gcc.dg/torture/stackalign/alloca-5.c: New.
diff --git a/gcc/testsuite/gcc.target/i386/pad-1.c b/gcc/testsuite/gcc.target/i386/pad-1.c
new file mode 100644
index 0000000..87a9d6c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fomit-frame-pointer -mtune=generic -S" } */
+/* { dg-final { scan-assembler "rep" } } */
+/* { dg-final { scan-assembler-not "nop" } } */
+
+void
+foo ()
+{
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-10.c b/gcc/testsuite/gcc.target/i386/pad-10.c
new file mode 100644
index 0000000..6ba3b78
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-10.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-not "nop" } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+extern void bar ();
+
+int
+foo2 (int z, int x)
+{
+ if (x == 1)
+ {
+ bar ();
+ return z;
+ }
+ else
+ return x + z;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-2.c b/gcc/testsuite/gcc.target/i386/pad-2.c
new file mode 100644
index 0000000..964547c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-2.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-times "nop; nop; nop; nop; nop; nop; nop; nop" 1 } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+void
+foo ()
+{
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-3.c b/gcc/testsuite/gcc.target/i386/pad-3.c
new file mode 100644
index 0000000..52442b4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-3.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-not "nop" } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+int s[8] = {1, 2, 3, 4, 5, 6, 7, 8};
+int d[8] = {11, 22, 33, 44, 55, 66, 77, 88};
+
+void
+foo ()
+{
+ int i;
+ for (i = 0; i < 8; i++)
+ d[i] = s[i] + 0x1000;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-4.c b/gcc/testsuite/gcc.target/i386/pad-4.c
new file mode 100644
index 0000000..a7033fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-4.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target ilp32 } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S -fPIC" } */
+/* { dg-final { scan-assembler-times "nop; nop; nop; nop; nop; nop; nop; nop" 1 } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+extern int bar;
+
+int
+foo ()
+{
+ return bar;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-5a.c b/gcc/testsuite/gcc.target/i386/pad-5a.c
new file mode 100644
index 0000000..9d0aa2a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-5a.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target ilp32 } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-times "nop; nop" 1 } } */
+/* { dg-final { scan-assembler-not "nop; nop; nop" } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+int
+foo (int x, int y, int z)
+{
+ return x + y + z;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-5b.c b/gcc/testsuite/gcc.target/i386/pad-5b.c
new file mode 100644
index 0000000..2e1cf12
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-5b.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-times "nop; nop; nop; nop" 1 } } */
+/* { dg-final { scan-assembler-not "nop; nop; nop; nop; nop" } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+int
+foo (int x, int y, int z)
+{
+ return x + y + z;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-6a.c b/gcc/testsuite/gcc.target/i386/pad-6a.c
new file mode 100644
index 0000000..e865967
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-6a.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target ilp32 } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-times "nop; nop; nop; nop" 1 } } */
+/* { dg-final { scan-assembler-not "nop; nop; nop; nop; nop" } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+int
+foo (int x, int y)
+{
+ return x + y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-6b.c b/gcc/testsuite/gcc.target/i386/pad-6b.c
new file mode 100644
index 0000000..41aeaee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-6b.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-times "nop; nop; nop; nop; nop; nop" 1 } } */
+/* { dg-final { scan-assembler-not "nop; nop; nop; nop; nop; nop; nop" } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+int
+foo (int x, int y)
+{
+ return x + y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-7.c b/gcc/testsuite/gcc.target/i386/pad-7.c
new file mode 100644
index 0000000..7a7493d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-7.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target ilp32 } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-not "nop" } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+int
+foo (int x, int y, int z)
+{
+ return x + y + z + y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-8.c b/gcc/testsuite/gcc.target/i386/pad-8.c
new file mode 100644
index 0000000..873a0a4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-8.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-times "nop; nop; nop; nop; nop; nop" 1 } } */
+/* { dg-final { scan-assembler-not "nop; nop; nop; nop; nop; nop; nop" } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+int
+foo (int x, int y)
+{
+ return y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-9.c b/gcc/testsuite/gcc.target/i386/pad-9.c
new file mode 100644
index 0000000..3d68805
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pad-9.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-times "nop; nop; nop; nop" 1 } } */
+/* { dg-final { scan-assembler-not "nop; nop; nop; nop; nop" } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+extern void bar (void);
+
+void
+foo (int x)
+{
+ if (x)
+ bar ();
+}