aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorGanesh Gopalasubramanian <Ganesh.Gopalasubramanian@amd.com>2013-12-19 11:04:43 +0000
committerGanesh Gopalasubramanian <gganesh@gcc.gnu.org>2013-12-19 11:04:43 +0000
commit2f62165dce49a99b72d729aae9f2d992a85c7765 (patch)
treebf1aa32dc01d1e35668395d5ec1e419139aa9fe5 /gcc
parentf317df4f6d6006ed22cff09d17bc5028fda69965 (diff)
downloadgcc-2f62165dce49a99b72d729aae9f2d992a85c7765.zip
gcc-2f62165dce49a99b72d729aae9f2d992a85c7765.tar.gz
gcc-2f62165dce49a99b72d729aae9f2d992a85c7765.tar.bz2
Enable TARGET_LOOP_UNROLL_ADJUST for bdver3/bdver4
From-SVN: r206110
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog11
-rw-r--r--gcc/config/i386/i386.c62
-rw-r--r--gcc/config/i386/i386.h2
-rw-r--r--gcc/config/i386/x86-tune.def6
4 files changed, 81 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 13bb35a..6f69334 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,14 @@
+2013-12-19 Ganesh Gopalasubramanian <Ganesh.Gopalasubramanian@amd.com>
+
+ * config/i386/i386.c: Include cfgloop.h.
+ (ix86_loop_memcount): New function.
+ (ix86_loop_unroll_adjust): New function.
+ (TARGET_LOOP_UNROLL_ADJUST): Define.
+ * config/i386/i386.h
+ (TARGET_ADJUST_UNROLL): Define.
+ * config/i386/x86-tune.def
+ (X86_TUNE_ADJUST_UNROLL): Define.
+
2013-12-19 Marek Polacek <polacek@redhat.com>
* config/i386/i386.c (ix86_parse_stringop_strategy_string): Remove
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 862231b..f82d1a4 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -64,6 +64,7 @@ along with GCC; see the file COPYING3. If not see
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
+#include "cfgloop.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
@@ -44014,6 +44015,64 @@ ix86_simd_clone_usable (struct cgraph_node *node)
}
}
+/* Callback handed to for_each_rtx by ix86_loop_unroll_adjust; tallies
+   memory references into *MEM_COUNT.
+
+   When *X is a non-null MEM rtx, its access mode is measured in words
+   (GET_MODE_SIZE / UNITS_PER_WORD): a reference wider than four words
+   adds 2 to *MEM_COUNT, any other MEM adds 1.  Every other rtx leaves
+   *MEM_COUNT untouched.  The function always returns 0.
+
+   The resulting total determines the unrolling factor for the bdver3
+   and bdver4 architectures.  */
+
+static int
+ix86_loop_memcount (rtx *x, unsigned *mem_count)
+{
+  unsigned int words;
+
+  /* Only memory references contribute to the count.  */
+  if (*x == NULL_RTX || !MEM_P (*x))
+    return 0;
+
+  words = GET_MODE_SIZE (GET_MODE (*x)) / UNITS_PER_WORD;
+  *mem_count += (words > 4) ? 2 : 1;
+
+  return 0;
+}
+
+/* Adjust the unroll factor NUNROLL chosen for LOOP based on the
+   hardware capabilities.  For example, bdver3 has a loop buffer which
+   makes unrolling of smaller loops less important.
+
+   When TARGET_ADJUST_UNROLL is off, NUNROLL is returned unchanged.
+   Otherwise every non-debug insn of every basic block in the loop body
+   is scanned with ix86_loop_memcount to obtain a weighted count of
+   memory references.  If that count is between 1 and 32 inclusive, the
+   result is 32 / count (32 memory references is the heuristic budget),
+   but never more than NUNROLL: this hook may only reduce the factor
+   the unroller asked for.  With no memory references, or more than 32,
+   NUNROLL is returned unchanged.  */
+
+static unsigned
+ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
+{
+  basic_block *bbs;
+  rtx insn;
+  unsigned i;
+  unsigned mem_count = 0;
+  unsigned adjusted;
+
+  if (!TARGET_ADJUST_UNROLL)
+    return nunroll;
+
+  /* Count the number of memory references within the loop body.  */
+  bbs = get_loop_body (loop);
+  for (i = 0; i < loop->num_nodes; i++)
+    {
+      /* Walk from BB_HEAD through BB_END inclusive.  The last insn of a
+         block can reference memory as well, so the walk only stops
+         after BB_END itself has been scanned.  */
+      for (insn = BB_HEAD (bbs[i]); insn != NULL_RTX; insn = NEXT_INSN (insn))
+        {
+          if (NONDEBUG_INSN_P (insn))
+            for_each_rtx (&insn, (rtx_function) ix86_loop_memcount,
+                          &mem_count);
+
+          if (insn == BB_END (bbs[i]))
+            break;
+        }
+    }
+  /* get_loop_body returns an array that the caller must release.  */
+  free (bbs);
+
+  if (mem_count == 0 || mem_count > 32)
+    return nunroll;
+
+  /* Never hand back a larger factor than the one we were given.  */
+  adjusted = 32 / mem_count;
+  return adjusted < nunroll ? adjusted : nunroll;
+}
+
+
/* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
static bool
@@ -44499,6 +44558,9 @@ ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif
+/* Route the loop-unroll-adjust target hook to ix86_loop_unroll_adjust.  */
+#undef TARGET_LOOP_UNROLL_ADJUST
+#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
+
#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 7efd1e0..b6e7d46 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -443,6 +443,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
ix86_tune_features[X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE]
#define TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS \
ix86_tune_features[X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS]
+/* Reads the X86_TUNE_ADJUST_UNROLL entry of the active tuning table.  */
+#define TARGET_ADJUST_UNROLL \
+ ix86_tune_features[X86_TUNE_ADJUST_UNROLL]
/* Feature tests against the various architecture variations. */
enum ix86_arch_indices {
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 4c13c3a..9539685 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -503,3 +503,9 @@ DEF_TUNE (X86_TUNE_QIMODE_MATH, "qimode_math", ~0)
arithmetic to 32bit via PROMOTE_MODE macro. This code generation scheme
is usually used for RISC targets. */
DEF_TUNE (X86_TUNE_PROMOTE_QI_REGS, "promote_qi_regs", 0)
+
+/* X86_TUNE_ADJUST_UNROLL: This enables adjusting the unroll factor based
+   on hardware capabilities.  Bdver3 hardware has a loop buffer which makes
+   unrolling small loops less important.  For such architectures we adjust
+   the unroll factor so that the unrolled loop fits the loop buffer.  */
+DEF_TUNE (X86_TUNE_ADJUST_UNROLL, "adjust_unroll_factor", m_BDVER3 | m_BDVER4)