[AArch64] Handle literal pools for functions > 1 MiB in size.

This patch fixes the issue in PR63304 where we have functions that are > 1MiB. The idea is to use adrp / ldr or adrp / add instructions to address the literal pools under the control of a command-line option. I would like to turn this on by default on trunk but keep this disabled by default for the release branches in order to get some serious testing for this feature while it bakes on trunk. As a follow-up I would like to try and see if estimate_num_insns or something else can give us a heuristic to turn this on for "large" functions. After all, the number of incidences of this is quite low in real life, so maybe we should look to restrict this use as much as possible on the grounds that this code generation implies an extra integer register for addressing for every floating-point and vector constant and I don't think that's great in code that already may have high register pressure. Tested on aarch64-none-elf with no regressions. A previous version was bootstrapped and regression tested. Applied to trunk. regards Ramana 2015-09-14 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> PR target/63304 * config/aarch64/aarch64.c (aarch64_expand_mov_immediate): Handle nopcrelative_literal_loads. (aarch64_classify_address): Likewise. (aarch64_constant_pool_reload_icode): Define. (aarch64_secondary_reload): Handle secondary reloads for literal pools. (aarch64_override_options): Handle nopcrelative_literal_loads. (aarch64_classify_symbol): Handle nopcrelative_literal_loads. * config/aarch64/aarch64.md (aarch64_reload_movcp<GPF_TF:mode><P:mode>): Define. (aarch64_reload_movcp<VALL:mode><P:mode>): Likewise. * config/aarch64/aarch64.opt (mpc-relative-literal-loads): New option. * config/aarch64/predicates.md (aarch64_constant_pool_symref): New predicate. * doc/invoke.texi (mpc-relative-literal-loads): Document. From-SVN: r227748
author: Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> 2015-09-14 13:16:59 +0000
committer: Ramana Radhakrishnan <ramana@gcc.gnu.org> 2015-09-14 13:16:59 +0000
commit: b4f50fd4c09a6bb80947e7ea4ee46c976ad44d96 (patch)
tree: 1da7640d452969bb8627ff1d214216b843d57b15
parent: 641f1ab4ed20677f2c9515bb039d8263a463576f (diff)
download: gcc-b4f50fd4c09a6bb80947e7ea4ee46c976ad44d96.zip
gcc-b4f50fd4c09a6bb80947e7ea4ee46c976ad44d96.tar.gz
gcc-b4f50fd4c09a6bb80947e7ea4ee46c976ad44d96.tar.bz2
7 files changed, 169 insertions, 4 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d5ef728..5484f2b 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,22 @@
+2015-09-14  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
+
+    	PR target/63304
+    	* config/aarch64/aarch64.c (aarch64_expand_mov_immediate): Handle
+    	nopcrelative_literal_loads.
+    	(aarch64_classify_address): Likewise.
+    	(aarch64_constant_pool_reload_icode): Define.
+    	(aarch64_secondary_reload): Handle secondary reloads for
+    	literal pools.
+    	(aarch64_override_options): Handle nopcrelative_literal_loads.
+    	(aarch64_classify_symbol): Handle nopcrelative_literal_loads.
+    	* config/aarch64/aarch64.md (aarch64_reload_movcp<GPF_TF:mode><P:mode>):
+    	Define.
+    	(aarch64_reload_movcp<VALL:mode><P:mode>): Likewise.
+    	* config/aarch64/aarch64.opt (mpc-relative-literal-loads): New option.
+    	* config/aarch64/predicates.md (aarch64_constant_pool_symref): New
+    	predicate.
+    	* doc/invoke.texi (mpc-relative-literal-loads): Document.
+
 2015-09-14  John David Anglin  <danglin@gcc.gnu.org>
 
 	PR middle-end/67401
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index b2a481b..4d2126b 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1734,11 +1734,27 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
 	      aarch64_emit_move (dest, base);
 	      return;
 	    }
+
 	  mem = force_const_mem (ptr_mode, imm);
 	  gcc_assert (mem);
+
+	  /* If we aren't generating PC relative literals, then
+	     we need to expand the literal pool access carefully.
+	     This is something that needs to be done in a number
+	     of places, so could well live as a separate function.  */
+	  if (nopcrelative_literal_loads)
+	    {
+	      gcc_assert (can_create_pseudo_p ());
+	      base = gen_reg_rtx (ptr_mode);
+	      aarch64_expand_mov_immediate (base, XEXP (mem, 0));
+	      mem = gen_rtx_MEM (ptr_mode, base);
+	    }
+
 	  if (mode != ptr_mode)
 	    mem = gen_rtx_ZERO_EXTEND (mode, mem);
+
 	  emit_insn (gen_rtx_SET (dest, mem));
+
 	  return;
 
         case SYMBOL_SMALL_TLSGD:
@@ -3854,9 +3870,10 @@ aarch64_classify_address (struct aarch64_address_info *info,
 	  rtx sym, addend;
 
 	  split_const (x, &sym, &addend);
-	  return (GET_CODE (sym) == LABEL_REF
-		  || (GET_CODE (sym) == SYMBOL_REF
-		      && CONSTANT_POOL_ADDRESS_P (sym)));
+	  return ((GET_CODE (sym) == LABEL_REF
+		   || (GET_CODE (sym) == SYMBOL_REF
+		       && CONSTANT_POOL_ADDRESS_P (sym)
+		       && !nopcrelative_literal_loads)));
 	}
       return false;
 
@@ -5039,12 +5056,69 @@ aarch64_legitimize_reload_address (rtx *x_p,
 }
 
 
+/* Return the reload pattern icode for a constant pool load in mode MODE.  */
+static enum insn_code
+aarch64_constant_pool_reload_icode (machine_mode mode)
+{
+  switch (mode)
+    {
+    case SFmode:
+      return CODE_FOR_aarch64_reload_movcpsfdi;
+
+    case DFmode:
+      return CODE_FOR_aarch64_reload_movcpdfdi;
+
+    case TFmode:
+      return CODE_FOR_aarch64_reload_movcptfdi;
+
+    case V8QImode:
+      return CODE_FOR_aarch64_reload_movcpv8qidi;
+
+    case V16QImode:
+      return CODE_FOR_aarch64_reload_movcpv16qidi;
+
+    case V4HImode:
+      return CODE_FOR_aarch64_reload_movcpv4hidi;
+
+    case V8HImode:
+      return CODE_FOR_aarch64_reload_movcpv8hidi;
+
+    case V2SImode:
+      return CODE_FOR_aarch64_reload_movcpv2sidi;
+
+    case V4SImode:
+      return CODE_FOR_aarch64_reload_movcpv4sidi;
+
+    case V2DImode:
+      return CODE_FOR_aarch64_reload_movcpv2didi;
+
+    case V2DFmode:
+      return CODE_FOR_aarch64_reload_movcpv2dfdi;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  gcc_unreachable ();
+}
 static reg_class_t
 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
 			  reg_class_t rclass,
 			  machine_mode mode,
 			  secondary_reload_info *sri)
 {
+
+  /* If we have to disable direct literal pool loads and stores because the
+     function is too big, then we need a scratch register.  */
+  if (MEM_P (x) && GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)
+      && (SCALAR_FLOAT_MODE_P (GET_MODE (x))
+	  || targetm.vector_mode_supported_p (GET_MODE (x)))
+      && nopcrelative_literal_loads)
+    {
+      sri->icode = aarch64_constant_pool_reload_icode (mode);
+      return NO_REGS;
+    }
+
   /* Without the TARGET_SIMD instructions we cannot move a Q register
      to a Q register directly.  We need a scratch.  */
   if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
@@ -7693,6 +7767,24 @@ aarch64_override_options_after_change_1 (struct gcc_options *opts)
       if (opts->x_align_functions <= 0)
 	opts->x_align_functions = aarch64_tune_params.function_align;
     }
+
+  /* If nopcrelative_literal_loads is set on the command line, this
+     implies that the user asked for PC relative literal loads.  */
+  if (nopcrelative_literal_loads == 1)
+    nopcrelative_literal_loads = 0;
+
+  /* If it is not set on the command line, we default to no
+     pc relative literal loads.  */
+  if (nopcrelative_literal_loads == 2)
+    nopcrelative_literal_loads = 1;
+
+  /* In the tiny memory model it makes no sense
+     to disallow PC relative literal pool loads
+     as many other things will break anyway.  */
+  if (nopcrelative_literal_loads
+      && (aarch64_cmodel == AARCH64_CMODEL_TINY
+	  || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC))
+    nopcrelative_literal_loads = 0;
 }
 
 /* 'Unpack' up the internal tuning structs and update the options
@@ -8884,7 +8976,16 @@ aarch64_classify_symbol (rtx x, rtx offset,
   if (GET_CODE (x) == SYMBOL_REF)
     {
       if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
-	  return SYMBOL_FORCE_TO_MEM;
+	{
+	  /* This is alright even in PIC code as the constant
+	     pool reference is always PC relative and within
+	     the same translation unit.  */
+	  if (nopcrelative_literal_loads
+	      && CONSTANT_POOL_ADDRESS_P (x))
+	    return SYMBOL_SMALL_ABSOLUTE;
+	  else
+	    return SYMBOL_FORCE_TO_MEM;
+	}
 
       if (aarch64_tls_symbol_p (x))
 	return aarch64_classify_tls_symbol (x);
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 5a005b5..88ba72e 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4415,6 +4415,32 @@
 ;; -------------------------------------------------------------------
 ;; Reload support
 ;; -------------------------------------------------------------------
+;; Reload Scalar Floating point modes from constant pool.
+;; The AArch64 port doesn't have __int128 constant move support.
+(define_expand "aarch64_reload_movcp<GPF_TF:mode><P:mode>"
+ [(set (match_operand:GPF_TF 0 "register_operand" "=w")
+       (mem:GPF_TF (match_operand 1 "aarch64_constant_pool_symref" "S")))
+  (clobber (match_operand:P 2 "register_operand" "=&r"))]
+ "TARGET_FLOAT && nopcrelative_literal_loads"
+ {
+   aarch64_expand_mov_immediate (operands[2], XEXP (operands[1], 0));
+   emit_move_insn (operands[0], gen_rtx_MEM (<GPF_TF:MODE>mode, operands[2]));
+   DONE;
+ }
+)
+
+;; Reload Vector modes from constant pool.
+(define_expand "aarch64_reload_movcp<VALL:mode><P:mode>"
+ [(set (match_operand:VALL 0 "register_operand" "=w")
+       (mem:VALL (match_operand 1 "aarch64_constant_pool_symref" "S")))
+  (clobber (match_operand:P 2 "register_operand" "=&r"))]
+ "TARGET_FLOAT && nopcrelative_literal_loads"
+ {
+   aarch64_expand_mov_immediate (operands[2], XEXP (operands[1], 0));
+   emit_move_insn (operands[0], gen_rtx_MEM (<VALL:MODE>mode, operands[2]));
+   DONE;
+ }
+)
 
 (define_expand "aarch64_reload_mov<mode>"
   [(set (match_operand:TX 0 "register_operand" "=w")
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
index 8642bdb..a1ce58d 100644
--- a/gcc/config/aarch64/aarch64.opt
+++ b/gcc/config/aarch64/aarch64.opt
@@ -144,3 +144,7 @@ Enum(aarch64_abi) String(ilp32) Value(AARCH64_ABI_ILP32)
 
 EnumValue
 Enum(aarch64_abi) String(lp64) Value(AARCH64_ABI_LP64)
+
+mpc-relative-literal-loads
+Target Report Save Var(nopcrelative_literal_loads) Init(2) Save
+PC relative literal loads.
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index ff69800..42cb979 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -44,6 +44,9 @@
 ;; Double vector modes.
 (define_mode_iterator VDF [V2SF V4HF])
 
+;; Iterator for all scalar floating point modes (SF, DF and TF)
+(define_mode_iterator GPF_TF [SF DF TF])
+
 ;; Integer vector modes.
 (define_mode_iterator VDQ_I [V8QI V16QI V4HI V8HI V2SI V4SI V2DI])
 
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 3979209..7b852a4 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -362,3 +362,7 @@
 (define_predicate "aarch64_simd_shift_imm_bitsize_di"
   (and (match_code "const_int")
        (match_test "IN_RANGE (INTVAL (op), 0, 64)")))
+
+(define_predicate "aarch64_constant_pool_symref"
+   (and (match_code "symbol_ref")
+	(match_test "CONSTANT_POOL_ADDRESS_P (op)")))
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 7b5e44e..99c9685 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -12449,6 +12449,14 @@ for @var{string} in this option are not guaranteed to be consistent
 across releases.
 
 This option is only intended to be useful when developing GCC.
+
+@item -mpc-relative-literal-loads
+@opindex mpc-relative-literal-loads
+Enable PC-relative literal loads.  If this option is used, literal
+pools are assumed to have a range of up to 1MiB and an appropriate
+instruction sequence is used.  This option has no impact when used
+with @option{-mcmodel=tiny}.
+
 @end table
 
 @subsubsection @option{-march} and @option{-mcpu} Feature Modifiers
author	Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>	2015-09-14 13:16:59 +0000
committer	Ramana Radhakrishnan <ramana@gcc.gnu.org>	2015-09-14 13:16:59 +0000
commit	b4f50fd4c09a6bb80947e7ea4ee46c976ad44d96 (patch)
tree	1da7640d452969bb8627ff1d214216b843d57b15
parent	641f1ab4ed20677f2c9515bb039d8263a463576f (diff)
download	gcc-b4f50fd4c09a6bb80947e7ea4ee46c976ad44d96.zip gcc-b4f50fd4c09a6bb80947e7ea4ee46c976ad44d96.tar.gz gcc-b4f50fd4c09a6bb80947e7ea4ee46c976ad44d96.tar.bz2