config.gcc (with_fpu): Allow --with-fpu=vfp3.

gcc/ * config.gcc (with_fpu): Allow --with-fpu=vfp3. * config/arm/aout.h (REGISTER_NAMES): Add D16-D31. * config/arm/aof.h (REGISTER_NAMES): Add D16-D31. * config/arm/arm.c (FL_VFPV3): New flag for VFPv3 processor capability. (all_fpus): Add FPUTYPE_VFP3. (fp_model_for_fpu): Add VFPv3 field. (arm_rtx_costs_1): Give cost to VFPv3 constants. (vfp3_const_double_index): New function. Return integer index of VFPv3 constant suitable for fconst[sd] insns, or -1 if constant isn't suitable. (vfp3_const_double_rtx): New function. True if VFPv3 is enabled and argument represents a valid RTX for a VFPv3 constant. (vfp_output_fldmd): Split fldmd with > 16 registers in the list into two instructions. (vfp_emit_fstmd): Similar, for fstmd. (arm_print_operand): Implement new code 'G' for VFPv3 floating-point constants, represented as integer indices. (arm_hard_regno_mode_ok): Use VFP_REGNO_OK_FOR_SINGLE, VFP_REGNO_OK_FOR_DOUBLE macros. (arm_regno_class): Handle VFPv3 d0-d7, low, high register split. (arm_file_start): Set float-abi attribute for VFPv3, and output correct ".fpu" assembler directive. (arm_dbx_register_numbering): Add FIXME. * config/arm/arm.h (TARGET_VFP3): New macro. Target supports VFPv3. (fputype): Add FPUTYPE_VFP3. (FIXED_REGISTERS): Add 32 registers for D16-D31. (CALL_USED_REGISTERS): Likewise. (CONDITIONAL_REGISTER_USAGE): Add note about conditional definition of LAST_VFP_REGNUM. Make D16-D31 caller-saved, if present. (LAST_VFP_REGNUM): Extend available VFP registers for VFPv3. (D7_VFP_REGNUM): New. (LAST_LO_VFP_REGNUM, FIRST_HI_VFP_REGNUM, LAST_HI_VFP_REGNUM) (VFP_REGNO_OK_FOR_SINGLE, VFP_REGNO_OK_FOR_SINGLE) (VFP_REGNO_OK_FOR_DOUBLE): Define new macros. (FIRST_PSEUDO_REGISTER): Shift up to 128 to accommodate VFPv3. (REG_ALLOC_ORDER): Adjust for VFPv3. (reg_class): Add VFP_D0_D7_REGS, VFP_LO_REGS, VFP_HI_REGS. (REG_CLASS_NAMES): Add entries corresponding to VFP_D0_D7_REGS, VFP_LO_REGS, VFP_HI_REGS. (REG_CLASS_CONTENTS): Likewise. Extend contents for VFP_REGS. (IS_VFP_CLASS): Define macro. (SECONDARY_OUTPUT_RELOAD_CLASS, SECONDARY_INPUT_RELOAD_CLASS): Use IS_VFP_CLASS. (REGISTER_MOVE_COST): Likewise. * config/arm/arm-protos.h (vfp3_const_double_rtx): Add prototype. * config/arm/vfp.md (VFPCC_REGNUM): Redefine as 127. (*arm_movsi_vfp, *thumb2_movsi_vfp, *movsfcc_vfp) (*thumb2_movsfcc_vfp, *abssf2_vfp, *negsf2_vfp, *addsf3_vfp) (*subsf3_vfp, *divsf_vfp, *mulsf_vfp, *mulsf3negsf_vfp) (*mulsf3addsf_vfp, *mulsf3subsf_vfp, *mulsf3negsfaddsf_vfp) (*extendsfdf2_vfp, *truncdfsf2_vfp, *truncsisf2_vfp) (*truncsidf2_vfp, fixuns_truncsfsi2, fixuns_truncdfsi2) (*floatsisf2_vfp, *floatsidf2_vfp, floatunssisf2) (floatunssidf2, *sqrtsf2_vfp, *cmpsf_split_vfp) (*cmpsf_trap_split_vfp, *cmpsf_vfp, *cmpsf_trap_vfp): Use 't' where appropriate for single-word registers. (*movsf_vfp, *thumb2_movsf_vfp, *movdf_vfp, *thumb2_movdf_vfp): As above. Fix type attributes. * config/arm/constraints.md (register_contraint "t"): Define. (register_constraint "w"): Change to D0-D15, or D0-D31 for VFPv3/NEON. (register_constraint "x"): Define. (constraint "Dv"): Define. From-SVN: r126272
author: Julian Brown <julian@codesourcery.com> 2007-07-03 19:42:36 +0000
committer: Julian Brown <jules@gcc.gnu.org> 2007-07-03 19:42:36 +0000
commit: f1adb0a9f4d73554d243604b5ffe173843d3d705 (patch)
tree: e271e1e30fa0f1bd69a1d4542437b7453aa640aa /gcc/config/arm/arm.c
parent: 2d5b90b2fdf5e67857942f6ffa6417ad61f4a929 (diff)
download: gcc-f1adb0a9f4d73554d243604b5ffe173843d3d705.zip
gcc-f1adb0a9f4d73554d243604b5ffe173843d3d705.tar.gz
gcc-f1adb0a9f4d73554d243604b5ffe173843d3d705.tar.bz2
1 files changed, 164 insertions, 11 deletions
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index df08c46..6c9a695 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -457,6 +457,7 @@ static int thumb_call_reg_needed;
 #define FL_NOTM	      (1 << 17)	      /* Instructions not present in the 'M'
 					 profile.  */
 #define FL_DIV	      (1 << 18)	      /* Hardware divide.  */
+#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
 
 #define FL_IWMMXT     (1 << 29)	      /* XScale v2 or "Intel Wireless MMX technology".  */
 
@@ -700,7 +701,8 @@ static const struct fpu_desc all_fpus[] =
   {"fpe2",	FPUTYPE_FPA_EMU2},
   {"fpe3",	FPUTYPE_FPA_EMU2},
   {"maverick",	FPUTYPE_MAVERICK},
-  {"vfp",	FPUTYPE_VFP}
+  {"vfp",	FPUTYPE_VFP},
+  {"vfp3",	FPUTYPE_VFP3},
 };
 
 
@@ -715,7 +717,8 @@ static const enum fputype fp_model_for_fpu[] =
   ARM_FP_MODEL_FPA,		/* FPUTYPE_FPA_EMU2  */
   ARM_FP_MODEL_FPA,		/* FPUTYPE_FPA_EMU3  */
   ARM_FP_MODEL_MAVERICK,	/* FPUTYPE_MAVERICK  */
-  ARM_FP_MODEL_VFP		/* FPUTYPE_VFP  */
+  ARM_FP_MODEL_VFP,		/* FPUTYPE_VFP  */
+  ARM_FP_MODEL_VFP		/* FPUTYPE_VFP3  */
 };
 
 
@@ -4950,7 +4953,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
       return 6;
 
     case CONST_DOUBLE:
-      if (arm_const_double_rtx (x))
+      if (arm_const_double_rtx (x) || vfp3_const_double_rtx (x))
 	return outer == SET ? 2 : -1;
       else if ((outer == COMPARE || outer == PLUS)
 	       && neg_const_double_rtx_ok_for_fpa (x))
@@ -5649,6 +5652,108 @@ neg_const_double_rtx_ok_for_fpa (rtx x)
 
   return 0;
 }
+
+
+/* VFPv3 has a fairly wide range of representable immediates, formed from
+   "quarter-precision" floating-point values. These can be evaluated using this
+   formula (with ^ for exponentiation):
+
+     -1^s * n * 2^-r
+
+   Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
+   16 <= n <= 31 and 0 <= r <= 7.
+
+   These values are mapped onto an 8-bit integer ABCDEFGH s.t.
+
+     - A (most-significant) is the sign bit.
+     - BCD are the exponent (encoded as r XOR 3).
+     - EFGH are the mantissa (encoded as n - 16).
+*/
+
+/* Return an integer index for a VFPv3 immediate operand X suitable for the
+   fconst[sd] instruction, or -1 if X isn't suitable.  */
+static int
+vfp3_const_double_index (rtx x)
+{
+  REAL_VALUE_TYPE r, m;
+  int sign, exponent;
+  unsigned HOST_WIDE_INT mantissa, mant_hi;
+  unsigned HOST_WIDE_INT mask;
+  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
+
+  if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
+    return -1;
+
+  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+
+  /* We can't represent these things, so detect them first.  */
+  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
+    return -1;
+
+  /* Extract sign, exponent and mantissa.  */
+  sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
+  r = REAL_VALUE_ABS (r);
+  exponent = REAL_EXP (&r);
+  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
+     highest (sign) bit, with a fixed binary point at bit point_pos.
+     WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
+     bits for the mantissa, this may fail (low bits would be lost).  */
+  real_ldexp (&m, &r, point_pos - exponent);
+  REAL_VALUE_TO_INT (&mantissa, &mant_hi, m);
+
+  /* If there are bits set in the low part of the mantissa, we can't
+     represent this value.  */
+  if (mantissa != 0)
+    return -1;
+
+  /* Now make it so that mantissa contains the most-significant bits, and move
+     the point_pos to indicate that the least-significant bits have been
+     discarded.  */
+  point_pos -= HOST_BITS_PER_WIDE_INT;
+  mantissa = mant_hi;
+
+  /* We can permit four significant bits of mantissa only, plus a high bit
+     which is always 1.  */
+  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
+  if ((mantissa & mask) != 0)
+    return -1;
+
+  /* Now we know the mantissa is in range, chop off the unneeded bits.  */
+  mantissa >>= point_pos - 5;
+
+  /* The mantissa may be zero. Disallow that case. (It's possible to load the
+     floating-point immediate zero with Neon using an integer-zero load, but
+     that case is handled elsewhere.)  */
+  if (mantissa == 0)
+    return -1;
+
+  gcc_assert (mantissa >= 16 && mantissa <= 31);
+
+  /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
+     normalised significands are in the range [1, 2). (Our mantissa is shifted
+     left 4 places at this point relative to normalised IEEE754 values).  GCC
+     internally uses [0.5, 1) (see real.c), so the exponent returned from
+     REAL_EXP must be altered.  */
+  exponent = 5 - exponent;
+
+  if (exponent < 0 || exponent > 7)
+    return -1;
+
+  /* Sign, mantissa and exponent are now in the correct form to plug into the
+     formulae described in the comment above.  */
+  return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
+}
+
+/* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
+int
+vfp3_const_double_rtx (rtx x)
+{
+  if (!TARGET_VFP3)
+    return 0;
+
+  return vfp3_const_double_index (x) != -1;
+}
+
 
 /* Predicates for `match_operand' and `match_operator'.  */
 
@@ -8808,6 +8913,15 @@ vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
       count++;
     }
 
+  /* FLDMD may not load more than 16 doubleword registers at a time. Split the
+     load into multiple parts if we have to handle more than 16 registers.  */
+  if (count > 16)
+    {
+      vfp_output_fldmd (stream, base, reg, 16);
+      vfp_output_fldmd (stream, base, reg + 16, count - 16);
+      return;
+    }
+
   fputc ('\t', stream);
   asm_fprintf (stream, "fldmfdd\t%r!, {", base);
 
@@ -8870,6 +8984,19 @@ vfp_emit_fstmd (int base_reg, int count)
       count++;
     }
 
+  /* FSTMD may not store more than 16 doubleword registers at once.  Split
+     larger stores into multiple parts (up to a maximum of two, in
+     practice).  */
+  if (count > 16)
+    {
+      int saved;
+      /* NOTE: base_reg is an internal register number, so each D register
+         counts as 2.  */
+      saved = vfp_emit_fstmd (base_reg + 32, count - 16);
+      saved += vfp_emit_fstmd (base_reg, 16);
+      return saved;
+    }
+
   par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
   dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
 
@@ -11982,6 +12109,16 @@ arm_print_operand (FILE *stream, rtx x, int code)
       }
       return;
 
+    /* Print a VFPv3 floating-point constant, represented as an integer
+       index.  */
+    case 'G':
+      {
+        int index = vfp3_const_double_index (x);
+	gcc_assert (index != -1);
+	fprintf (stream, "%d", index);
+      }
+      return;
+
     default:
       if (x == 0)
 	{
@@ -12761,11 +12898,10 @@ arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
       && IS_VFP_REGNUM (regno))
     {
       if (mode == SFmode || mode == SImode)
-	return TRUE;
+	return VFP_REGNO_OK_FOR_SINGLE (regno);
 
-      /* DFmode values are only valid in even register pairs.  */
       if (mode == DFmode)
-	return ((regno - FIRST_VFP_REGNUM) & 1) == 0;
+	return VFP_REGNO_OK_FOR_DOUBLE (regno);
       return FALSE;
     }
 
@@ -12828,7 +12964,14 @@ arm_regno_class (int regno)
     return CIRRUS_REGS;
 
   if (IS_VFP_REGNUM (regno))
-    return VFP_REGS;
+    {
+      if (regno <= D7_VFP_REGNUM)
+	return VFP_D0_D7_REGS;
+      else if (regno <= LAST_LO_VFP_REGNUM)
+        return VFP_LO_REGS;
+      else
+        return VFP_HI_REGS;
+    }
 
   if (IS_IWMMXT_REGNUM (regno))
     return IWMMXT_REGS;
@@ -15270,6 +15413,7 @@ arm_file_start (void)
 	}
       else
 	{
+	  int set_float_abi_attributes = 0;
 	  switch (arm_fpu_arch)
 	    {
 	    case FPUTYPE_FPA:
@@ -15285,15 +15429,23 @@ arm_file_start (void)
 	      fpu_name = "maverick";
 	      break;
 	    case FPUTYPE_VFP:
-	      if (TARGET_HARD_FLOAT)
-		asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
-	      if (TARGET_HARD_FLOAT_ABI)
-		asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
 	      fpu_name = "vfp";
+	      set_float_abi_attributes = 1;
+	      break;
+	    case FPUTYPE_VFP3:
+	      fpu_name = "vfp3";
+	      set_float_abi_attributes = 1;
 	      break;
 	    default:
 	      abort();
 	    }
+	  if (set_float_abi_attributes)
+	    {
+	      if (TARGET_HARD_FLOAT)
+		asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
+	      if (TARGET_HARD_FLOAT_ABI)
+		asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
+	    }
 	}
       asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
 
@@ -16172,6 +16324,7 @@ arm_dbx_register_number (unsigned int regno)
   if (IS_FPA_REGNUM (regno))
     return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
 
+  /* FIXME: VFPv3 register numbering.  */
   if (IS_VFP_REGNUM (regno))
     return 64 + regno - FIRST_VFP_REGNUM;
author	Julian Brown <julian@codesourcery.com>	2007-07-03 19:42:36 +0000
committer	Julian Brown <jules@gcc.gnu.org>	2007-07-03 19:42:36 +0000
commit	f1adb0a9f4d73554d243604b5ffe173843d3d705 (patch)
tree	e271e1e30fa0f1bd69a1d4542437b7453aa640aa /gcc/config/arm/arm.c
parent	2d5b90b2fdf5e67857942f6ffa6417ad61f4a929 (diff)
download	gcc-f1adb0a9f4d73554d243604b5ffe173843d3d705.zip gcc-f1adb0a9f4d73554d243604b5ffe173843d3d705.tar.gz gcc-f1adb0a9f4d73554d243604b5ffe173843d3d705.tar.bz2