Support for official Sparc V9 ABI:

* sparc.c (sparc_override_options): Force stack bias off for !arch64. Care for flag_pcc_struct_return default. (output_move_quad): Rewrite to move by halves on v9 and in the proper direction. (move_quad_direction): New function. (output_fp_move_quad): Use it to determine the direction of copy. (function_arg_slotno): Return -1 for FP reg overflow as well. (function_arg_record_value*): New functions. (function_arg): Use them. Streamline unprototyped parameter passing. (function_arg_pass_by_reference): Pass TCmode by reference. (function_value): New function. * sparc.h (PTRDIFF_TYPE, SIZE_TYPE): For -pedantic's sake, don't use long long in 64-bit mode. (RETURN_IN_MEMORY): v9 returns structs < 32-bytes in regs. (DEFAULT_PCC_STRUCT_RETURN): Make the default detectable. (BASE_RETURN_VALUE_REG): Consider complex float types for arch64. (BASE_OUTGOING_VALUE_REG, BASE_PASSING_ARG_REG): Likewise. (BASE_INCOMING_ARG_REG): Likewise. (FUNCTION_VALUE): Call function_value. (FUNCTION_OUTGOING_VALUE, LIBCALL_VALUE): Likewise. * sparc.md (movdi_sp32_v9): Disable for arch64. (movsf, movdf, movtf): Sort all alternatives using fp regs first. (call_value_address_sp64): Remove register class constraints. (call_value_symbolic_sp64): Likewise. (nonlocal_goto): Pass label reg directly to goto_handlers. Constrain v9 case to 32-bit constants. (goto_handler_and_restore_v9): Provide a version for arch64. * sparc/linux64.h (SIZE_TYPE, PTRDIFF_TYPE): Remove private definition. * sparc/sp64-aout.h (TARGET_DEFAULT): Turn on stack bias. (CPP_PREDEFINES): New. * sparc/sp64-elf.h: Likewise. (PREFERRED_DEBUGGING_TYPE): Dwarf2. (ASM_OUTPUT_DWARF2_ADDR_CONST): New. * sparc/sysv4.h (SIZE_TYPE, PTRDIFF_TYPE): Undo svr4.h's changes. From-SVN: r19526
author: Richard Henderson <rth@cygnus.com> 1998-05-03 07:19:46 -0700
committer: Richard Henderson <rth@gcc.gnu.org> 1998-05-03 07:19:46 -0700
commit: 82d6b402bb0e637381ec610a1a448237afaab48c (patch)
tree: 74046c7306c56e7c32bd1dc66a7f2387740a3fbe /gcc
parent: 959d87966a28c5f67a33db114e4d342fe7072f0c (diff)
download: gcc-82d6b402bb0e637381ec610a1a448237afaab48c.zip
gcc-82d6b402bb0e637381ec610a1a448237afaab48c.tar.gz
gcc-82d6b402bb0e637381ec610a1a448237afaab48c.tar.bz2
8 files changed, 761 insertions, 316 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c548005..1a38402 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,41 @@
+Sun May  3 13:51:34 PDT 1998  Richard Henderson  <rth@cygnus.com>
+
+	Support for official Sparc V9 ABI:
+	* sparc.c (sparc_override_options): Force stack bias off for !arch64.
+	Care for flag_pcc_struct_return default.
+	(output_move_quad): Rewrite to move by halves on v9 and in the
+	proper direction.
+	(move_quad_direction): New function.
+	(output_fp_move_quad): Use it to determine the direction of copy.
+	(function_arg_slotno): Return -1 for FP reg overflow as well.
+	(function_arg_record_value*): New functions.
+	(function_arg): Use them.  Streamline unprototyped parameter passing.
+	(function_arg_pass_by_reference): Pass TCmode by reference.
+	(function_value): New function.
+	* sparc.h (PTRDIFF_TYPE, SIZE_TYPE): For -pedantic's sake, don't use
+	long long in 64-bit mode.
+	(RETURN_IN_MEMORY): v9 returns structs < 32-bytes in regs.
+	(DEFAULT_PCC_STRUCT_RETURN): Make the default detectable.
+	(BASE_RETURN_VALUE_REG): Consider complex float types for arch64.
+	(BASE_OUTGOING_VALUE_REG, BASE_PASSING_ARG_REG): Likewise.
+	(BASE_INCOMING_ARG_REG): Likewise.
+	(FUNCTION_VALUE): Call function_value.
+	(FUNCTION_OUTGOING_VALUE, LIBCALL_VALUE): Likewise.
+	* sparc.md (movdi_sp32_v9): Disable for arch64.
+	(movsf, movdf, movtf): Sort all alternatives using fp regs first.
+	(call_value_address_sp64): Remove register class constraints.
+	(call_value_symbolic_sp64): Likewise.
+	(nonlocal_goto): Pass label reg directly to goto_handlers.  Constrain
+	v9 case to 32-bit constants.
+	(goto_handler_and_restore_v9): Provide a version for arch64.
+	* sparc/linux64.h (SIZE_TYPE, PTRDIFF_TYPE): Remove private definition.
+	* sparc/sp64-aout.h (TARGET_DEFAULT): Turn on stack bias.
+	(CPP_PREDEFINES): New.
+	* sparc/sp64-elf.h: Likewise.
+	(PREFERRED_DEBUGGING_TYPE): Dwarf2.
+	(ASM_OUTPUT_DWARF2_ADDR_CONST): New.
+	* sparc/sysv4.h (SIZE_TYPE, PTRDIFF_TYPE): Undo svr4.h's changes.
+
 Sat May 2 17:47:17 PDT 1998 Jeff Law  (law@cygnus.com)
 
 	* version.c: Bump for snapshot.
diff --git a/gcc/config/sparc/linux64.h b/gcc/config/sparc/linux64.h
index b676e72..3f35936 100644
--- a/gcc/config/sparc/linux64.h
+++ b/gcc/config/sparc/linux64.h
@@ -84,12 +84,6 @@ Boston, MA 02111-1307, USA.  */
 #undef SPARC_DEFAULT_CMODEL
 #define SPARC_DEFAULT_CMODEL CM_MEDANY
 
-#undef SIZE_TYPE
-#define SIZE_TYPE "long long unsigned int"
- 
-#undef PTRDIFF_TYPE
-#define PTRDIFF_TYPE "long long int"
-  
 #undef WCHAR_TYPE
 #define WCHAR_TYPE "long int"
    
diff --git a/gcc/config/sparc/sp64-aout.h b/gcc/config/sparc/sp64-aout.h
index fbd4ef3..e3056df 100644
--- a/gcc/config/sparc/sp64-aout.h
+++ b/gcc/config/sparc/sp64-aout.h
@@ -1,5 +1,5 @@
 /* Definitions of target machine for GNU compiler, for SPARC64, a.out.
-   Copyright (C) 1994, 1996, 1997 Free Software Foundation, Inc.
+   Copyright (C) 1994, 1996, 1997, 1998  Free Software Foundation, Inc.
    Contributed by Doug Evans, dje@cygnus.com.
 
 This file is part of GNU CC.
@@ -28,8 +28,11 @@ Boston, MA 02111-1307, USA.  */
 #undef TARGET_DEFAULT
 #define TARGET_DEFAULT \
   (MASK_V9 + MASK_PTR64 + MASK_64BIT + MASK_HARD_QUAD \
-   + MASK_APP_REGS + MASK_EPILOGUE + MASK_FPU)
+   + MASK_APP_REGS + MASK_EPILOGUE + MASK_FPU + MASK_STACK_BIAS)
 
 /* The only code model supported is Medium/Low.  */
 #undef SPARC_DEFAULT_CMODEL
 #define SPARC_DEFAULT_CMODEL CM_MEDLOW
+
+#undef CPP_PREDEFINES
+#define CPP_PREDEFINES "-Dsparc -Acpu(sparc) -Amachine(sparc)"
diff --git a/gcc/config/sparc/sp64-elf.h b/gcc/config/sparc/sp64-elf.h
index 0c0bb6b..a6f3e72 100644
--- a/gcc/config/sparc/sp64-elf.h
+++ b/gcc/config/sparc/sp64-elf.h
@@ -1,5 +1,5 @@
 /* Definitions of target machine for GNU compiler, for SPARC64, ELF.
-   Copyright (C) 1994, 1995, 1996, 1997 Free Software Foundation, Inc.
+   Copyright (C) 1994, 1995, 1996, 1997, 1998  Free Software Foundation, Inc.
    Contributed by Doug Evans, dje@cygnus.com.
 
 This file is part of GNU CC.
@@ -29,19 +29,19 @@ Boston, MA 02111-1307, USA.  */
 #undef TARGET_VERSION
 #define TARGET_VERSION fprintf (stderr, " (sparc64-elf)")
 
-/* A 64 bit v9 compiler without stack-bias,
-   in a Medium/Anywhere code model environment.
-   There is no stack bias as this configuration is intended for
-   embedded systems.  */
+/* A 64 bit v9 compiler in a Medium/Anywhere code model environment.  */
 
 #undef TARGET_DEFAULT
 #define TARGET_DEFAULT \
 (MASK_V9 + MASK_PTR64 + MASK_64BIT + MASK_HARD_QUAD \
- + MASK_APP_REGS + MASK_EPILOGUE + MASK_FPU)
+ + MASK_APP_REGS + MASK_EPILOGUE + MASK_FPU + MASK_STACK_BIAS)
 
 #undef SPARC_DEFAULT_CMODEL
 #define SPARC_DEFAULT_CMODEL CM_EMBMEDANY
 
+#undef CPP_PREDEFINES
+#define CPP_PREDEFINES "-Dsparc -D__ELF__ -Acpu(sparc) -Amachine(sparc)"
+
 /* __svr4__ is used by the C library (FIXME) */
 #undef CPP_SUBTARGET_SPEC
 #define CPP_SUBTARGET_SPEC "-D__svr4__"
@@ -99,11 +99,6 @@ crtbegin.o%s \
 #undef LONG_DOUBLE_TYPE_SIZE
 #define LONG_DOUBLE_TYPE_SIZE 128
 
-#undef PTRDIFF_TYPE
-#define PTRDIFF_TYPE "long long int"
-#undef SIZE_TYPE
-#define SIZE_TYPE "long long unsigned int"
-
 /* The medium/anywhere code model practically requires us to put jump tables
    in the text section as gcc is unable to distinguish LABEL_REF's of jump
    tables from other label refs (when we need to).  */
@@ -120,7 +115,7 @@ crtbegin.o%s \
 #define DBX_DEBUGGING_INFO
 
 #undef PREFERRED_DEBUGGING_TYPE
-#define PREFERRED_DEBUGGING_TYPE DWARF_DEBUG
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
 
 /* Stabs doesn't use this, and it confuses a simulator.  */
 /* ??? Need to see what DWARF needs, if anything.  */
@@ -150,6 +145,9 @@ do {								\
   fputc ('\n', (FILE));						\
 } while (0)
 
+#define ASM_OUTPUT_DWARF2_ADDR_CONST(FILE, ADDR) \
+  fprintf ((FILE), "\t%s\t%s", UNALIGNED_LONGLONG_ASM_OP, (ADDR))
+
 /* ??? Not sure if this should be 4 or 8 bytes.  4 works for now.  */
 #define ASM_OUTPUT_DWARF_REF(FILE, LABEL) \
 do {								\
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index caebb08..806ece4 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -280,6 +280,10 @@ sparc_override_options ()
   if (! TARGET_V9)
     target_flags &= ~MASK_V8PLUS;
 
+  /* Don't use stack biasing in 32 bit mode.  */
+  if (TARGET_ARCH32)
+    target_flags &= ~MASK_STACK_BIAS;
+
   /* Validate -malign-loops= value, or provide default.  */
   if (sparc_align_loops_string)
     {
@@ -321,6 +325,10 @@ sparc_override_options ()
   else
     sparc_align_funcs = DEFAULT_SPARC_ALIGN_FUNCS;
 
+  /* Validate PCC_STRUCT_RETURN.  */
+  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
+    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
+
   /* Do various machine dependent initializations.  */
   sparc_init_modes ();
 }
@@ -2150,9 +2158,18 @@ output_move_quad (operands)
   register enum optype optype0;
   register enum optype optype1;
   rtx wordpart[4][2];
+  rtx load_late[4];
+  int load_late_half[2];
   rtx addreg0 = 0;
   rtx addreg1 = 0;
 
+  load_late_half[0] = 0; load_late_half[1] = 0;
+  load_late[0] = 0; load_late[1] = 0; load_late[2] = 0; 
+  load_late[3] = 0;
+
+  wordpart[0][0] = NULL;  wordpart[1][0] = NULL;  wordpart[2][0] = NULL;
+  wordpart[3][0] = NULL;
+
   /* First classify both operands.  */
 
   if (REG_P (op0))
@@ -2183,6 +2200,72 @@ output_move_quad (operands)
       || (optype0 == MEM && optype1 == MEM))
     abort ();
 
+  if (optype0 == REGOP)
+    {
+      wordpart[0][0] = gen_rtx (REG, word_mode, REGNO (op0) + 0);
+      if (TARGET_ARCH64 && FP_REG_P (op0) 
+	  && REGNO (op0) < SPARC_FIRST_V9_FP_REG)
+	wordpart[1][0] = gen_rtx (REG, word_mode, REGNO (op0) + 2);
+      else
+	wordpart[1][0] = gen_rtx (REG, word_mode, REGNO (op0) + 1);
+
+      if (TARGET_ARCH32)
+	{
+	  wordpart[2][0] = gen_rtx (REG, word_mode, REGNO (op0) + 2);
+	  wordpart[3][0] = gen_rtx (REG, word_mode, REGNO (op0) + 3);
+	}
+  
+      /* Loading into a register which overlaps a register used in the
+	 address.  */
+      if (optype1 != REGOP && reg_overlap_mentioned_p (op0, op1))
+	{
+	  int i;
+	  int count;
+
+	  count = 0;
+
+	  for (i = 0; i < 4 && wordpart[i][0] != NULL; i++)
+	    {
+	      if (reg_mentioned_p (wordpart[i][0], op1))
+		{
+		  load_late[i] = wordpart[i][0];
+		  load_late_half[TARGET_ARCH64 ? i : i/2] = 1;
+		  count++;
+		}
+	    }
+	  if (count > 2)
+	    {
+	      /* Not sure what to do here. Multiple adds? Can't happen. */
+	      abort ();
+	    }
+	  else if (count == 2)
+	    {
+	      /* We have a two-address source operand, and both registers
+		 overlap with the dest quad. Add them together and
+		 store the result into the last register of the quad being
+		 loaded, then generate an appropriate MEM insn. */
+	      rtx temp[3];
+	      int place = 0;
+
+	      for (i = 0; i < 4; i++)
+		{
+		  if (load_late[i])
+		    {
+		      temp[place++] = load_late[i];
+		      load_late[i] = 0;
+		    }
+		}
+	      temp[2] = wordpart[3][0];
+	      output_asm_insn ("add %0, %1, %2", temp);
+	      load_late_half[0] = 0;
+	      load_late_half[1] = 1;
+	      op1 = gen_rtx (MEM, TFmode, wordpart[3][0]);
+	      operands[1] = op1;
+	      optype1 = OFFSOP;
+	    }
+	}
+    }
+
   /* If an operand is an unoffsettable memory ref, find a register
      we can increment temporarily to make it refer to the later words.  */
 
@@ -2195,17 +2278,7 @@ output_move_quad (operands)
   /* Ok, we can do one word at a time.
      Set up in wordpart the operands to use for each word of the arguments.  */
 
-  if (optype0 == REGOP)
-    {
-      wordpart[0][0] = gen_rtx (REG, word_mode, REGNO (op0) + 0);
-      wordpart[1][0] = gen_rtx (REG, word_mode, REGNO (op0) + 1);
-      if (TARGET_ARCH32)
-	{
-	  wordpart[2][0] = gen_rtx (REG, word_mode, REGNO (op0) + 2);
-	  wordpart[3][0] = gen_rtx (REG, word_mode, REGNO (op0) + 3);
-	}
-    }
-  else if (optype0 == OFFSOP)
+  if (optype0 == OFFSOP)
     {
       wordpart[0][0] = adj_offsettable_operand (op0, 0);
       if (TARGET_ARCH32)
@@ -2217,7 +2290,7 @@ output_move_quad (operands)
       else
 	wordpart[1][0] = adj_offsettable_operand (op0, 8);
     }
-  else
+  else if (optype0 != REGOP)
     {
       wordpart[0][0] = op0;
       wordpart[1][0] = op0;
@@ -2228,7 +2301,12 @@ output_move_quad (operands)
   if (optype1 == REGOP)
     {
       wordpart[0][1] = gen_rtx (REG, word_mode, REGNO (op1) + 0);
-      wordpart[1][1] = gen_rtx (REG, word_mode, REGNO (op1) + 1);
+      if (TARGET_ARCH64 && FP_REG_P (op1)
+	  && REGNO (op1) < SPARC_FIRST_V9_FP_REG)
+	wordpart[1][1] = gen_rtx (REG, word_mode, REGNO (op1) + 2);
+      else
+	wordpart[1][1] = gen_rtx (REG, word_mode, REGNO (op1) + 1);
+
       if (TARGET_ARCH32)
 	{
 	  wordpart[2][1] = gen_rtx (REG, word_mode, REGNO (op1) + 2);
@@ -2282,6 +2360,7 @@ output_move_quad (operands)
       || (optype0 == OFFSOP && optype1 == REGOP && (REGNO (op1) & 1) == 0))
     {
       rtx mem, reg;
+      int use_ldx;
 
       if (optype0 == REGOP)
 	mem = op1, reg = op0;
@@ -2294,19 +2373,33 @@ output_move_quad (operands)
 	     the register number.  */
 	  || (TARGET_V9 && REGNO (reg) >= SPARC_FIRST_V9_FP_REG))
 	{
+	  static char * const mov_by_64[2][2][2] = {
+	    { { "std %S1,%2;std %1,%0", "stx %R1,%2;stx %1,%0" },
+	      { "ldd %2,%S0;ldd %1,%0", "ldx %2,%R0;ldx %1,%0" } },
+	    { { "std %1,%0;std %S1,%2", "stx %1,%0;stx %R1,%2" },
+	      { "ldd %1,%0;ldd %2,%S0", "ldx %1,%0;ldx %2,%R0" } }
+	  };
+
 	  if (TARGET_V9 && FP_REG_P (reg) && TARGET_HARD_QUAD)
 	    {
-	      if ((REGNO (reg) & 3) != 0)
-		abort ();
-	      /* ??? Can `mem' have an inappropriate alignment here?  */
-	      return (mem == op1 ? "ldq %1,%0" : "stq %1,%0");
+	      /* Only abort if the register # requires that we use ldq. */
+	      if ((REGNO (reg) & 3) == 0)
+		{
+		  /* ??? Can `mem' have an inappropriate alignment here?  */
+		  return (mem == op1 ? "ldq %1,%0" : "stq %1,%0");
+		}
+	      else 
+		{
+		  if (REGNO (reg) >= SPARC_FIRST_V9_FP_REG)
+		    abort();
+		}
 	    }
 	  operands[2] = adj_offsettable_operand (mem, 8);
-	  /* ??? In arch64 case, shouldn't we use ldd/std for fp regs.  */
-	  if (mem == op1)
-	    return TARGET_ARCH64 ? "ldx %1,%0\n\tldx %2,%R0" : "ldd %1,%0\n\tldd %2,%S0";
-	  else
-	    return TARGET_ARCH64 ? "stx %1,%0\n\tstx %R1,%2" : "std %1,%0\n\tstd %S1,%2";
+
+	  /* Do the loads in the right order; can't overwrite our address
+	     register. */
+	  use_ldx = TARGET_ARCH64 && !FP_REG_P (reg);
+	  return mov_by_64[!load_late_half[0]][mem != op1][use_ldx];
 	}
     }
 
@@ -2343,77 +2436,124 @@ output_move_quad (operands)
 	}
     }
 
-  /* Loading into a register which overlaps a register used in the address.  */
-  if (optype0 == REGOP && optype1 != REGOP
-      && reg_overlap_mentioned_p (op0, op1))
-    {
-      /* ??? Not implemented yet.  This is a bit complicated, because we
-	 must load which ever part overlaps the address last.  If the address
-	 is a double-reg address, then there are two parts which need to
-	 be done last, which is impossible.  We would need a scratch register
-	 in that case.  */
-      abort ();
-    }
-
-  /* Normal case: move the words in lowest to highest address order.  */
+  /* Normal case: move the words in lowest to highest address order.
+     There may be an overlapping register; in that case, skip and go
+     back. */
 
   if (TARGET_ARCH32)
     {
-      output_asm_insn (singlemove_string (wordpart[0]), wordpart[0]);
-
-      /* Make any unoffsettable addresses point at the second word.  */
-      if (addreg0)
-	output_asm_insn ("add %0,0x4,%0", &addreg0);
-      if (addreg1)
-	output_asm_insn ("add %0,0x4,%0", &addreg1);
-
-      /* Do the second word.  */
-      output_asm_insn (singlemove_string (wordpart[1]), wordpart[1]);
-
-      /* Make any unoffsettable addresses point at the third word.  */
-      if (addreg0)
-	output_asm_insn ("add %0,0x4,%0", &addreg0);
-      if (addreg1)
-	output_asm_insn ("add %0,0x4,%0", &addreg1);
-
-      /* Do the third word.  */
-      output_asm_insn (singlemove_string (wordpart[2]), wordpart[2]);
+      int i;
+      int offset = 0xc;
+      rtx temp[2];
 
-      /* Make any unoffsettable addresses point at the fourth word.  */
-      if (addreg0)
-	output_asm_insn ("add %0,0x4,%0", &addreg0);
-      if (addreg1)
-	output_asm_insn ("add %0,0x4,%0", &addreg1);
+      for (i = 0; i < 4; i++)
+	{
+	  if (! load_late[i])
+	    output_asm_insn (singlemove_string (wordpart[i]), wordpart[i]);
 
-      /* Do the fourth word.  */
-      output_asm_insn (singlemove_string (wordpart[3]), wordpart[3]);
+	  if (i != 3)
+	    {
+	      /* Make any unoffsettable addresses point at the next word.  */
+	      if (addreg0)
+		output_asm_insn ("add %0,0x4,%0", &addreg0);
+	      if (addreg1)
+		output_asm_insn ("add %0,0x4,%0", &addreg1);
+	    }
+	}
+      for (i = 0; i < 4; i++)
+	{
+	  if (load_late[i])
+	    {
+	      int fix = offset - i * 4;
 
-      /* Undo the adds we just did.  */
-      if (addreg0)
-	output_asm_insn ("add %0,-0xc,%0", &addreg0);
-      if (addreg1)
-	output_asm_insn ("add %0,-0xc,%0", &addreg1);
+	      /* Back up to the appropriate place. */
+	      temp[1] = gen_rtx (CONST_INT, VOIDmode, -fix);
+	      if (addreg0)
+		{
+		  temp[0] = addreg0;
+		  output_asm_insn ("add %0,%1,%0", temp);
+		}
+	      if (addreg1)
+		{
+		  temp[0] = addreg1;
+		  output_asm_insn ("add %0,%1,%0", temp);
+		}
+	      output_asm_insn (singlemove_string (wordpart[i]),
+			       wordpart[i]);
+	      /* Don't modify the register that's the destination of the
+		 move. */
+	      temp[0] = gen_rtx (CONST_INT, VOIDmode, -(offset - fix));
+	      if (addreg0 && REGNO (addreg0) != REGNO (wordpart[i][0]))
+		{
+		  temp[1] = addreg0;
+		  output_asm_insn("add %0,%1,%0", temp);
+		}
+	      if (addreg1 && REGNO (addreg1) != REGNO (wordpart[i][0]))
+		{
+		  temp[1] = addreg1;
+		  output_asm_insn("add %0,%1,%0",temp);
+		}
+	      offset = 0;
+	      break;
+	    }
+	}
+      if (offset)
+	{
+	  temp[1] = gen_rtx (CONST_INT, VOIDmode, -offset);
+	  /* Undo the adds we just did.  */
+	  if (addreg0)
+	    {
+	      temp[0] = addreg0;
+	      output_asm_insn ("add %0,%1,%0", temp);
+	    }
+	  if (addreg1)
+	    {
+	      temp[0] = addreg1;
+	      output_asm_insn ("add %0,%1,%0", temp);
+	    }
+	}
     }
   else /* TARGET_ARCH64 */
     {
-      output_asm_insn (doublemove_string (wordpart[0]), wordpart[0]);
+      if (load_late_half[0]) 
+	{
+	  /* Load the second half first. */
+	  if (addreg0)
+	    output_asm_insn ("add %0,0x8,%0", &addreg0);
+	  if (addreg1)
+	    output_asm_insn ("add %0,0x8,%0", &addreg1);
 
-      /* Make any unoffsettable addresses point at the second word.  */
-      if (addreg0)
-	output_asm_insn ("add %0,0x8,%0", &addreg0);
-      if (addreg1)
-	output_asm_insn ("add %0,0x8,%0", &addreg1);
+	  output_asm_insn (doublemove_string (wordpart[1]), wordpart[1]);
 
-      /* Do the second word.  */
-      output_asm_insn (doublemove_string (wordpart[1]), wordpart[1]);
+	  /* Undo the adds we just did.  */
+	  if (addreg0)
+	    output_asm_insn ("add %0,-0x8,%0", &addreg0);
+	  if (addreg1)
+	    output_asm_insn ("add %0,-0x8,%0", &addreg1);
 
-      /* Undo the adds we just did.  */
-      if (addreg0)
-	output_asm_insn ("add %0,-0x8,%0", &addreg0);
-      if (addreg1)
-	output_asm_insn ("add %0,-0x8,%0", &addreg1);
-    }
+	  output_asm_insn (doublemove_string (wordpart[0]), wordpart[0]);
+	}
+      else
+	{
+	  output_asm_insn (doublemove_string (wordpart[0]), wordpart[0]);
 
+	  if (addreg0)
+	    output_asm_insn ("add %0,0x8,%0", &addreg0);
+	  if (addreg1)
+	    output_asm_insn ("add %0,0x8,%0", &addreg1);
+
+	  /* Do the second word.  */
+	  output_asm_insn (doublemove_string (wordpart[1]), wordpart[1]);
+
+	  /* Undo the adds we just did.  But don't modify the dest of
+	     the move. */
+	  if (addreg0 && REGNO (addreg0) != REGNO (wordpart[1][0]))
+	    output_asm_insn ("add %0,-0x8,%0", &addreg0);
+	  if (addreg1 && REGNO (addreg1) != REGNO (wordpart[1][0]))
+	    output_asm_insn ("add %0,-0x8,%0", &addreg1);
+	}
+    }
+  
   return "";
 }
 
@@ -2448,6 +2588,24 @@ output_fp_move_double (operands)
   else abort ();
 }
 
+/* When doing a quad-register move, determine the direction in which
+   the move needs to be performed. SRC and DST are the source and
+   destination registers.
+
+   A value of -1 indicates that the move needs to be done from the
+   highest register to the lowest. */
+
+static int
+move_quad_direction (src, dst)
+     rtx src, dst;
+{
+  if ((REGNO (dst) > REGNO (src))
+      && (REGNO (dst) < (REGNO (src) + 4)))
+    return -1;
+  else
+    return 1;
+}
+
 /* Output assembler code to perform a quadword move insn with operands
    OPERANDS, one of which must be a floating point register.  */
 
@@ -2465,9 +2623,21 @@ output_fp_move_quad (operands)
 	  if (TARGET_V9 && TARGET_HARD_QUAD)
 	    return "fmovq %1,%0";
 	  else if (TARGET_V9)
-	    return "fmovd %1,%0\n\tfmovd %S1,%S0";
+	    {
+	      int dir = move_quad_direction (op1, op0);
+	      if (dir > 0)
+		return "fmovd %1,%0\n\tfmovd %S1,%S0";
+	      else 
+		return "fmovd %S1,%S0\n\tfmovd %1,%0";
+	    }
 	  else
-	    return "fmovs %1,%0\n\tfmovs %R1,%R0\n\tfmovs %S1,%S0\n\tfmovs %T1,%T0";
+	    {
+	      int dir = move_quad_direction (op0, op1);
+	      if (dir > 0)
+		return "fmovs %1,%0\n\tfmovs %R1,%R0\n\tfmovs %S1,%S0\n\tfmovs %T1,%T0";
+	      else
+		return "fmovs %T1,%T0\n\tfmovs %S1,%S0\n\tfmovs %R1,%R0\n\tfmovs %1,%0";
+	    }
 	}
       else if (GET_CODE (op1) == REG)
 	abort ();
@@ -3777,7 +3947,7 @@ function_arg_slotno (cum, mode, type, named, incoming_p, pregno, ppadding)
 	  if (TARGET_FPU && named)
 	    {
 	      if (slotno >= SPARC_FP_ARG_MAX)
-		return 0;
+		return -1;
 	      regno = SPARC_FP_ARG_FIRST + slotno * 2;
 	      if (mode == SFmode)
 		regno++;
@@ -3855,6 +4025,271 @@ function_arg_slotno (cum, mode, type, named, incoming_p, pregno, ppadding)
   return slotno;
 }
 
+/* Handle recursive register counting for structure field layout.  */
+
+struct function_arg_record_value_parms
+{
+  rtx ret;
+  int slotno, named, regbase;
+  int nregs, intoffset;
+};
+
+static void
+function_arg_record_value_1 (type, startbitpos, parms)
+     tree type;
+     int startbitpos;
+     struct function_arg_record_value_parms *parms;
+{
+  tree field;
+
+  /* The ABI obviously doesn't specify how packed structures are
+     passed.  These are defined to be passed in int regs if possible,
+     otherwise memory.  */
+  int packed_p = 0;
+
+  /* We need to compute how many registers are needed so we can
+     allocate the PARALLEL but before we can do that we need to know
+     whether there are any packed fields.  If there are, int regs are
+     used regardless of whether there are fp values present.  */
+  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+    {
+      if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
+	{
+	  packed_p = 1;
+	  break;
+	}
+    }
+
+  /* Compute how many registers we need.  */
+  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+    {
+      if (TREE_CODE (field) == FIELD_DECL)
+	{
+	  int bitpos = startbitpos;
+	  if (DECL_FIELD_BITPOS (field))
+	    bitpos += TREE_INT_CST_LOW (DECL_FIELD_BITPOS (field));
+	  /* ??? FIXME: else assume zero offset.  */
+
+	  if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
+	    {
+	      function_arg_record_value_1 (TREE_TYPE (field), bitpos, parms);
+	    }
+	  else if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
+	           && TARGET_FPU
+	           && ! packed_p
+	           && parms->named)
+	    {
+	      if (parms->intoffset != -1)
+		{
+		  int intslots, this_slotno;
+
+		  intslots = (bitpos - parms->intoffset + BITS_PER_WORD - 1)
+		    / BITS_PER_WORD;
+		  this_slotno = parms->slotno + parms->intoffset
+		    / BITS_PER_WORD;
+
+		  intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
+		  intslots = MAX (intslots, 0);
+		  parms->nregs += intslots;
+		  parms->intoffset = -1;
+		}
+
+	      /* There's no need to check this_slotno < SPARC_FP_ARG_MAX.
+		 If it wasn't true we wouldn't be here.  */
+	      parms->nregs += 1;
+	    }
+	  else
+	    {
+	      if (parms->intoffset == -1)
+		parms->intoffset = bitpos;
+	    }
+	}
+    }
+}
+
+/* Handle recursive structure field register assignment.  */
+
+static void 
+function_arg_record_value_3 (bitpos, parms)
+     int bitpos;
+     struct function_arg_record_value_parms *parms;
+{
+  enum machine_mode mode;
+  int regno, this_slotno, intslots, intoffset;
+  rtx reg;
+
+  if (parms->intoffset == -1)
+    return;
+  intoffset = parms->intoffset;
+  parms->intoffset = -1;
+
+  intslots = (bitpos - intoffset + BITS_PER_WORD - 1) / BITS_PER_WORD;
+  this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
+
+  intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
+  if (intslots <= 0)
+    return;
+
+  /* If this is the trailing part of a word, only load that much into
+     the register.  Otherwise load the whole register.  Note that in
+     the latter case we may pick up unwanted bits.  It's not a problem
+     at the moment but may wish to revisit.  */
+
+  if (intoffset % BITS_PER_WORD != 0)
+    {
+      mode = mode_for_size (BITS_PER_WORD - intoffset%BITS_PER_WORD,
+			    MODE_INT, 0);
+    }
+  else
+    mode = word_mode;
+
+  intoffset /= BITS_PER_UNIT;
+  do
+    {
+      regno = parms->regbase + this_slotno;
+      reg = gen_rtx (REG, mode, regno);
+      XVECEXP (parms->ret, 0, parms->nregs)
+	= gen_rtx (EXPR_LIST, VOIDmode, reg, GEN_INT (intoffset));
+
+      this_slotno += 1;
+      intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
+      parms->nregs += 1;
+      intslots -= 1;
+    }
+  while (intslots > 0);
+}
+
+static void
+function_arg_record_value_2 (type, startbitpos, parms)
+     tree type;
+     int startbitpos;
+     struct function_arg_record_value_parms *parms;
+{
+  tree field;
+  int packed_p = 0;
+
+  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+    {
+      if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
+	{
+	  packed_p = 1;
+	  break;
+	}
+    }
+
+  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+    {
+      if (TREE_CODE (field) == FIELD_DECL)
+	{
+	  int bitpos = startbitpos;
+	  if (DECL_FIELD_BITPOS (field))
+	    bitpos += TREE_INT_CST_LOW (DECL_FIELD_BITPOS (field));
+	  /* ??? FIXME: else assume zero offset.  */
+
+	  if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
+	    {
+	      function_arg_record_value_2 (TREE_TYPE (field), bitpos, parms);
+	    }
+	  else if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
+	           && TARGET_FPU
+	           && ! packed_p
+	           && parms->named)
+	    {
+	      int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
+	      rtx reg;
+
+	      function_arg_record_value_3 (bitpos, parms);
+
+	      reg = gen_rtx (REG, DECL_MODE (field),
+			         (SPARC_FP_ARG_FIRST + this_slotno * 2
+			          + (DECL_MODE (field) == SFmode
+				     && (bitpos & 32) != 0)));
+	      XVECEXP (parms->ret, 0, parms->nregs)
+		= gen_rtx (EXPR_LIST, VOIDmode, reg,
+			   GEN_INT (bitpos / BITS_PER_UNIT));
+	      parms->nregs += 1;
+	    }
+	  else
+	    {
+	      if (parms->intoffset == -1)
+		parms->intoffset = bitpos;
+	    }
+	}
+    }
+}
+
+static rtx
+function_arg_record_value (type, slotno, named, regbase)
+     tree type;
+     int slotno, named, regbase;
+{
+  HOST_WIDE_INT typesize = int_size_in_bytes (type);
+  struct function_arg_record_value_parms parms;
+  int nregs;
+
+  parms.ret = NULL_RTX;
+  parms.slotno = slotno;
+  parms.named = named;
+  parms.regbase = regbase;
+
+  /* Compute how many registers we need.  */
+  parms.nregs = 0;
+  parms.intoffset = 0;
+  function_arg_record_value_1 (type, 0, &parms);
+
+  if (parms.intoffset != -1)
+    {
+      int intslots, this_slotno;
+
+      intslots = (typesize*BITS_PER_UNIT - parms.intoffset + BITS_PER_WORD - 1)
+	/ BITS_PER_WORD;
+      this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
+
+      intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
+      intslots = MAX (intslots, 0);
+
+      parms.nregs += intslots;
+    }
+  nregs = parms.nregs;
+
+  /* Allocate the vector and handle some annoying special cases.  */
+  if (nregs == 0)
+    {
+      /* ??? Empty structure has no value?  Duh?  */
+      if (typesize <= 0)
+	{
+	  /* Though there's nothing really to store, return a word register
+	     anyway so the rest of gcc doesn't go nuts.  Returning a PARALLEL
+	     leads to breakage due to the fact that there are zero bytes to
+	     load.  */
+	  return gen_rtx (REG, DImode, regbase);
+	}
+      else
+	{
+	  /* ??? C++ has structures with no fields, and yet a size.  Give up
+	     for now and pass everything back in integer registers.  */
+	  nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+	}
+      if (nregs + slotno > SPARC_INT_ARG_MAX)
+	nregs = SPARC_INT_ARG_MAX - slotno;
+    }
+  if (nregs == 0)
+    abort();
+
+  parms.ret = gen_rtx (PARALLEL, VOIDmode, rtvec_alloc (nregs));
+
+  /* Fill in the entries.  */
+  parms.nregs = 0;
+  parms.intoffset = 0;
+  function_arg_record_value_2 (type, 0, &parms);
+  function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);
+
+  if (parms.nregs != nregs)
+    abort ();
+
+  return parms.ret;
+}
+
 /* Handle the FUNCTION_ARG macro.
    Determine where to put an argument to a function.
    Value is zero to push the argument on the stack,
@@ -3909,6 +4344,7 @@ function_arg (cum, mode, type, named, incoming_p)
 	{
 	  /* "* 2" because fp reg numbers are recorded in 4 byte
 	     quantities.  */
+#if 0
 	  /* ??? This will cause the value to be passed in the fp reg and
 	     in the stack.  When a prototype exists we want to pass the
 	     value in the reg but reserve space on the stack.  That's an
@@ -3921,172 +4357,58 @@ function_arg (cum, mode, type, named, incoming_p)
 				       gen_rtx (EXPR_LIST, VOIDmode,
 						reg, const0_rtx)));
 	  else
+#else
+	  /* ??? It seems that passing back a register even when past
+	     the area declared by REG_PARM_STACK_SPACE will allocate
+	     space appropriately, and will not copy the data onto the
+	     stack, exactly as we desire.
+
+	     This is due to locate_and_pad_parm being called in
+	     expand_call whenever reg_parm_stack_space > 0, which
+	     while beneficial to our example here, would seem to be
+	     in error from what had been intended.  Ho hum...  -- r~ */
+#endif
 	    return reg;
 	}
       else
 	{
+	  rtx v0, v1;
+
 	  if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
 	    {
-	      int regbase = (incoming_p
-			     ? SPARC_INCOMING_INT_ARG_FIRST
-			     : SPARC_OUTGOING_INT_ARG_FIRST);
-	      int intreg = regbase + (regno - SPARC_FP_ARG_FIRST) / 2;
-	      return gen_rtx (PARALLEL, mode,
-			      gen_rtvec (2,
-					 gen_rtx (EXPR_LIST, VOIDmode,
-						  reg, const0_rtx),
-					 gen_rtx (EXPR_LIST, VOIDmode,
-						  gen_rtx (REG, mode, intreg),
-						  const0_rtx)));
+	      int intreg;
+
+	      /* On incoming, we don't need to know that the value
+		 is passed in %f0 and %i0, and it confuses other parts
+		 causing needless spillage even on the simplest cases.  */
+	      if (incoming_p)
+		return reg;
+
+	      intreg = (SPARC_OUTGOING_INT_ARG_FIRST
+			+ (regno - SPARC_FP_ARG_FIRST) / 2);
+
+	      v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
+	      v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
+				      const0_rtx);
+	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
 	    }
 	  else
-	    return gen_rtx (PARALLEL, mode,
-			    gen_rtvec (2,
-				       gen_rtx (EXPR_LIST, VOIDmode,
-						NULL_RTX, const0_rtx),
-				       gen_rtx (EXPR_LIST, VOIDmode,
-						reg, const0_rtx)));
+	    {
+	      v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
+	      v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
+	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
+	    }
 	}
     }
   else if (type && TREE_CODE (type) == RECORD_TYPE)
     {
       /* Structures up to 16 bytes in size are passed in arg slots on the
 	 stack and are promoted to registers where possible.  */
-      tree field;
-      rtx ret;
-      int i;
-      int nregs;
-      /* Starting bit position of a sequence of integer fields, counted from
-	 msb of left most byte, -1 if last field wasn't an int.  */
-      /* ??? This isn't entirely necessary, some simplification
-	 may be possible.  */
-      int start_int_bitpos;
-      /* Current bitpos in struct, counted from msb of left most byte.  */
-      int bitpos, this_slotno;
-      /* The ABI obviously doesn't specify how packed
-	 structures are passed.  These are defined to be passed
-	 in int regs if possible, otherwise memory.  */
-      int packed_p = 0;
 
       if (int_size_in_bytes (type) > 16)
 	abort (); /* shouldn't get here */
 
-      /* We need to compute how many registers are needed so we can allocate
-	 the PARALLEL but before we can do that we need to know whether there
-	 are any packed fields.  If there are, int regs are used regardless of
-	 whether there are fp values present.  */
-      for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
-	{
-	  if (TREE_CODE (field) == FIELD_DECL
-	      && DECL_PACKED (field))
-	    {
-	      packed_p = 1;
-	      break;
-	    }
-	}
-
-      /* Compute how many registers we need.  */
-      nregs = 0;
-      start_int_bitpos = -1;
-      for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
-	{
-	  bitpos = TREE_INT_CST_LOW (DECL_FIELD_BITPOS (field));
-	  this_slotno = slotno + bitpos / BITS_PER_WORD;
-	  if (TREE_CODE (field) == FIELD_DECL)
-	    {
-	      if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
-		  && TARGET_FPU
-		  && ! packed_p
-		  && named)
-		{
-		  /* There's no need to check this_slotno < SPARC_FP_ARG MAX.
-		     If it wasn't true we wouldn't be here.  */
-		  nregs++;
-		  start_int_bitpos = -1;
-		}
-	      else if (this_slotno < SPARC_INT_ARG_MAX)
-		{
-		  if (start_int_bitpos == -1)
-		    {
-		      nregs++;
-		      start_int_bitpos = bitpos;
-		    }
-		  else
-		    {
-		      if (bitpos % BITS_PER_WORD == 0)
-			nregs++;
-		    }
-		}
-	    }
-	}
-      if (nregs == 0)
-	abort ();
-
-      ret = gen_rtx (PARALLEL, BLKmode, rtvec_alloc (nregs + 1));
-
-      /* ??? This causes the entire struct to be passed in memory.
-	 This isn't necessary, but is left for later.  */
-      XVECEXP (ret, 0, 0) = gen_rtx (EXPR_LIST, VOIDmode, NULL_RTX,
-				     const0_rtx);
-
-      /* Fill in the entries.  */
-      start_int_bitpos = -1;
-      for (i = 1, field = TYPE_FIELDS (type);
-	   field;
-	   field = TREE_CHAIN (field))
-	{
-	  bitpos = TREE_INT_CST_LOW (DECL_FIELD_BITPOS (field));
-	  this_slotno = slotno + bitpos / BITS_PER_WORD;
-	  if (TREE_CODE (field) == FIELD_DECL)
-	    {
-	      if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
-		  && TARGET_FPU
-		  && ! packed_p
-		  && named)
-		{
-		  reg = gen_rtx (REG, DECL_MODE (field),
-				 (SPARC_FP_ARG_FIRST + this_slotno * 2
-				  + (DECL_MODE (field) == SFmode
-				     && (bitpos & 32) != 0)));
-		  XVECEXP (ret, 0, i) = gen_rtx (EXPR_LIST, VOIDmode, reg,
-						 GEN_INT (bitpos / BITS_PER_UNIT));
-		  i++;
-		  start_int_bitpos = -1;
-		}
-	      else
-		{
-		  if (this_slotno < SPARC_INT_ARG_MAX
-		      && (start_int_bitpos == -1
-			  || bitpos % BITS_PER_WORD == 0))
-		    {
-		      enum machine_mode mode;
-
-		      /* If this is the trailing part of a word, only load
-			 that much into the register.  Otherwise load the
-			 whole register.  Note that in the latter case we may
-			 pick up unwanted bits.  It's not a problem at the
-			 moment but may wish to revisit.  */
-		      if (bitpos % BITS_PER_WORD != 0)
-			mode = mode_for_size (BITS_PER_WORD - bitpos % BITS_PER_WORD,
-					      MODE_INT, 0);
-		      else
-			mode = word_mode;
-
-		      regno = regbase + this_slotno;
-		      reg = gen_rtx (REG, mode, regno);
-		      XVECEXP (ret, 0, i) = gen_rtx (EXPR_LIST, VOIDmode, reg,
-						     GEN_INT (bitpos / BITS_PER_UNIT));
-		      i++;
-		      if (start_int_bitpos == -1)
-			start_int_bitpos = bitpos;
-		    }
-		}
-	    }
-	}
-      if (i != nregs + 1)
-	abort ();
-
-      return ret;
+      return function_arg_record_value (type, slotno, named, regbase);
     }
   else if (type && TREE_CODE (type) == UNION_TYPE)
     {
@@ -4187,7 +4509,7 @@ function_arg_partial_nregs (cum, mode, type, named)
 /* Handle the FUNCTION_ARG_PASS_BY_REFERENCE macro.
    !v9: The SPARC ABI stipulates passing struct arguments (of any size) and
    quad-precision floats by invisible reference.
-   v9: aggregates greater than 16 bytes are passed by reference.
+   v9: Aggregates greater than 16 bytes are passed by reference.
    For Pascal, also pass arrays by reference.  */
 
 int
@@ -4205,6 +4527,8 @@ function_arg_pass_by_reference (cum, mode, type, named)
   else
     {
       return ((type && TREE_CODE (type) == ARRAY_TYPE)
+	      /* Treat complex values as aggregates; this covers TCmode.  */
+	      || GET_MODE_SIZE (mode) > 16
 	      || (type && AGGREGATE_TYPE_P (type)
 		  && int_size_in_bytes (type) > 16));
     }
@@ -4288,7 +4612,53 @@ function_arg_padding (mode, type)
 	      : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
 	     ? downward : upward));
 }
-
+
+/* Handle FUNCTION_VALUE, FUNCTION_OUTGOING_VALUE, and LIBCALL_VALUE macros.
+   For v9, function return values are subject to the same rules as arguments,
+   except that up to 32-bytes may be returned in registers.  */
+
+rtx
+function_value (type, mode, incoming_p)
+     tree type;
+     enum machine_mode mode;
+     int incoming_p;
+{
+  int regno;
+  int regbase = (incoming_p
+		 ? SPARC_OUTGOING_INT_ARG_FIRST
+		 : SPARC_INCOMING_INT_ARG_FIRST);
+
+  if (TARGET_ARCH64 && type)
+    {
+      if (TREE_CODE (type) == RECORD_TYPE)
+	{
+	  /* Structures up to 32 bytes in size are returned in registers,
+	     promoted to fp registers where possible.  */
+
+	  if (int_size_in_bytes (type) > 32)
+	    abort (); /* shouldn't get here */
+
+	  return function_arg_record_value (type, 0, 1, regbase);
+	}
+      else if (TREE_CODE (type) == UNION_TYPE)
+	{
+	  int bytes = int_size_in_bytes (type);
+
+	  if (bytes > 32)
+	    abort ();
+
+	  mode = mode_for_size (bytes * BITS_PER_UNIT, MODE_INT, 0);
+	}
+    }
+
+  if (incoming_p)
+    regno = BASE_RETURN_VALUE_REG (mode);
+  else
+    regno = BASE_OUTGOING_VALUE_REG (mode);
+
+  return gen_rtx (REG, mode, regno);
+}
+
 /* Do what is necessary for `va_start'.  The argument is ignored.
 
    We look at the current function to determine if stdarg or varargs
diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h
index 26a8231..629d542 100644
--- a/gcc/config/sparc/sparc.h
+++ b/gcc/config/sparc/sparc.h
@@ -276,8 +276,8 @@ Unrecognized value in TARGET_CPU_DEFAULT.
 #define NO_BUILTIN_PTRDIFF_TYPE
 #define NO_BUILTIN_SIZE_TYPE
 #endif
-#define PTRDIFF_TYPE (TARGET_ARCH64 ? "long long int" : "int")
-#define SIZE_TYPE (TARGET_ARCH64 ? "long long unsigned int" : "unsigned int")
+#define PTRDIFF_TYPE (TARGET_ARCH64 ? "long int" : "int")
+#define SIZE_TYPE (TARGET_ARCH64 ? "long unsigned int" : "unsigned int")
 
 /* ??? This should be 32 bits for v9 but what can we do?  */
 #define WCHAR_TYPE "short unsigned int"
@@ -1078,18 +1078,24 @@ extern int sparc_mode_class[];
 #define INITIALIZE_PIC initialize_pic ()
 #define FINALIZE_PIC finalize_pic ()
 
+/* Pick a default value we can notice from override_options:
+   !v9: Default is on.
+   v9: Default is off.  */
+
+#define DEFAULT_PCC_STRUCT_RETURN -1
+
 /* Sparc ABI says that quad-precision floats and all structures are returned
    in memory.
    For v9: unions <= 32 bytes in size are returned in int regs,
-   structures up to 32 bytes are returned in int and fp regs.
-   FIXME: wip */
+   structures up to 32 bytes are returned in int and fp regs.  */
 
 #define RETURN_IN_MEMORY(TYPE)				\
 (TARGET_ARCH32						\
  ? (TYPE_MODE (TYPE) == BLKmode				\
     || TYPE_MODE (TYPE) == TFmode			\
     || TYPE_MODE (TYPE) == TCmode)			\
- : TYPE_MODE (TYPE) == BLKmode)
+ : (TYPE_MODE (TYPE) == BLKmode				\
+    && int_size_in_bytes (TYPE) > 32))
 
 /* Functions which return large structures get the address
    to place the wanted value at offset 64 from the frame.
@@ -1449,9 +1455,14 @@ extern char leaf_reg_remap[];
    : (STRUCT_VALUE_OFFSET + UNITS_PER_WORD))
 
 /* When a parameter is passed in a register, stack space is still
-   allocated for it.  */
-/* This only takes into account the int regs.
-   fp regs are handled elsewhere.  */
+   allocated for it.
+   !v9: All 6 possible integer registers have backing store allocated.
+   v9: Only space for the arguments passed is allocated. */
+/* ??? Ideally, we'd use zero here (as the minimum), but zero has special
+   meaning to the backend.  Further, we need to be able to detect if a
+   varargs/unprototyped function is called, as they may want to spill more
+   registers than we've provided space.  Ugly, ugly.  So for now we retain
+   all 6 slots even for v9.  */
 #define REG_PARM_STACK_SPACE(DECL) (6 * UNITS_PER_WORD)
 
 /* Keep the stack pointer constant throughout the function.
@@ -1472,24 +1483,28 @@ extern char leaf_reg_remap[];
 /* Some subroutine macros specific to this machine.
    When !TARGET_FPU, put float return values in the general registers,
    since we don't have any fp registers.  */
-#define BASE_RETURN_VALUE_REG(MODE) \
-  (TARGET_ARCH64 \
-   ? (TARGET_FPU && GET_MODE_CLASS (MODE) == MODE_FLOAT ? 32 : 8) \
+#define BASE_RETURN_VALUE_REG(MODE)					\
+  (TARGET_ARCH64							\
+   ? (TARGET_FPU && FLOAT_MODE_P (MODE) ? 32 : 8)			\
    : (((MODE) == SFmode || (MODE) == DFmode) && TARGET_FPU ? 32 : 8))
-#define BASE_OUTGOING_VALUE_REG(MODE) \
-  (TARGET_ARCH64 \
-   ? (TARGET_FPU && GET_MODE_CLASS (MODE) == MODE_FLOAT ? 32 \
-      : TARGET_FLAT ? 8 : 24) \
+
+#define BASE_OUTGOING_VALUE_REG(MODE)				\
+  (TARGET_ARCH64						\
+   ? (TARGET_FPU && FLOAT_MODE_P (MODE) ? 32			\
+      : TARGET_FLAT ? 8 : 24)					\
    : (((MODE) == SFmode || (MODE) == DFmode) && TARGET_FPU ? 32	\
       : (TARGET_FLAT ? 8 : 24)))
-#define BASE_PASSING_ARG_REG(MODE) \
-  (TARGET_ARCH64 \
-   ? (TARGET_FPU && GET_MODE_CLASS (MODE) == MODE_FLOAT ? 32 : 8) \
+
+#define BASE_PASSING_ARG_REG(MODE)				\
+  (TARGET_ARCH64						\
+   ? (TARGET_FPU && FLOAT_MODE_P (MODE) ? 32 : 8)		\
    : 8)
-#define BASE_INCOMING_ARG_REG(MODE) \
-  (TARGET_ARCH64 \
-   ? (TARGET_FPU && GET_MODE_CLASS (MODE) == MODE_FLOAT ? 32 \
-      : TARGET_FLAT ? 8 : 24) \
+
+/* ??? FIXME -- seems wrong for v9 structure passing... */
+#define BASE_INCOMING_ARG_REG(MODE)				\
+  (TARGET_ARCH64						\
+   ? (TARGET_FPU && FLOAT_MODE_P (MODE) ? 32			\
+      : TARGET_FLAT ? 8 : 24)					\
    : (TARGET_FLAT ? 8 : 24))
 
 /* Define this macro if the target machine has "register windows".  This
@@ -1515,19 +1530,20 @@ extern char leaf_reg_remap[];
 
 /* On SPARC the value is found in the first "output" register.  */
 
-#define FUNCTION_VALUE(VALTYPE, FUNC)  \
-  gen_rtx_REG (TYPE_MODE (VALTYPE), BASE_RETURN_VALUE_REG (TYPE_MODE (VALTYPE)))
+extern struct rtx_def *function_value ();
+#define FUNCTION_VALUE(VALTYPE, FUNC) \
+  function_value ((VALTYPE), TYPE_MODE (VALTYPE), 1)
 
 /* But the called function leaves it in the first "input" register.  */
 
-#define FUNCTION_OUTGOING_VALUE(VALTYPE, FUNC)  \
-  gen_rtx_REG (TYPE_MODE (VALTYPE), BASE_OUTGOING_VALUE_REG (TYPE_MODE (VALTYPE)))
+#define FUNCTION_OUTGOING_VALUE(VALTYPE, FUNC) \
+  function_value ((VALTYPE), TYPE_MODE (VALTYPE), 0)
 
 /* Define how to find the value returned by a library function
    assuming the value has mode MODE.  */
 
-#define LIBCALL_VALUE(MODE)	\
-  gen_rtx_REG (MODE, BASE_RETURN_VALUE_REG (MODE))
+#define LIBCALL_VALUE(MODE) \
+  function_value (NULL_TREE, (MODE), 1)
 
 /* 1 if N is a possible register number for a function value
    as seen by the caller.
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index 4076f22..a86434c 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -2202,7 +2202,7 @@
 (define_insn "*movdi_sp32_v9"
   [(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "=r,T,Q,r,r,?e,?e,?Q,?b")
 	(match_operand:DI 1 "general_operand" "r,J,r,Q,i,e,Q,e,J"))]
-  "TARGET_V9
+  "TARGET_V9 && ! TARGET_ARCH64
    && (register_operand (operands[0], DImode)
        || register_operand (operands[1], DImode)
        || operands[1] == const0_rtx)
@@ -2297,7 +2297,7 @@
     case 3:
       return \"stx %r1,%0\";
     case 4:
-      return \"mov %1,%0\";
+      return \"fmovd %1,%0\";
     case 5:
       return \"ldd %1,%0\";
     case 6:
@@ -2367,8 +2367,8 @@
 ;; to be reloaded by putting the constant into memory.
 ;; It must come before the more general movsf pattern.
 (define_insn "*movsf_const_insn"
-  [(set (match_operand:SF 0 "general_operand" "=?r,f,m,d")
-	(match_operand:SF 1 "" "?F,m,G,G"))]
+  [(set (match_operand:SF 0 "general_operand" "=f,d,m,?r")
+	(match_operand:SF 1 "" "m,G,G,?F"))]
   "TARGET_FPU
    && GET_CODE (operands[1]) == CONST_DOUBLE
    && (GET_CODE (operands[0]) == REG
@@ -2378,19 +2378,19 @@
   switch (which_alternative)
     {
     case 0:
-      return singlemove_string (operands);
-    case 1:
       return \"ld %1,%0\";
+    case 1:
+      return \"fzeros %0\";
     case 2:
       return \"st %%g0,%0\";
     case 3:
-      return \"fzeros %0\";
+      return singlemove_string (operands);
     default:
       abort ();
     }
 }"
-  [(set_attr "type" "load,fpload,store,fpmove")
-   (set_attr "length" "2,1,1,1")])
+  [(set_attr "type" "fpload,fpmove,store,load")
+   (set_attr "length" "1,1,1,2")])
 
 (define_expand "movsf"
   [(set (match_operand:SF 0 "general_operand" "")
@@ -2403,19 +2403,19 @@
 }")
 
 (define_insn "*movsf_insn"
-  [(set (match_operand:SF 0 "reg_or_nonsymb_mem_operand" "=f,r,f,r,Q,Q")
-	(match_operand:SF 1 "reg_or_nonsymb_mem_operand" "f,r,Q,Q,f,r"))]
+  [(set (match_operand:SF 0 "reg_or_nonsymb_mem_operand" "=f,f,Q,r,r,Q")
+	(match_operand:SF 1 "reg_or_nonsymb_mem_operand"  "f,Q,f,r,Q,r"))]
   "TARGET_FPU
    && (register_operand (operands[0], SFmode)
        || register_operand (operands[1], SFmode))"
   "@
    fmovs %1,%0
-   mov %1,%0
-   ld %1,%0
    ld %1,%0
    st %1,%0
+   mov %1,%0
+   ld %1,%0
    st %1,%0"
-  [(set_attr "type" "fpmove,move,fpload,load,fpstore,store")])
+  [(set_attr "type" "fpmove,fpload,fpstore,move,load,store")])
 
 ;; Exactly the same as above, except that all `f' cases are deleted.
 ;; This is necessary to prevent reload from ever trying to use a `f' reg
@@ -2492,8 +2492,8 @@
 }")
 
 (define_insn "*movdf_insn"
-  [(set (match_operand:DF 0 "reg_or_nonsymb_mem_operand" "=T,U,e,r,Q,Q,e,r")
-	(match_operand:DF 1 "reg_or_nonsymb_mem_operand" "U,T,e,r,e,r,Q,Q"))]
+  [(set (match_operand:DF 0 "reg_or_nonsymb_mem_operand" "=e,Q,e,T,U,r,Q,r")
+	(match_operand:DF 1 "reg_or_nonsymb_mem_operand"  "e,e,Q,U,T,r,r,Q"))]
   "TARGET_FPU
    && (register_operand (operands[0], DFmode)
        || register_operand (operands[1], DFmode))"
@@ -2512,7 +2512,7 @@
 
 (define_insn "*movdf_no_e_insn"
   [(set (match_operand:DF 0 "reg_or_nonsymb_mem_operand" "=T,U,r,Q,&r")
-	(match_operand:DF 1 "reg_or_nonsymb_mem_operand" "U,T,r,r,Q"))]
+	(match_operand:DF 1 "reg_or_nonsymb_mem_operand"  "U,T,r,r,Q"))]
   "! TARGET_FPU
    && (register_operand (operands[0], DFmode)
        || register_operand (operands[1], DFmode))"
@@ -2619,8 +2619,8 @@
 }")
 
 (define_insn "*movtf_insn"
-  [(set (match_operand:TF 0 "reg_or_nonsymb_mem_operand" "=e,r,Q,Q,e,&r")
-	(match_operand:TF 1 "reg_or_nonsymb_mem_operand" "e,r,e,r,Q,Q"))]
+  [(set (match_operand:TF 0 "reg_or_nonsymb_mem_operand" "=e,Q,e,r,Q,r")
+	(match_operand:TF 1 "reg_or_nonsymb_mem_operand"  "e,e,Q,r,r,Q"))]
   "TARGET_FPU
    && (register_operand (operands[0], TFmode)
        || register_operand (operands[1], TFmode))"
@@ -5517,7 +5517,8 @@ if (! TARGET_ARCH64)
   ""
   "*
 {
-  /* Some implementations are reported to have problems with
+  /* Some implementations (e.g. TurboSparc) are reported to have problems
+     with
 	foo: b,a foo
      i.e. an empty loop with the annul bit set.  The workaround is to use 
         foo: b foo; nop
@@ -5824,7 +5825,7 @@ if (! TARGET_ARCH64)
   [(set_attr "type" "call")])
 
 (define_insn "*call_value_address_sp64"
-  [(set (match_operand 0 "" "=rf")
+  [(set (match_operand 0 "" "")
 	(call (mem:SI (match_operand:DI 1 "address_operand" "p"))
 	      (match_operand 2 "" "")))
    (clobber (reg:DI 15))]
@@ -5834,7 +5835,7 @@ if (! TARGET_ARCH64)
   [(set_attr "type" "call")])
 
 (define_insn "*call_value_symbolic_sp64"
-  [(set (match_operand 0 "" "=rf")
+  [(set (match_operand 0 "" "")
 	(call (mem:SI (match_operand:DI 1 "symbolic_operand" "s"))
 	      (match_operand 2 "" "")))
    (clobber (reg:DI 15))]
@@ -5982,6 +5983,7 @@ if (! TARGET_ARCH64)
   rtx fp = operands[1];
   rtx stack = operands[2];
   rtx lab = operands[3];
+  rtx labreg;
 
   /* Trap instruction to flush all the register windows.  */
   emit_insn (gen_flush_register_windows ());
@@ -5995,7 +5997,8 @@ if (! TARGET_ARCH64)
 
   /* Find the containing function's current nonlocal goto handler,
      which will do any cleanups and then jump to the label.  */
-  emit_move_insn (gen_rtx (REG, Pmode, 8), lab);
+  labreg = gen_rtx (REG, Pmode, 8);
+  emit_move_insn (labreg, lab);
 
   /* Restore %fp from stack pointer value for containing function.
      The restore insn that follows will move this to %sp,
@@ -6007,16 +6010,18 @@ if (! TARGET_ARCH64)
   /*emit_insn (gen_rtx (USE, VOIDmode, frame_pointer_rtx));*/
   emit_insn (gen_rtx (USE, VOIDmode, stack_pointer_rtx));
   /* Return, restoring reg window and jumping to goto handler.  */
-  if (TARGET_V9 && GET_CODE (chain) == CONST_INT)
+  if (TARGET_V9 && GET_CODE (chain) == CONST_INT
+      && ! (INTVAL (chain) & ~(HOST_WIDE_INT)0xffffffff))
     {
-      emit_insn (gen_goto_handler_and_restore_v9 (static_chain_rtx, chain));
+      emit_insn (gen_goto_handler_and_restore_v9 (labreg, static_chain_rtx,
+						  chain));
       emit_barrier ();
       DONE;
     }
   /* Put in the static chain register the nonlocal label address.  */
   emit_move_insn (static_chain_rtx, chain);
   emit_insn (gen_rtx (USE, VOIDmode, static_chain_rtx));
-  emit_insn (gen_goto_handler_and_restore ());
+  emit_insn (gen_goto_handler_and_restore (labreg));
   emit_barrier ();
   DONE;
 }")
@@ -6029,20 +6034,31 @@ if (! TARGET_ARCH64)
   [(set_attr "type" "misc")])
 
 (define_insn "goto_handler_and_restore"
-  [(unspec_volatile [(reg:SI 8)] 2)]
-  "! TARGET_V9"
-  "jmp %%o0+0\;restore"
+  [(unspec_volatile [(match_operand:SI 0 "register_operand" "=r")] 2)]
+  ""
+  "jmp %0+0\;restore"
   [(set_attr "type" "misc")
    (set_attr "length" "2")])
 
 (define_insn "goto_handler_and_restore_v9"
-  [(unspec_volatile [(reg:SI 8)
-		     (match_operand:SI 0 "register_operand" "=r,r")
-		     (match_operand:SI 1 "const_int_operand" "I,n")] 3)]
-  "TARGET_V9"
+  [(unspec_volatile [(match_operand:SI 0 "register_operand" "=r,r")
+		     (match_operand:SI 1 "register_operand" "=r,r")
+		     (match_operand:SI 2 "const_int_operand" "I,n")] 3)]
+  "TARGET_V9 && ! TARGET_ARCH64"
+  "@
+   return %0+0\;mov %2,%Y1
+   sethi %%hi(%2),%1\;return %0+0\;or %Y1,%%lo(%2),%Y1"
+  [(set_attr "type" "misc")
+   (set_attr "length" "2,3")])
+
+(define_insn "*goto_handler_and_restore_v9_sp64"
+  [(unspec_volatile [(match_operand:DI 0 "register_operand" "=r,r")
+		     (match_operand:DI 1 "register_operand" "=r,r")
+		     (match_operand:SI 2 "const_int_operand" "I,n")] 3)]
+  "TARGET_V9 && TARGET_ARCH64"
   "@
-   return %%o0+0\;mov %1,%Y0
-   sethi %%hi(%1),%0\;return %%o0+0\;or %Y0,%%lo(%1),%Y0"
+   return %0+0\;mov %2,%Y1
+   sethi %%hi(%2),%1\;return %0+0\;or %Y1,%%lo(%2),%Y1"
   [(set_attr "type" "misc")
    (set_attr "length" "2,3")])
 
diff --git a/gcc/config/sparc/sysv4.h b/gcc/config/sparc/sysv4.h
index 3a1a82d..7e90bdd 100644
--- a/gcc/config/sparc/sysv4.h
+++ b/gcc/config/sparc/sysv4.h
@@ -33,6 +33,16 @@ Boston, MA 02111-1307, USA.  */
 
 #include "svr4.h"
 
+/* ??? Put back the SIZE_TYPE/PTRDIFF_TYPE definitions set by sparc.h.
+   Why, exactly, is svr4.h messing with this?  Seems like the chip 
+   would know best.  */
+
+#undef SIZE_TYPE
+#define SIZE_TYPE (TARGET_ARCH64 ? "long unsigned int" : "unsigned int")
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE (TARGET_ARCH64 ? "long int" : "int")
+
 /* Undefined some symbols which are defined in "svr4.h" but which are
    appropriate only for typical svr4 systems, but not for the specific
    case of svr4 running on a Sparc.  */
author	Richard Henderson <rth@cygnus.com>	1998-05-03 07:19:46 -0700
committer	Richard Henderson <rth@gcc.gnu.org>	1998-05-03 07:19:46 -0700
commit	82d6b402bb0e637381ec610a1a448237afaab48c (patch)
tree	74046c7306c56e7c32bd1dc66a7f2387740a3fbe /gcc
parent	959d87966a28c5f67a33db114e4d342fe7072f0c (diff)
download	gcc-82d6b402bb0e637381ec610a1a448237afaab48c.zip gcc-82d6b402bb0e637381ec610a1a448237afaab48c.tar.gz gcc-82d6b402bb0e637381ec610a1a448237afaab48c.tar.bz2