aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/sparc
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/sparc')
-rw-r--r--sysdeps/sparc/DEFS.h4
-rw-r--r--sysdeps/sparc/Dist4
-rw-r--r--sysdeps/sparc/Implies2
-rw-r--r--sysdeps/sparc/Makefile57
-rw-r--r--sysdeps/sparc/__longjmp.S47
-rw-r--r--sysdeps/sparc/add_n.S134
-rw-r--r--sysdeps/sparc/addmul_1.S146
-rw-r--r--sysdeps/sparc/alloca.S32
-rw-r--r--sysdeps/sparc/bsd-_setjmp.S26
-rw-r--r--sysdeps/sparc/bsd-setjmp.S26
-rw-r--r--sysdeps/sparc/bytesex.h3
-rw-r--r--sysdeps/sparc/divrem.m4234
-rw-r--r--sysdeps/sparc/jmp_buf.h14
-rw-r--r--sysdeps/sparc/memcopy.h21
-rw-r--r--sysdeps/sparc/mul_1.S198
-rw-r--r--sysdeps/sparc/rem.S365
-rw-r--r--sysdeps/sparc/sdiv.S365
-rw-r--r--sysdeps/sparc/setjmp.S31
-rw-r--r--sysdeps/sparc/sparc8/addmul_1.S116
-rw-r--r--sysdeps/sparc/sparc8/mul_1.S91
-rw-r--r--sysdeps/sparc/sparc8/submul_1.S57
-rw-r--r--sysdeps/sparc/sparc8/udiv_qrnnd.S186
-rw-r--r--sysdeps/sparc/sqrt.c34
-rw-r--r--sysdeps/sparc/sub_n.S134
-rw-r--r--sysdeps/sparc/submul_1.S146
-rw-r--r--sysdeps/sparc/udiv.S348
-rw-r--r--sysdeps/sparc/udiv_qrnnd.S143
-rw-r--r--sysdeps/sparc/umul.S153
-rw-r--r--sysdeps/sparc/urem.S348
29 files changed, 3465 insertions, 0 deletions
diff --git a/sysdeps/sparc/DEFS.h b/sysdeps/sparc/DEFS.h
new file mode 100644
index 0000000..ef69663
--- /dev/null
+++ b/sysdeps/sparc/DEFS.h
@@ -0,0 +1,4 @@
+#define FUNC(name) \
+ .global name; \
+ .align 4; \
+ name:
diff --git a/sysdeps/sparc/Dist b/sysdeps/sparc/Dist
new file mode 100644
index 0000000..49cd4dc
--- /dev/null
+++ b/sysdeps/sparc/Dist
@@ -0,0 +1,4 @@
+DEFS.h
+mul.S umul.S
+divrem.m4 sdiv.S udiv.S rem.S urem.S
+alloca.S
diff --git a/sysdeps/sparc/Implies b/sysdeps/sparc/Implies
new file mode 100644
index 0000000..da719e1
--- /dev/null
+++ b/sysdeps/sparc/Implies
@@ -0,0 +1,2 @@
+# SPARC uses IEEE 754 floating point.
+ieee754
diff --git a/sysdeps/sparc/Makefile b/sysdeps/sparc/Makefile
new file mode 100644
index 0000000..1154d05
--- /dev/null
+++ b/sysdeps/sparc/Makefile
@@ -0,0 +1,57 @@
+# Copyright (C) 1991, 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Library General Public License
+# as published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Library General Public License for more details.
+
+# You should have received a copy of the GNU Library General Public
+# License along with the GNU C Library; see the file COPYING.LIB. If
+# not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+# Cambridge, MA 02139, USA.
+
+ifeq ($(subdir),gnulib)
+routines = mul umul $(divrem) alloca
+endif # gnulib
+
+# We distribute these files, even though they are generated,
+# so as to avoid the need for a functioning m4 to build the library.
+divrem := sdiv udiv rem urem
+
++divrem-NAME-sdiv := div
++divrem-NAME-udiv := udiv
++divrem-NAME-rem := rem
++divrem-NAME-urem := urem
++divrem-NAME = $(+divrem-NAME-$(basename $(notdir $@)))
++divrem-OP-div := div
++divrem-OP-udiv := div
++divrem-OP-rem := rem
++divrem-OP-urem := rem
++divrem-S-div := true
++divrem-S-rem := true
++divrem-S-udiv := false
++divrem-S-urem := false
+$(divrem:%=$(sysdep_dir)/sparc/%.S): $(sysdep_dir)/sparc/divrem.m4
+ (echo "define(NAME,\`.$(+divrem-NAME)')\
+ define(OP,\`$(+divrem-OP-$(+divrem-NAME))')\
+ define(S,\`$(+divrem-S-$(+divrem-NAME))')\
+ /* This file is generated from divrem.m4; DO NOT EDIT! */"; \
+ cat $<) | $(M4) > $@-tmp
+# Make it unwritable so noone will edit it by mistake.
+ -chmod a-w $@-tmp
+ mv -f $@-tmp $@
+ test -d CVS && cvs commit -m'Regenerated from $<' $@
+
+sysdep-realclean := $(sysdep-realclean) $(divrem:%=sysdeps/sparc/%.S)
+
+ifeq ($(subdir),crypt)
+
+crypt := crypt.sparc # Use crypt/crypt.sparc.S.
+
+endif # crypt
diff --git a/sysdeps/sparc/__longjmp.S b/sysdeps/sparc/__longjmp.S
new file mode 100644
index 0000000..adff06e
--- /dev/null
+++ b/sysdeps/sparc/__longjmp.S
@@ -0,0 +1,47 @@
+/* Copyright (C) 1991, 1993 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA. */
+
+#include <sysdep.h>
+#ifdef __svr4__
+#include <sys/trap.h>
+#else
+#include <machine/trap.h>
+#endif
+
+/* NOTE: This code depends on the definition of `__jmp_buf' in <jmp_buf.h>. */
+
+ENTRY (__longjmp)
+ /* Do a "flush register windows trap". The trap handler in the
+ kernel writes all the register windows to their stack slots, and
+ marks them all as invalid (needing to be sucked up from the
+ stack when used). This ensures that all information needed to
+ unwind to these callers is in memory, not in the register
+ windows. */
+ ta ST_FLUSH_WINDOWS
+ ld [%o0], %o7 /* Return PC. */
+ ld [%o0 + 4], %fp /* Saved SP. */
+ sub %fp, 64, %sp /* Allocate a register save area. */
+
+ /* if (%o1 == 0) %o1 = 1; */
+ tst %o1
+ be,a Ldone
+ mov 1, %o1
+
+Ldone: retl
+ /* On the way out, put the return value in %o0. */
+ restore %o1, 0, %o0
diff --git a/sysdeps/sparc/add_n.S b/sysdeps/sparc/add_n.S
new file mode 100644
index 0000000..3be3e39
--- /dev/null
+++ b/sysdeps/sparc/add_n.S
@@ -0,0 +1,134 @@
+! sparc __mpn_add_n -- Add two limb vectors of the same length > 0 and store
+! sum in a third limb vector.
+
+! Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr %o0
+! s1_ptr %o1
+! s2_ptr %o2
+! size %o3
+
+#include "sysdep.h"
+
+ .text
+ .align 4
+ .global C_SYMBOL_NAME(__mpn_add_n)
+C_SYMBOL_NAME(__mpn_add_n):
+ ld [%o1+0],%o4 ! read first limb from s1_ptr
+ srl %o3,4,%g1
+ ld [%o2+0],%o5 ! read first limb from s2_ptr
+
+ sub %g0,%o3,%o3
+ andcc %o3,(16-1),%o3
+ be Lzero
+ nop
+
+ sll %o3,2,%o3 ! multiply by 4
+ sub %o0,%o3,%o0 ! adjust res_ptr
+ sub %o1,%o3,%o1 ! adjust s1_ptr
+ sub %o2,%o3,%o2 ! adjust s2_ptr
+
+ mov %o4,%g2
+
+ sethi %hi(Lbase),%g3
+ or %g3,%lo(Lbase),%g3
+ sll %o3,2,%o3 ! multiply by 4
+ jmp %g3+%o3
+ mov %o5,%g3
+
+Loop: addxcc %g2,%g3,%o3
+ add %o1,64,%o1
+ st %o3,[%o0+60]
+ add %o2,64,%o2
+ ld [%o1+0],%o4
+ add %o0,64,%o0
+ ld [%o2+0],%o5
+Lzero: sub %g1,1,%g1 ! add 0 + 16r limbs (adjust loop counter)
+Lbase: ld [%o1+4],%g2
+ addxcc %o4,%o5,%o3
+ ld [%o2+4],%g3
+ st %o3,[%o0+0]
+ ld [%o1+8],%o4 ! add 15 + 16r limbs
+ addxcc %g2,%g3,%o3
+ ld [%o2+8],%o5
+ st %o3,[%o0+4]
+ ld [%o1+12],%g2 ! add 14 + 16r limbs
+ addxcc %o4,%o5,%o3
+ ld [%o2+12],%g3
+ st %o3,[%o0+8]
+ ld [%o1+16],%o4 ! add 13 + 16r limbs
+ addxcc %g2,%g3,%o3
+ ld [%o2+16],%o5
+ st %o3,[%o0+12]
+ ld [%o1+20],%g2 ! add 12 + 16r limbs
+ addxcc %o4,%o5,%o3
+ ld [%o2+20],%g3
+ st %o3,[%o0+16]
+ ld [%o1+24],%o4 ! add 11 + 16r limbs
+ addxcc %g2,%g3,%o3
+ ld [%o2+24],%o5
+ st %o3,[%o0+20]
+ ld [%o1+28],%g2 ! add 10 + 16r limbs
+ addxcc %o4,%o5,%o3
+ ld [%o2+28],%g3
+ st %o3,[%o0+24]
+ ld [%o1+32],%o4 ! add 9 + 16r limbs
+ addxcc %g2,%g3,%o3
+ ld [%o2+32],%o5
+ st %o3,[%o0+28]
+ ld [%o1+36],%g2 ! add 8 + 16r limbs
+ addxcc %o4,%o5,%o3
+ ld [%o2+36],%g3
+ st %o3,[%o0+32]
+ ld [%o1+40],%o4 ! add 7 + 16r limbs
+ addxcc %g2,%g3,%o3
+ ld [%o2+40],%o5
+ st %o3,[%o0+36]
+ ld [%o1+44],%g2 ! add 6 + 16r limbs
+ addxcc %o4,%o5,%o3
+ ld [%o2+44],%g3
+ st %o3,[%o0+40]
+ ld [%o1+48],%o4 ! add 5 + 16r limbs
+ addxcc %g2,%g3,%o3
+ ld [%o2+48],%o5
+ st %o3,[%o0+44]
+ ld [%o1+52],%g2 ! add 4 + 16r limbs
+ addxcc %o4,%o5,%o3
+ ld [%o2+52],%g3
+ st %o3,[%o0+48]
+ ld [%o1+56],%o4 ! add 3 + 16r limbs
+ addxcc %g2,%g3,%o3
+ ld [%o2+56],%o5
+ st %o3,[%o0+52]
+ ld [%o1+60],%g2 ! add 2 + 16r limbs
+ addxcc %o4,%o5,%o3
+ ld [%o2+60],%g3
+ st %o3,[%o0+56]
+ addx %g0,%g0,%o4
+ tst %g1
+ bne Loop
+ subcc %g0,%o4,%g0 ! restore cy (delay slot)
+
+ addxcc %g2,%g3,%o3
+ st %o3,[%o0+60] ! store most significant limb
+
+ retl
+ addx %g0,%g0,%o0 ! return carry-out from most sign. limb
diff --git a/sysdeps/sparc/addmul_1.S b/sysdeps/sparc/addmul_1.S
new file mode 100644
index 0000000..63e7db0
--- /dev/null
+++ b/sysdeps/sparc/addmul_1.S
@@ -0,0 +1,146 @@
+! SPARC __mpn_addmul_1 -- Multiply a limb vector with a limb and add
+! the result to a second limb vector.
+
+! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr o0
+! s1_ptr o1
+! size o2
+! s2_limb o3
+
+#include "sysdep.h"
+
+.text
+ .align 4
+ .global C_SYMBOL_NAME(__mpn_addmul_1)
+C_SYMBOL_NAME(__mpn_addmul_1):
+ ! Make S1_PTR and RES_PTR point at the end of their blocks
+ ! and put (- 4 x SIZE) in index/loop counter.
+ sll %o2,2,%o2
+ add %o0,%o2,%o4 ! RES_PTR in o4 since o0 is retval
+ add %o1,%o2,%o1
+ sub %g0,%o2,%o2
+
+ cmp %o3,0xfff
+ bgu Large
+ nop
+
+ ld [%o1+%o2],%o5
+ mov 0,%o0
+ b L0
+ add %o4,-4,%o4
+Loop0:
+ addcc %o5,%g1,%g1
+ ld [%o1+%o2],%o5
+ addx %o0,%g0,%o0
+ st %g1,[%o4+%o2]
+L0: wr %g0,%o3,%y
+ sra %o5,31,%g2
+ and %o3,%g2,%g2
+ andcc %g1,0,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,0,%g1
+ sra %g1,20,%g4
+ sll %g1,12,%g1
+ rd %y,%g3
+ srl %g3,20,%g3
+ or %g1,%g3,%g1
+
+ addcc %g1,%o0,%g1
+ addx %g2,%g4,%o0 ! add sign-compensation and cy to hi limb
+ addcc %o2,4,%o2 ! loop counter
+ bne Loop0
+ ld [%o4+%o2],%o5
+
+ addcc %o5,%g1,%g1
+ addx %o0,%g0,%o0
+ retl
+ st %g1,[%o4+%o2]
+
+
+Large: ld [%o1+%o2],%o5
+ mov 0,%o0
+ sra %o3,31,%g4 ! g4 = mask of ones iff S2_LIMB < 0
+ b L1
+ add %o4,-4,%o4
+Loop:
+ addcc %o5,%g3,%g3
+ ld [%o1+%o2],%o5
+ addx %o0,%g0,%o0
+ st %g3,[%o4+%o2]
+L1: wr %g0,%o5,%y
+ and %o5,%g4,%g2
+ andcc %g0,%g0,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%g0,%g1
+ rd %y,%g3
+ addcc %g3,%o0,%g3
+ addx %g2,%g1,%o0
+ addcc %o2,4,%o2
+ bne Loop
+ ld [%o4+%o2],%o5
+
+ addcc %o5,%g3,%g3
+ addx %o0,%g0,%o0
+ retl
+ st %g3,[%o4+%o2]
diff --git a/sysdeps/sparc/alloca.S b/sysdeps/sparc/alloca.S
new file mode 100644
index 0000000..0a8718d
--- /dev/null
+++ b/sysdeps/sparc/alloca.S
@@ -0,0 +1,32 @@
+/* Copyright (C) 1994 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA. */
+
+#include "DEFS.h"
+
+/* Code produced by Sun's C compiler calls this function with two extra
+ arguments which it makes relocatable symbols but seem always to be
+ the constant 96; I have no idea what they are for. */
+
+#ifndef NO_UNDERSCORES
+#define __builtin_alloca ___builtin_alloca
+#endif
+
+FUNC (__builtin_alloca)
+ sub %sp, %o0, %sp /* Push some stack space. */
+ retl /* Return; the returned buffer leaves 96 */
+ add %sp, 96, %o0 /* bytes of register save area at the top. */
diff --git a/sysdeps/sparc/bsd-_setjmp.S b/sysdeps/sparc/bsd-_setjmp.S
new file mode 100644
index 0000000..5b685d5
--- /dev/null
+++ b/sysdeps/sparc/bsd-_setjmp.S
@@ -0,0 +1,26 @@
+/* BSD `_setjmp' entry point to `sigsetjmp (..., 0)'. Sparc version.
+Copyright (C) 1994 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA. */
+
+#include <sysdep.h>
+
+ENTRY (setjmp)
+ sethi %hi(C_SYMBOL_NAME (__sigsetjmp)), %g1
+ or %lo(C_SYMBOL_NAME (__sigsetjmp)), %g1, %g1
+ jmp %g1
+ mov %g0, %o1 /* Pass second argument of zero. */
diff --git a/sysdeps/sparc/bsd-setjmp.S b/sysdeps/sparc/bsd-setjmp.S
new file mode 100644
index 0000000..b0a6326
--- /dev/null
+++ b/sysdeps/sparc/bsd-setjmp.S
@@ -0,0 +1,26 @@
+/* BSD `setjmp' entry point to `sigsetjmp (..., 1)'. Sparc version.
+Copyright (C) 1994 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA. */
+
+#include <sysdep.h>
+
+ENTRY (setjmp)
+ sethi %hi(C_SYMBOL_NAME (__sigsetjmp)), %g1
+ or %lo(C_SYMBOL_NAME (__sigsetjmp)), %g1, %g1
+ jmp %g1
+ mov 1, %o1 /* Pass second argument of one. */
diff --git a/sysdeps/sparc/bytesex.h b/sysdeps/sparc/bytesex.h
new file mode 100644
index 0000000..f1a75c0
--- /dev/null
+++ b/sysdeps/sparc/bytesex.h
@@ -0,0 +1,3 @@
+/* SPARC is big-endian. */
+
+#define __BYTE_ORDER __BIG_ENDIAN
diff --git a/sysdeps/sparc/divrem.m4 b/sysdeps/sparc/divrem.m4
new file mode 100644
index 0000000..bde8a21
--- /dev/null
+++ b/sysdeps/sparc/divrem.m4
@@ -0,0 +1,234 @@
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters:
+ * NAME name of function to generate
+ * OP OP=div => %o0 / %o1; OP=rem => %o0 % %o1
+ * S S=true => signed; S=false => unsigned
+ *
+ * Algorithm parameters:
+ * N how many bits per iteration we try to get (4)
+ * WORDSIZE total number of bits (32)
+ *
+ * Derived constants:
+ * TOPBITS number of bits in the top `decade' of a number
+ *
+ * Important variables:
+ * Q the partial quotient under development (initially 0)
+ * R the remainder so far, initially the dividend
+ * ITER number of main division loop iterations required;
+ * equal to ceil(log2(quotient) / N). Note that this
+ * is the log base (2^N) of the quotient.
+ * V the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ * Current estimate for non-large dividend is
+ * ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ * A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ * different path, as the upper bits of the quotient must be developed
+ * one bit at a time.
+ */
+
+define(N, `4')dnl
+define(WORDSIZE, `32')dnl
+define(TOPBITS, eval(WORDSIZE - N*((WORDSIZE-1)/N)))dnl
+dnl
+define(dividend, `%o0')dnl
+define(divisor, `%o1')dnl
+define(Q, `%o2')dnl
+define(R, `%o3')dnl
+define(ITER, `%o4')dnl
+define(V, `%o5')dnl
+dnl
+dnl m4 reminder: ifelse(a,b,c,d) => if a is b, then c, else d
+define(T, `%g1')dnl
+define(SC, `%g7')dnl
+ifelse(S, `true', `define(SIGN, `%g6')')dnl
+
+dnl
+dnl This is the recursive definition for developing quotient digits.
+dnl
+dnl Parameters:
+dnl $1 the current depth, 1 <= $1 <= N
+dnl $2 the current accumulation of quotient bits
+dnl N max depth
+dnl
+dnl We add a new bit to $2 and either recurse or insert the bits in
+dnl the quotient. R, Q, and V are inputs and outputs as defined above;
+dnl the condition codes are expected to reflect the input R, and are
+dnl modified to reflect the output R.
+dnl
+define(DEVELOP_QUOTIENT_BITS,
+` ! depth $1, accumulated bits $2
+ bl L.$1.eval(2**N+$2)
+ srl V,1,V
+ ! remainder is positive
+ subcc R,V,R
+ ifelse($1, N,
+ ` b 9f
+ add Q, ($2*2+1), Q
+ ', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2+1)')')
+L.$1.eval(2**N+$2):
+ ! remainder is negative
+ addcc R,V,R
+ ifelse($1, N,
+ ` b 9f
+ add Q, ($2*2-1), Q
+ ', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2-1)')')
+ ifelse($1, 1, `9:')')dnl
+
+#include "DEFS.h"
+#ifdef __svr4__
+#include <sys/trap.h>
+#else
+#include <machine/trap.h>
+#endif
+
+FUNC(NAME)
+ifelse(S, `true',
+` ! compute sign of result; if neither is negative, no problem
+ orcc divisor, dividend, %g0 ! either negative?
+ bge 2f ! no, go do the divide
+ifelse(OP, `div',
+` xor divisor, dividend, SIGN ! compute sign in any case',
+` mov dividend, SIGN ! sign of remainder matches dividend')
+ tst divisor
+ bge 1f
+ tst dividend
+ ! divisor is definitely negative; dividend might also be negative
+ bge 2f ! if dividend not negative...
+ sub %g0, divisor, divisor ! in any case, make divisor nonneg
+1: ! dividend is negative, divisor is nonnegative
+ sub %g0, dividend, dividend ! make dividend nonnegative
+2:
+')
+ ! Ready to divide. Compute size of quotient; scale comparand.
+ orcc divisor, %g0, V
+ bne 1f
+ mov dividend, R
+
+ ! Divide by zero trap. If it returns, return 0 (about as
+ ! wrong as possible, but that is what SunOS does...).
+ ta ST_DIV0
+ retl
+ clr %o0
+
+1:
+ cmp R, V ! if divisor exceeds dividend, done
+ blu Lgot_result ! (and algorithm fails otherwise)
+ clr Q
+ sethi %hi(1 << (WORDSIZE - TOPBITS - 1)), T
+ cmp R, T
+ blu Lnot_really_big
+ clr ITER
+
+ ! `Here the dividend is >= 2**(31-N) or so. We must be careful here,
+ ! as our usual N-at-a-shot divide step will cause overflow and havoc.
+ ! The number of bits in the result here is N*ITER+SC, where SC <= N.
+ ! Compute ITER in an unorthodox manner: know we need to shift V into
+ ! the top decade: so do not even bother to compare to R.'
+ 1:
+ cmp V, T
+ bgeu 3f
+ mov 1, SC
+ sll V, N, V
+ b 1b
+ add ITER, 1, ITER
+
+ ! Now compute SC.
+ 2: addcc V, V, V
+ bcc Lnot_too_big
+ add SC, 1, SC
+
+ ! We get here if the divisor overflowed while shifting.
+ ! This means that R has the high-order bit set.
+ ! Restore V and subtract from R.
+ sll T, TOPBITS, T ! high order bit
+ srl V, 1, V ! rest of V
+ add V, T, V
+ b Ldo_single_div
+ sub SC, 1, SC
+
+ Lnot_too_big:
+ 3: cmp V, R
+ blu 2b
+ nop
+ be Ldo_single_div
+ nop
+ /* NB: these are commented out in the V8-Sparc manual as well */
+ /* (I do not understand this) */
+ ! V > R: went too far: back up 1 step
+ ! srl V, 1, V
+ ! dec SC
+ ! do single-bit divide steps
+ !
+ ! We have to be careful here. We know that R >= V, so we can do the
+ ! first divide step without thinking. BUT, the others are conditional,
+ ! and are only done if R >= 0. Because both R and V may have the high-
+ ! order bit set in the first step, just falling into the regular
+ ! division loop will mess up the first time around.
+ ! So we unroll slightly...
+ Ldo_single_div:
+ subcc SC, 1, SC
+ bl Lend_regular_divide
+ nop
+ sub R, V, R
+ mov 1, Q
+ b Lend_single_divloop
+ nop
+ Lsingle_divloop:
+ sll Q, 1, Q
+ bl 1f
+ srl V, 1, V
+ ! R >= 0
+ sub R, V, R
+ b 2f
+ add Q, 1, Q
+ 1: ! R < 0
+ add R, V, R
+ sub Q, 1, Q
+ 2:
+ Lend_single_divloop:
+ subcc SC, 1, SC
+ bge Lsingle_divloop
+ tst R
+ b,a Lend_regular_divide
+
+Lnot_really_big:
+1:
+ sll V, N, V
+ cmp V, R
+ bleu 1b
+ addcc ITER, 1, ITER
+ be Lgot_result
+ sub ITER, 1, ITER
+
+ tst R ! set up for initial iteration
+Ldivloop:
+ sll Q, N, Q
+ DEVELOP_QUOTIENT_BITS(1, 0)
+Lend_regular_divide:
+ subcc ITER, 1, ITER
+ bge Ldivloop
+ tst R
+ bl,a Lgot_result
+ ! non-restoring fixup here (one instruction only!)
+ifelse(OP, `div',
+` sub Q, 1, Q
+', ` add R, divisor, R
+')
+
+Lgot_result:
+ifelse(S, `true',
+` ! check to see if answer should be < 0
+ tst SIGN
+ bl,a 1f
+ ifelse(OP, `div', `sub %g0, Q, Q', `sub %g0, R, R')
+1:')
+ retl
+ ifelse(OP, `div', `mov Q, %o0', `mov R, %o0')
diff --git a/sysdeps/sparc/jmp_buf.h b/sysdeps/sparc/jmp_buf.h
new file mode 100644
index 0000000..a5a592f
--- /dev/null
+++ b/sysdeps/sparc/jmp_buf.h
@@ -0,0 +1,14 @@
+/* Define the machine-dependent type `jmp_buf'. SPARC version. */
+
+/* NOTE: The assembly code in __longjmp.S and setjmp.S knows the layout
+ of this structure. You must hack the assembly code if you want to change
+ the order of the members. */
+
+typedef struct
+ {
+ /* Return PC (register o7). */
+ __ptr_t __pc;
+
+ /* Saved FP. */
+ __ptr_t __fp;
+ } __jmp_buf[1];
diff --git a/sysdeps/sparc/memcopy.h b/sysdeps/sparc/memcopy.h
new file mode 100644
index 0000000..3fd4f3d
--- /dev/null
+++ b/sysdeps/sparc/memcopy.h
@@ -0,0 +1,21 @@
+/* Copyright (C) 1991 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA. */
+
+#include <sysdeps/generic/memcopy.h>
+#undef reg_char
+#define reg_char int
diff --git a/sysdeps/sparc/mul_1.S b/sysdeps/sparc/mul_1.S
new file mode 100644
index 0000000..84aa12b
--- /dev/null
+++ b/sysdeps/sparc/mul_1.S
@@ -0,0 +1,198 @@
+! SPARC __mpn_mul_1 -- Multiply a limb vector with a limb and store
+! the result in a second limb vector.
+
+! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr o0
+! s1_ptr o1
+! size o2
+! s2_limb o3
+
+! ADD CODE FOR SMALL MULTIPLIERS!
+!1: ld
+! st
+!
+!2: ld ,a
+! addxcc a,a,x
+! st x,
+!
+!3_unrolled:
+! ld ,a
+! addxcc a,a,x1 ! 2a + cy
+! addx %g0,%g0,x2
+! addcc a,x1,x ! 3a + c
+! st x,
+!
+! ld ,a
+! addxcc a,a,y1
+! addx %g0,%g0,y2
+! addcc a,y1,x
+! st x,
+!
+!4_unrolled:
+! ld ,a
+! srl a,2,x1 ! 4a
+! addxcc y2,x1,x
+! sll a,30,x2
+! st x,
+!
+! ld ,a
+! srl a,2,y1
+! addxcc x2,y1,y
+! sll a,30,y2
+! st x,
+!
+!5_unrolled:
+! ld ,a
+! srl a,2,x1 ! 4a
+! addxcc a,x1,x ! 5a + c
+! sll a,30,x2
+! addx %g0,x2,x2
+! st x,
+!
+! ld ,a
+! srl a,2,y1
+! addxcc a,y1,x
+! sll a,30,y2
+! addx %g0,y2,y2
+! st x,
+!
+!8_unrolled:
+! ld ,a
+! srl a,3,x1 ! 8a
+! addxcc y2,x1,x
+! sll a,29,x2
+! st x,
+!
+! ld ,a
+! srl a,3,y1
+! addxcc x2,y1,y
+! sll a,29,y2
+! st x,
+
+#include "sysdep.h"
+
+.text
+ .align 4
+ .global C_SYMBOL_NAME(__mpn_mul_1)
+C_SYMBOL_NAME(__mpn_mul_1):
+ ! Make S1_PTR and RES_PTR point at the end of their blocks
+ ! and put (- 4 x SIZE) in index/loop counter.
+ sll %o2,2,%o2
+ add %o0,%o2,%o4 ! RES_PTR in o4 since o0 is retval
+ add %o1,%o2,%o1
+ sub %g0,%o2,%o2
+
+ cmp %o3,0xfff
+ bgu Large
+ nop
+
+ ld [%o1+%o2],%o5
+ mov 0,%o0
+ b L0
+ add %o4,-4,%o4
+Loop0:
+ st %g1,[%o4+%o2]
+L0: wr %g0,%o3,%y
+ sra %o5,31,%g2
+ and %o3,%g2,%g2
+ andcc %g1,0,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,0,%g1
+ sra %g1,20,%g4
+ sll %g1,12,%g1
+ rd %y,%g3
+ srl %g3,20,%g3
+ or %g1,%g3,%g1
+
+ addcc %g1,%o0,%g1
+ addx %g2,%g4,%o0 ! add sign-compensation and cy to hi limb
+ addcc %o2,4,%o2 ! loop counter
+ bne,a Loop0
+ ld [%o1+%o2],%o5
+
+ retl
+ st %g1,[%o4+%o2]
+
+
+Large: ld [%o1+%o2],%o5
+ mov 0,%o0
+ sra %o3,31,%g4 ! g4 = mask of ones iff S2_LIMB < 0
+ b L1
+ add %o4,-4,%o4
+Loop:
+ st %g3,[%o4+%o2]
+L1: wr %g0,%o5,%y
+ and %o5,%g4,%g2 ! g2 = S1_LIMB iff S2_LIMB < 0, else 0
+ andcc %g0,%g0,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%g0,%g1
+ rd %y,%g3
+ addcc %g3,%o0,%g3
+ addx %g2,%g1,%o0 ! add sign-compensation and cy to hi limb
+ addcc %o2,4,%o2 ! loop counter
+ bne,a Loop
+ ld [%o1+%o2],%o5
+
+ retl
+ st %g3,[%o4+%o2]
diff --git a/sysdeps/sparc/rem.S b/sysdeps/sparc/rem.S
new file mode 100644
index 0000000..17662f7
--- /dev/null
+++ b/sysdeps/sparc/rem.S
@@ -0,0 +1,365 @@
+ /* This file is generated from divrem.m4; DO NOT EDIT! */
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters:
+ * .rem name of function to generate
+ * rem rem=div => %o0 / %o1; rem=rem => %o0 % %o1
+ * true true=true => signed; true=false => unsigned
+ *
+ * Algorithm parameters:
+ * N how many bits per iteration we try to get (4)
+ * WORDSIZE total number of bits (32)
+ *
+ * Derived constants:
+ * TOPBITS number of bits in the top decade of a number
+ *
+ * Important variables:
+ * Q the partial quotient under development (initially 0)
+ * R the remainder so far, initially the dividend
+ * ITER number of main division loop iterations required;
+ * equal to ceil(log2(quotient) / N). Note that this
+ * is the log base (2^N) of the quotient.
+ * V the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ * Current estimate for non-large dividend is
+ * ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ * A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ * different path, as the upper bits of the quotient must be developed
+ * one bit at a time.
+ */
+
+
+
+#include "DEFS.h"
+#ifdef __svr4__
+#include <sys/trap.h>
+#else
+#include <machine/trap.h>
+#endif
+
+FUNC(.rem)
+ ! compute sign of result; if neither is negative, no problem
+ orcc %o1, %o0, %g0 ! either negative?
+ bge 2f ! no, go do the divide
+ mov %o0, %g6 ! sign of remainder matches %o0
+ tst %o1
+ bge 1f
+ tst %o0
+ ! %o1 is definitely negative; %o0 might also be negative
+ bge 2f ! if %o0 not negative...
+ sub %g0, %o1, %o1 ! in any case, make %o1 nonneg
+1: ! %o0 is negative, %o1 is nonnegative
+ sub %g0, %o0, %o0 ! make %o0 nonnegative
+2:
+
+ ! Ready to divide. Compute size of quotient; scale comparand.
+ orcc %o1, %g0, %o5
+ bne 1f
+ mov %o0, %o3
+
+ ! Divide by zero trap. If it returns, return 0 (about as
+ ! wrong as possible, but that is what SunOS does...).
+ ta ST_DIV0
+ retl
+ clr %o0
+
+1:
+ cmp %o3, %o5 ! if %o1 exceeds %o0, done
+ blu Lgot_result ! (and algorithm fails otherwise)
+ clr %o2
+ sethi %hi(1 << (32 - 4 - 1)), %g1
+ cmp %o3, %g1
+ blu Lnot_really_big
+ clr %o4
+
+ ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
+ ! as our usual N-at-a-shot divide step will cause overflow and havoc.
+ ! The number of bits in the result here is N*ITER+SC, where SC <= N.
+ ! Compute ITER in an unorthodox manner: know we need to shift V into
+ ! the top decade: so do not even bother to compare to R.
+ 1:
+ cmp %o5, %g1
+ bgeu 3f
+ mov 1, %g7
+ sll %o5, 4, %o5
+ b 1b
+ add %o4, 1, %o4
+
+ ! Now compute %g7.
+ 2: addcc %o5, %o5, %o5
+ bcc Lnot_too_big
+ add %g7, 1, %g7
+
+ ! We get here if the %o1 overflowed while shifting.
+ ! This means that %o3 has the high-order bit set.
+ ! Restore %o5 and subtract from %o3.
+ sll %g1, 4, %g1 ! high order bit
+ srl %o5, 1, %o5 ! rest of %o5
+ add %o5, %g1, %o5
+ b Ldo_single_div
+ sub %g7, 1, %g7
+
+ Lnot_too_big:
+ 3: cmp %o5, %o3
+ blu 2b
+ nop
+ be Ldo_single_div
+ nop
+ /* NB: these are commented out in the V8-Sparc manual as well */
+ /* (I do not understand this) */
+ ! %o5 > %o3: went too far: back up 1 step
+ ! srl %o5, 1, %o5
+ ! dec %g7
+ ! do single-bit divide steps
+ !
+ ! We have to be careful here. We know that %o3 >= %o5, so we can do the
+ ! first divide step without thinking. BUT, the others are conditional,
+ ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
+ ! order bit set in the first step, just falling into the regular
+ ! division loop will mess up the first time around.
+ ! So we unroll slightly...
+ Ldo_single_div:
+ subcc %g7, 1, %g7
+ bl Lend_regular_divide
+ nop
+ sub %o3, %o5, %o3
+ mov 1, %o2
+ b Lend_single_divloop
+ nop
+ Lsingle_divloop:
+ sll %o2, 1, %o2
+ bl 1f
+ srl %o5, 1, %o5
+ ! %o3 >= 0
+ sub %o3, %o5, %o3
+ b 2f
+ add %o2, 1, %o2
+ 1: ! %o3 < 0
+ add %o3, %o5, %o3
+ sub %o2, 1, %o2
+ 2:
+ Lend_single_divloop:
+ subcc %g7, 1, %g7
+ bge Lsingle_divloop
+ tst %o3
+ b,a Lend_regular_divide
+
+Lnot_really_big:
+1:
+ sll %o5, 4, %o5
+ cmp %o5, %o3
+ bleu 1b
+ addcc %o4, 1, %o4
+ be Lgot_result
+ sub %o4, 1, %o4
+
+ tst %o3 ! set up for initial iteration
+Ldivloop:
+ sll %o2, 4, %o2
+ ! depth 1, accumulated bits 0
+ bl L.1.16
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 2, accumulated bits 1
+ bl L.2.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 3
+ bl L.3.19
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 7
+ bl L.4.23
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2+1), %o2
+
+L.4.23:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2-1), %o2
+
+
+L.3.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 5
+ bl L.4.21
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2+1), %o2
+
+L.4.21:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2-1), %o2
+
+
+
+L.2.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 1
+ bl L.3.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 3
+ bl L.4.19
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2+1), %o2
+
+L.4.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2-1), %o2
+
+
+L.3.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 1
+ bl L.4.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2+1), %o2
+
+L.4.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2-1), %o2
+
+
+
+
+L.1.16:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 2, accumulated bits -1
+ bl L.2.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -1
+ bl L.3.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -1
+ bl L.4.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2+1), %o2
+
+L.4.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2-1), %o2
+
+
+L.3.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -3
+ bl L.4.13
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2+1), %o2
+
+L.4.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2-1), %o2
+
+
+
+L.2.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -3
+ bl L.3.13
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -5
+ bl L.4.11
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2+1), %o2
+
+L.4.11:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2-1), %o2
+
+
+L.3.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -7
+ bl L.4.9
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2+1), %o2
+
+L.4.9:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2-1), %o2
+
+
+
+
+ 9:
+Lend_regular_divide:
+ subcc %o4, 1, %o4
+ bge Ldivloop
+ tst %o3
+ bl,a Lgot_result
+ ! non-restoring fixup here (one instruction only!)
+ add %o3, %o1, %o3
+
+
+Lgot_result:
+ ! check to see if answer should be < 0
+ tst %g6
+ bl,a 1f
+ sub %g0, %o3, %o3
+1:
+ retl
+ mov %o3, %o0
diff --git a/sysdeps/sparc/sdiv.S b/sysdeps/sparc/sdiv.S
new file mode 100644
index 0000000..dadbb36
--- /dev/null
+++ b/sysdeps/sparc/sdiv.S
@@ -0,0 +1,365 @@
+ /* This file is generated from divrem.m4; DO NOT EDIT! */
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters:
+ * .div name of function to generate
+ * div div=div => %o0 / %o1; div=rem => %o0 % %o1
+ * true true=true => signed; true=false => unsigned
+ *
+ * Algorithm parameters:
+ * N how many bits per iteration we try to get (4)
+ * WORDSIZE total number of bits (32)
+ *
+ * Derived constants:
+ * TOPBITS number of bits in the top decade of a number
+ *
+ * Important variables:
+ * Q the partial quotient under development (initially 0)
+ * R the remainder so far, initially the dividend
+ * ITER number of main division loop iterations required;
+ * equal to ceil(log2(quotient) / N). Note that this
+ * is the log base (2^N) of the quotient.
+ * V the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ * Current estimate for non-large dividend is
+ * ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ * A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ * different path, as the upper bits of the quotient must be developed
+ * one bit at a time.
+ */
+
+
+
+#include "DEFS.h"
+#ifdef __svr4__
+#include <sys/trap.h>
+#else
+#include <machine/trap.h>
+#endif
+
+FUNC(.div)
+ ! compute sign of result; if neither is negative, no problem
+ orcc %o1, %o0, %g0 ! either negative?
+ bge 2f ! no, go do the divide
+ xor %o1, %o0, %g6 ! compute sign in any case
+ tst %o1
+ bge 1f
+ tst %o0
+ ! %o1 is definitely negative; %o0 might also be negative
+ bge 2f ! if %o0 not negative...
+ sub %g0, %o1, %o1 ! in any case, make %o1 nonneg
+1: ! %o0 is negative, %o1 is nonnegative
+ sub %g0, %o0, %o0 ! make %o0 nonnegative
+2:
+
+ ! Ready to divide. Compute size of quotient; scale comparand.
+ orcc %o1, %g0, %o5
+ bne 1f
+ mov %o0, %o3
+
+ ! Divide by zero trap. If it returns, return 0 (about as
+ ! wrong as possible, but that is what SunOS does...).
+ ta ST_DIV0
+ retl
+ clr %o0
+
+1:
+ cmp %o3, %o5 ! if %o1 exceeds %o0, done
+ blu Lgot_result ! (and algorithm fails otherwise)
+ clr %o2
+ sethi %hi(1 << (32 - 4 - 1)), %g1
+ cmp %o3, %g1
+ blu Lnot_really_big
+ clr %o4
+
+ ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
+ ! as our usual N-at-a-shot divide step will cause overflow and havoc.
+ ! The number of bits in the result here is N*ITER+SC, where SC <= N.
+ ! Compute ITER in an unorthodox manner: know we need to shift V into
+ ! the top decade: so do not even bother to compare to R.
+ 1:
+ cmp %o5, %g1
+ bgeu 3f
+ mov 1, %g7
+ sll %o5, 4, %o5
+ b 1b
+ add %o4, 1, %o4
+
+ ! Now compute %g7.
+ 2: addcc %o5, %o5, %o5
+ bcc Lnot_too_big
+ add %g7, 1, %g7
+
+ ! We get here if the %o1 overflowed while shifting.
+ ! This means that %o3 has the high-order bit set.
+ ! Restore %o5 and subtract from %o3.
+ sll %g1, 4, %g1 ! high order bit
+ srl %o5, 1, %o5 ! rest of %o5
+ add %o5, %g1, %o5
+ b Ldo_single_div
+ sub %g7, 1, %g7
+
+ Lnot_too_big:
+ 3: cmp %o5, %o3
+ blu 2b
+ nop
+ be Ldo_single_div
+ nop
+ /* NB: these are commented out in the V8-Sparc manual as well */
+ /* (I do not understand this) */
+ ! %o5 > %o3: went too far: back up 1 step
+ ! srl %o5, 1, %o5
+ ! dec %g7
+ ! do single-bit divide steps
+ !
+ ! We have to be careful here. We know that %o3 >= %o5, so we can do the
+ ! first divide step without thinking. BUT, the others are conditional,
+ ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
+ ! order bit set in the first step, just falling into the regular
+ ! division loop will mess up the first time around.
+ ! So we unroll slightly...
+ Ldo_single_div:
+ subcc %g7, 1, %g7
+ bl Lend_regular_divide
+ nop
+ sub %o3, %o5, %o3
+ mov 1, %o2
+ b Lend_single_divloop
+ nop
+ Lsingle_divloop:
+ sll %o2, 1, %o2
+ bl 1f
+ srl %o5, 1, %o5
+ ! %o3 >= 0
+ sub %o3, %o5, %o3
+ b 2f
+ add %o2, 1, %o2
+ 1: ! %o3 < 0
+ add %o3, %o5, %o3
+ sub %o2, 1, %o2
+ 2:
+ Lend_single_divloop:
+ subcc %g7, 1, %g7
+ bge Lsingle_divloop
+ tst %o3
+ b,a Lend_regular_divide
+
+Lnot_really_big:
+1:
+ sll %o5, 4, %o5
+ cmp %o5, %o3
+ bleu 1b
+ addcc %o4, 1, %o4
+ be Lgot_result
+ sub %o4, 1, %o4
+
+ tst %o3 ! set up for initial iteration
+Ldivloop:
+ sll %o2, 4, %o2
+ ! depth 1, accumulated bits 0
+ bl L.1.16
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 2, accumulated bits 1
+ bl L.2.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 3
+ bl L.3.19
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 7
+ bl L.4.23
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2+1), %o2
+
+L.4.23:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2-1), %o2
+
+
+L.3.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 5
+ bl L.4.21
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2+1), %o2
+
+L.4.21:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2-1), %o2
+
+
+
+L.2.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 1
+ bl L.3.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 3
+ bl L.4.19
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2+1), %o2
+
+L.4.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2-1), %o2
+
+
+L.3.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 1
+ bl L.4.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2+1), %o2
+
+L.4.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2-1), %o2
+
+
+
+
+L.1.16:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 2, accumulated bits -1
+ bl L.2.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -1
+ bl L.3.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -1
+ bl L.4.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2+1), %o2
+
+L.4.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2-1), %o2
+
+
+L.3.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -3
+ bl L.4.13
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2+1), %o2
+
+L.4.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2-1), %o2
+
+
+
+L.2.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -3
+ bl L.3.13
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -5
+ bl L.4.11
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2+1), %o2
+
+L.4.11:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2-1), %o2
+
+
+L.3.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -7
+ bl L.4.9
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2+1), %o2
+
+L.4.9:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2-1), %o2
+
+
+
+
+ 9:
+Lend_regular_divide:
+ subcc %o4, 1, %o4
+ bge Ldivloop
+ tst %o3
+ bl,a Lgot_result
+ ! non-restoring fixup here (one instruction only!)
+ sub %o2, 1, %o2
+
+
+Lgot_result:
+ ! check to see if answer should be < 0
+ tst %g6
+ bl,a 1f
+ sub %g0, %o2, %o2
+1:
+ retl
+ mov %o2, %o0
diff --git a/sysdeps/sparc/setjmp.S b/sysdeps/sparc/setjmp.S
new file mode 100644
index 0000000..3c9c18d
--- /dev/null
+++ b/sysdeps/sparc/setjmp.S
@@ -0,0 +1,31 @@
+/* Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA. */
+
+#include <sysdep.h>
+
+/* NOTE: This code depends on the definition of `__jmp_buf' in <jmp_buf.h>. */
+
+ENTRY (__sigsetjmp)
+ /* Save our return PC and SP (second store in the jmp delay slot). */
+ st %o7, [%o0]
+ /* Save the signal mask if requested. We do this as a tail-call
+ for simplicity; it always returns zero. */
+ sethi %hi(C_SYMBOL_NAME (__sigjmp_save)), %g1
+ or %lo(C_SYMBOL_NAME (__sigjmp_save)), %g1, %g1
+ jmp %g1
+ st %sp, [%o0 + 4]
diff --git a/sysdeps/sparc/sparc8/addmul_1.S b/sysdeps/sparc/sparc8/addmul_1.S
new file mode 100644
index 0000000..fbaacfd
--- /dev/null
+++ b/sysdeps/sparc/sparc8/addmul_1.S
@@ -0,0 +1,116 @@
+! SPARC v8 __mpn_addmul_1 -- Multiply a limb vector with a limb and
+! add the result to a second limb vector.
+
+! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr o0
+! s1_ptr o1
+! size o2
+! s2_limb o3
+
+#include "sysdep.h"
+
+.text
+ .align 4
+ .global C_SYMBOL_NAME(__mpn_addmul_1)
+C_SYMBOL_NAME(__mpn_addmul_1):
+ orcc %g0,%g0,%g2
+ ld [%o1+0],%o4 ! 1
+
+ sll %o2,4,%g1
+ and %g1,(4-1)<<4,%g1
+ sethi %hi(LL),%g3
+ or %g3,%lo(LL),%g3
+ jmp %g3+%g1
+ nop
+LL:
+LL00: add %o0,-4,%o0
+ b Loop00 /* 4, 8, 12, ... */
+ add %o1,-4,%o1
+ nop
+LL01: b Loop01 /* 1, 5, 9, ... */
+ nop
+ nop
+ nop
+LL10: add %o0,-12,%o0 /* 2, 6, 10, ... */
+ b Loop10
+ add %o1,4,%o1
+ nop
+LL11: add %o0,-8,%o0 /* 3, 7, 11, ... */
+ b Loop11
+ add %o1,-8,%o1
+ nop
+
+1: addcc %g3,%g2,%g3 ! 1
+ ld [%o1+4],%o4 ! 2
+ rd %y,%g2 ! 1
+ addx %g0,%g2,%g2
+ ld [%o0+0],%g1 ! 2
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+0] ! 1
+Loop00: umul %o4,%o3,%g3 ! 2
+ ld [%o0+4],%g1 ! 2
+ addxcc %g3,%g2,%g3 ! 2
+ ld [%o1+8],%o4 ! 3
+ rd %y,%g2 ! 2
+ addx %g0,%g2,%g2
+ nop
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+4] ! 2
+Loop11: umul %o4,%o3,%g3 ! 3
+ addxcc %g3,%g2,%g3 ! 3
+ ld [%o1+12],%o4 ! 4
+ rd %y,%g2 ! 3
+ add %o1,16,%o1
+ addx %g0,%g2,%g2
+ ld [%o0+8],%g1 ! 2
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+8] ! 3
+Loop10: umul %o4,%o3,%g3 ! 4
+ addxcc %g3,%g2,%g3 ! 4
+ ld [%o1+0],%o4 ! 1
+ rd %y,%g2 ! 4
+ addx %g0,%g2,%g2
+ ld [%o0+12],%g1 ! 2
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+12] ! 4
+ add %o0,16,%o0
+ addx %g0,%g2,%g2
+Loop01: addcc %o2,-4,%o2
+ bg 1b
+ umul %o4,%o3,%g3 ! 1
+
+ addcc %g3,%g2,%g3 ! 4
+ rd %y,%g2 ! 4
+ addx %g0,%g2,%g2
+ ld [%o0+0],%g1 ! 2
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+0] ! 4
+ addx %g0,%g2,%o0
+
+ retl
+ nop
+
+
+! umul, ld, addxcc, rd, st
+
+! umul, ld, addxcc, rd, ld, addcc, st, addx
+
diff --git a/sysdeps/sparc/sparc8/mul_1.S b/sysdeps/sparc/sparc8/mul_1.S
new file mode 100644
index 0000000..9c21768
--- /dev/null
+++ b/sysdeps/sparc/sparc8/mul_1.S
@@ -0,0 +1,91 @@
+! SPARC v8 __mpn_mul_1 -- Multiply a limb vector with a single limb and
+! store the product in a second limb vector.
+
+! Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr o0
+! s1_ptr o1
+! size o2
+! s2_limb o3
+
+#include "sysdep.h"
+
+.text
+ .align 8
+ .global C_SYMBOL_NAME(__mpn_mul_1)
+C_SYMBOL_NAME(__mpn_mul_1):
+ sll %o2,4,%g1
+ and %g1,(4-1)<<4,%g1
+ sethi %hi(LL),%g3
+ or %g3,%lo(LL),%g3
+ jmp %g3+%g1
+ ld [%o1+0],%o4 ! 1
+LL:
+LL00: add %o0,-4,%o0
+ add %o1,-4,%o1
+ b Loop00 /* 4, 8, 12, ... */
+ orcc %g0,%g0,%g2
+LL01: b Loop01 /* 1, 5, 9, ... */
+ orcc %g0,%g0,%g2
+ nop
+ nop
+LL10: add %o0,-12,%o0 /* 2, 6, 10, ... */
+ add %o1,4,%o1
+ b Loop10
+ orcc %g0,%g0,%g2
+ nop
+LL11: add %o0,-8,%o0 /* 3, 7, 11, ... */
+ add %o1,-8,%o1
+ b Loop11
+ orcc %g0,%g0,%g2
+
+Loop: addcc %g3,%g2,%g3 ! 1
+ ld [%o1+4],%o4 ! 2
+ st %g3,[%o0+0] ! 1
+ rd %y,%g2 ! 1
+Loop00: umul %o4,%o3,%g3 ! 2
+ addxcc %g3,%g2,%g3 ! 2
+ ld [%o1+8],%o4 ! 3
+ st %g3,[%o0+4] ! 2
+ rd %y,%g2 ! 2
+Loop11: umul %o4,%o3,%g3 ! 3
+ addxcc %g3,%g2,%g3 ! 3
+ ld [%o1+12],%o4 ! 4
+ add %o1,16,%o1
+ st %g3,[%o0+8] ! 3
+ rd %y,%g2 ! 3
+Loop10: umul %o4,%o3,%g3 ! 4
+ addxcc %g3,%g2,%g3 ! 4
+ ld [%o1+0],%o4 ! 1
+ st %g3,[%o0+12] ! 4
+ add %o0,16,%o0
+ rd %y,%g2 ! 4
+ addx %g0,%g2,%g2
+Loop01: addcc %o2,-4,%o2
+ bg Loop
+ umul %o4,%o3,%g3 ! 1
+
+ addcc %g3,%g2,%g3 ! 4
+ st %g3,[%o0+0] ! 4
+ rd %y,%g2 ! 4
+
+ retl
+ addx %g0,%g2,%o0
diff --git a/sysdeps/sparc/sparc8/submul_1.S b/sysdeps/sparc/sparc8/submul_1.S
new file mode 100644
index 0000000..5c4d688
--- /dev/null
+++ b/sysdeps/sparc/sparc8/submul_1.S
@@ -0,0 +1,57 @@
+! SPARC v8 __mpn_submul_1 -- Multiply a limb vector with a limb and
+! subtract the result from a second limb vector.
+
+! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr o0
+! s1_ptr o1
+! size o2
+! s2_limb o3
+
+#include "sysdep.h"
+
+.text
+ .align 4
+ .global C_SYMBOL_NAME(__mpn_submul_1)
+C_SYMBOL_NAME(__mpn_submul_1):
+ sub %g0,%o2,%o2 ! negate ...
+ sll %o2,2,%o2 ! ... and scale size
+ sub %o1,%o2,%o1 ! o1 is offset s1_ptr
+ sub %o0,%o2,%g1 ! g1 is offset res_ptr
+
+ mov 0,%o0 ! clear cy_limb
+
+Loop: ld [%o1+%o2],%o4
+ ld [%g1+%o2],%g2
+ umul %o4,%o3,%o5
+ rd %y,%g3
+ addcc %o5,%o0,%o5
+ addx %g3,0,%o0
+ subcc %g2,%o5,%g2
+ addx %o0,0,%o0
+ st %g2,[%g1+%o2]
+
+ addcc %o2,4,%o2
+ bne Loop
+ nop
+
+ retl
+ nop
diff --git a/sysdeps/sparc/sparc8/udiv_qrnnd.S b/sysdeps/sparc/sparc8/udiv_qrnnd.S
new file mode 100644
index 0000000..49c2398
--- /dev/null
+++ b/sysdeps/sparc/sparc8/udiv_qrnnd.S
@@ -0,0 +1,186 @@
+! SPARC __udiv_qrnnd division support, used from longlong.h.
+
+! Copyright (C) 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! rem_ptr o0
+! n1 o1
+! n0 o2
+! d o3
+
+#include "sysdep.h"
+
+ .text
+ .align 4
+ .global C_SYMBOL_NAME(__udiv_qrnnd)
+C_SYMBOL_NAME(__udiv_qrnnd):
+ tst %o3
+ bneg Largedivisor
+ mov 8,%g1
+
+ b Lp1
+ addxcc %o2,%o2,%o2
+
+Lplop: bcc Ln1
+ addxcc %o2,%o2,%o2
+Lp1: addx %o1,%o1,%o1
+ subcc %o1,%o3,%o4
+ bcc Ln2
+ addxcc %o2,%o2,%o2
+Lp2: addx %o1,%o1,%o1
+ subcc %o1,%o3,%o4
+ bcc Ln3
+ addxcc %o2,%o2,%o2
+Lp3: addx %o1,%o1,%o1
+ subcc %o1,%o3,%o4
+ bcc Ln4
+ addxcc %o2,%o2,%o2
+Lp4: addx %o1,%o1,%o1
+ addcc %g1,-1,%g1
+ bne Lplop
+ subcc %o1,%o3,%o4
+ bcc Ln5
+ addxcc %o2,%o2,%o2
+Lp5: st %o1,[%o0]
+ retl
+ xnor %g0,%o2,%o0
+
+Lnlop: bcc Lp1
+ addxcc %o2,%o2,%o2
+Ln1: addx %o4,%o4,%o4
+ subcc %o4,%o3,%o1
+ bcc Lp2
+ addxcc %o2,%o2,%o2
+Ln2: addx %o4,%o4,%o4
+ subcc %o4,%o3,%o1
+ bcc Lp3
+ addxcc %o2,%o2,%o2
+Ln3: addx %o4,%o4,%o4
+ subcc %o4,%o3,%o1
+ bcc Lp4
+ addxcc %o2,%o2,%o2
+Ln4: addx %o4,%o4,%o4
+ addcc %g1,-1,%g1
+ bne Lnlop
+ subcc %o4,%o3,%o1
+ bcc Lp5
+ addxcc %o2,%o2,%o2
+Ln5: st %o4,[%o0]
+ retl
+ xnor %g0,%o2,%o0
+
+Largedivisor:
+ and %o2,1,%o5 ! %o5 = n0 & 1
+
+ srl %o2,1,%o2
+ sll %o1,31,%g2
+ or %g2,%o2,%o2 ! %o2 = lo(n1n0 >> 1)
+ srl %o1,1,%o1 ! %o1 = hi(n1n0 >> 1)
+
+ and %o3,1,%g2
+ srl %o3,1,%g3 ! %g3 = floor(d / 2)
+ add %g3,%g2,%g3 ! %g3 = ceil(d / 2)
+
+ b LLp1
+ addxcc %o2,%o2,%o2
+
+LLplop: bcc LLn1
+ addxcc %o2,%o2,%o2
+LLp1: addx %o1,%o1,%o1
+ subcc %o1,%g3,%o4
+ bcc LLn2
+ addxcc %o2,%o2,%o2
+LLp2: addx %o1,%o1,%o1
+ subcc %o1,%g3,%o4
+ bcc LLn3
+ addxcc %o2,%o2,%o2
+LLp3: addx %o1,%o1,%o1
+ subcc %o1,%g3,%o4
+ bcc LLn4
+ addxcc %o2,%o2,%o2
+LLp4: addx %o1,%o1,%o1
+ addcc %g1,-1,%g1
+ bne LLplop
+ subcc %o1,%g3,%o4
+ bcc LLn5
+ addxcc %o2,%o2,%o2
+LLp5: add %o1,%o1,%o1 ! << 1
+ tst %g2
+ bne Oddp
+ add %o5,%o1,%o1
+ st %o1,[%o0]
+ retl
+ xnor %g0,%o2,%o0
+
+LLnlop: bcc LLp1
+ addxcc %o2,%o2,%o2
+LLn1: addx %o4,%o4,%o4
+ subcc %o4,%g3,%o1
+ bcc LLp2
+ addxcc %o2,%o2,%o2
+LLn2: addx %o4,%o4,%o4
+ subcc %o4,%g3,%o1
+ bcc LLp3
+ addxcc %o2,%o2,%o2
+LLn3: addx %o4,%o4,%o4
+ subcc %o4,%g3,%o1
+ bcc LLp4
+ addxcc %o2,%o2,%o2
+LLn4: addx %o4,%o4,%o4
+ addcc %g1,-1,%g1
+ bne LLnlop
+ subcc %o4,%g3,%o1
+ bcc LLp5
+ addxcc %o2,%o2,%o2
+LLn5: add %o4,%o4,%o4 ! << 1
+ tst %g2
+ bne Oddn
+ add %o5,%o4,%o4
+ st %o4,[%o0]
+ retl
+ xnor %g0,%o2,%o0
+
+Oddp: xnor %g0,%o2,%o2
+ ! q' in %o2. r' in %o1
+ addcc %o1,%o2,%o1
+ bcc LLp6
+ addx %o2,0,%o2
+ sub %o1,%o3,%o1
+LLp6: subcc %o1,%o3,%g0
+ bcs LLp7
+ subx %o2,-1,%o2
+ sub %o1,%o3,%o1
+LLp7: st %o1,[%o0]
+ retl
+ mov %o2,%o0
+
+Oddn: xnor %g0,%o2,%o2
+ ! q' in %o2. r' in %o4
+ addcc %o4,%o2,%o4
+ bcc LLn6
+ addx %o2,0,%o2
+ sub %o4,%o3,%o4
+LLn6: subcc %o4,%o3,%g0
+ bcs LLn7
+ subx %o2,-1,%o2
+ sub %o4,%o3,%o4
+LLn7: st %o4,[%o0]
+ retl
+ mov %o2,%o0
diff --git a/sysdeps/sparc/sqrt.c b/sysdeps/sparc/sqrt.c
new file mode 100644
index 0000000..bff46e1
--- /dev/null
+++ b/sysdeps/sparc/sqrt.c
@@ -0,0 +1,34 @@
+/* Copyright (C) 1991, 1992 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA. */
+
+#include <ansidecl.h>
+#include <errno.h>
+#include <math.h>
+
+#ifndef __GNUC__
+ #error This file uses GNU C extensions; you must compile with GCC.
+#endif
+
+/* Return the square root of X. */
+double
+DEFUN(sqrt, (x), double x)
+{
+ register double result;
+ asm("fsqrtd %1, %0" : "=f" (result) : "f" (x));
+ return result;
+}
diff --git a/sysdeps/sparc/sub_n.S b/sysdeps/sparc/sub_n.S
new file mode 100644
index 0000000..7a167b2
--- /dev/null
+++ b/sysdeps/sparc/sub_n.S
@@ -0,0 +1,134 @@
+! sparc __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+! store difference in a third limb vector.
+
+! Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr %o0
+! s1_ptr %o1
+! s2_ptr %o2
+! size %o3
+
+#include "sysdep.h"
+
+ .text
+ .align 4
+ .global C_SYMBOL_NAME(__mpn_sub_n)
+C_SYMBOL_NAME(__mpn_sub_n):
+ ld [%o1+0],%o4 ! read first limb from s1_ptr
+ srl %o3,4,%g1
+ ld [%o2+0],%o5 ! read first limb from s2_ptr
+
+ sub %g0,%o3,%o3
+ andcc %o3,(16-1),%o3
+ be Lzero
+ nop
+
+ sll %o3,2,%o3 ! multiply by 4
+ sub %o0,%o3,%o0 ! adjust res_ptr
+ sub %o1,%o3,%o1 ! adjust s1_ptr
+ sub %o2,%o3,%o2 ! adjust s2_ptr
+
+ mov %o4,%g2
+
+ sethi %hi(Lbase),%g3
+ or %g3,%lo(Lbase),%g3
+ sll %o3,2,%o3 ! multiply by 4
+ jmp %g3+%o3
+ mov %o5,%g3
+
+Loop: subxcc %g2,%g3,%o3
+ add %o1,64,%o1
+ st %o3,[%o0+60]
+ add %o2,64,%o2
+ ld [%o1+0],%o4
+ add %o0,64,%o0
+ ld [%o2+0],%o5
+Lzero: sub %g1,1,%g1 ! add 0 + 16r limbs (adjust loop counter)
+Lbase: ld [%o1+4],%g2
+ subxcc %o4,%o5,%o3
+ ld [%o2+4],%g3
+ st %o3,[%o0+0]
+ ld [%o1+8],%o4 ! add 15 + 16r limbs
+ subxcc %g2,%g3,%o3
+ ld [%o2+8],%o5
+ st %o3,[%o0+4]
+ ld [%o1+12],%g2 ! add 14 + 16r limbs
+ subxcc %o4,%o5,%o3
+ ld [%o2+12],%g3
+ st %o3,[%o0+8]
+ ld [%o1+16],%o4 ! add 13 + 16r limbs
+ subxcc %g2,%g3,%o3
+ ld [%o2+16],%o5
+ st %o3,[%o0+12]
+ ld [%o1+20],%g2 ! add 12 + 16r limbs
+ subxcc %o4,%o5,%o3
+ ld [%o2+20],%g3
+ st %o3,[%o0+16]
+ ld [%o1+24],%o4 ! add 11 + 16r limbs
+ subxcc %g2,%g3,%o3
+ ld [%o2+24],%o5
+ st %o3,[%o0+20]
+ ld [%o1+28],%g2 ! add 10 + 16r limbs
+ subxcc %o4,%o5,%o3
+ ld [%o2+28],%g3
+ st %o3,[%o0+24]
+ ld [%o1+32],%o4 ! add 9 + 16r limbs
+ subxcc %g2,%g3,%o3
+ ld [%o2+32],%o5
+ st %o3,[%o0+28]
+ ld [%o1+36],%g2 ! add 8 + 16r limbs
+ subxcc %o4,%o5,%o3
+ ld [%o2+36],%g3
+ st %o3,[%o0+32]
+ ld [%o1+40],%o4 ! add 7 + 16r limbs
+ subxcc %g2,%g3,%o3
+ ld [%o2+40],%o5
+ st %o3,[%o0+36]
+ ld [%o1+44],%g2 ! add 6 + 16r limbs
+ subxcc %o4,%o5,%o3
+ ld [%o2+44],%g3
+ st %o3,[%o0+40]
+ ld [%o1+48],%o4 ! add 5 + 16r limbs
+ subxcc %g2,%g3,%o3
+ ld [%o2+48],%o5
+ st %o3,[%o0+44]
+ ld [%o1+52],%g2 ! add 4 + 16r limbs
+ subxcc %o4,%o5,%o3
+ ld [%o2+52],%g3
+ st %o3,[%o0+48]
+ ld [%o1+56],%o4 ! add 3 + 16r limbs
+ subxcc %g2,%g3,%o3
+ ld [%o2+56],%o5
+ st %o3,[%o0+52]
+ ld [%o1+60],%g2 ! add 2 + 16r limbs
+ subxcc %o4,%o5,%o3
+ ld [%o2+60],%g3
+ st %o3,[%o0+56]
+ subx %g0,%g0,%o4
+ tst %g1
+ bne Loop
+ subcc %g0,%o4,%g0 ! restore cy (delay slot)
+
+ subxcc %g2,%g3,%o3
+ st %o3,[%o0+60] ! store most significant limb
+
+ retl
+ addx %g0,%g0,%o0 ! return carry-out from most sign. limb
diff --git a/sysdeps/sparc/submul_1.S b/sysdeps/sparc/submul_1.S
new file mode 100644
index 0000000..ed67c99
--- /dev/null
+++ b/sysdeps/sparc/submul_1.S
@@ -0,0 +1,146 @@
+! SPARC __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+! the result from a second limb vector.
+
+! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr o0
+! s1_ptr o1
+! size o2
+! s2_limb o3
+
+#include "sysdep.h"
+
+.text
+ .align 4
+ .global C_SYMBOL_NAME(__mpn_submul_1)
+C_SYMBOL_NAME(__mpn_submul_1):
+ ! Make S1_PTR and RES_PTR point at the end of their blocks
+ ! and put (- 4 x SIZE) in index/loop counter.
+ sll %o2,2,%o2
+ add %o0,%o2,%o4 ! RES_PTR in o4 since o0 is retval
+ add %o1,%o2,%o1
+ sub %g0,%o2,%o2
+
+ cmp %o3,0xfff
+ bgu Large
+ nop
+
+ ld [%o1+%o2],%o5
+ mov 0,%o0
+ b L0
+ add %o4,-4,%o4
+Loop0:
+ subcc %o5,%g1,%g1
+ ld [%o1+%o2],%o5
+ addx %o0,%g0,%o0
+ st %g1,[%o4+%o2]
+L0: wr %g0,%o3,%y
+ sra %o5,31,%g2
+ and %o3,%g2,%g2
+ andcc %g1,0,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,0,%g1
+ sra %g1,20,%g4
+ sll %g1,12,%g1
+ rd %y,%g3
+ srl %g3,20,%g3
+ or %g1,%g3,%g1
+
+ addcc %g1,%o0,%g1
+ addx %g2,%g4,%o0 ! add sign-compensation and cy to hi limb
+ addcc %o2,4,%o2 ! loop counter
+ bne Loop0
+ ld [%o4+%o2],%o5
+
+ subcc %o5,%g1,%g1
+ addx %o0,%g0,%o0
+ retl
+ st %g1,[%o4+%o2]
+
+
+Large: ld [%o1+%o2],%o5
+ mov 0,%o0
+ sra %o3,31,%g4 ! g4 = mask of ones iff S2_LIMB < 0
+ b L1
+ add %o4,-4,%o4
+Loop:
+ subcc %o5,%g3,%g3
+ ld [%o1+%o2],%o5
+ addx %o0,%g0,%o0
+ st %g3,[%o4+%o2]
+L1: wr %g0,%o5,%y
+ and %o5,%g4,%g2
+ andcc %g0,%g0,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%g0,%g1
+ rd %y,%g3
+ addcc %g3,%o0,%g3
+ addx %g2,%g1,%o0
+ addcc %o2,4,%o2
+ bne Loop
+ ld [%o4+%o2],%o5
+
+ subcc %o5,%g3,%g3
+ addx %o0,%g0,%o0
+ retl
+ st %g3,[%o4+%o2]
diff --git a/sysdeps/sparc/udiv.S b/sysdeps/sparc/udiv.S
new file mode 100644
index 0000000..826d011
--- /dev/null
+++ b/sysdeps/sparc/udiv.S
@@ -0,0 +1,348 @@
+ /* This file is generated from divrem.m4; DO NOT EDIT! */
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters:
+ * .udiv name of function to generate
+ * div div=div => %o0 / %o1; div=rem => %o0 % %o1
+ * false false=true => signed; false=false => unsigned
+ *
+ * Algorithm parameters:
+ * N how many bits per iteration we try to get (4)
+ * WORDSIZE total number of bits (32)
+ *
+ * Derived constants:
+ * TOPBITS number of bits in the top decade of a number
+ *
+ * Important variables:
+ * Q the partial quotient under development (initially 0)
+ * R the remainder so far, initially the dividend
+ * ITER number of main division loop iterations required;
+ * equal to ceil(log2(quotient) / N). Note that this
+ * is the log base (2^N) of the quotient.
+ * V the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ * Current estimate for non-large dividend is
+ * ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ * A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ * different path, as the upper bits of the quotient must be developed
+ * one bit at a time.
+ */
+
+
+
+#include "DEFS.h"
+#ifdef __svr4__
+#include <sys/trap.h>
+#else
+#include <machine/trap.h>
+#endif
+
+FUNC(.udiv)
+
+ ! Ready to divide. Compute size of quotient; scale comparand.
+ orcc %o1, %g0, %o5
+ bne 1f
+ mov %o0, %o3
+
+ ! Divide by zero trap. If it returns, return 0 (about as
+ ! wrong as possible, but that is what SunOS does...).
+ ta ST_DIV0
+ retl
+ clr %o0
+
+1:
+ cmp %o3, %o5 ! if %o1 exceeds %o0, done
+ blu Lgot_result ! (and algorithm fails otherwise)
+ clr %o2
+ sethi %hi(1 << (32 - 4 - 1)), %g1
+ cmp %o3, %g1
+ blu Lnot_really_big
+ clr %o4
+
+ ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
+ ! as our usual N-at-a-shot divide step will cause overflow and havoc.
+ ! The number of bits in the result here is N*ITER+SC, where SC <= N.
+ ! Compute ITER in an unorthodox manner: know we need to shift V into
+ ! the top decade: so do not even bother to compare to R.
+ 1:
+ cmp %o5, %g1
+ bgeu 3f
+ mov 1, %g7
+ sll %o5, 4, %o5
+ b 1b
+ add %o4, 1, %o4
+
+ ! Now compute %g7.
+ 2: addcc %o5, %o5, %o5
+ bcc Lnot_too_big
+ add %g7, 1, %g7
+
+ ! We get here if the %o1 overflowed while shifting.
+ ! This means that %o3 has the high-order bit set.
+ ! Restore %o5 and subtract from %o3.
+ sll %g1, 4, %g1 ! high order bit
+ srl %o5, 1, %o5 ! rest of %o5
+ add %o5, %g1, %o5
+ b Ldo_single_div
+ sub %g7, 1, %g7
+
+ Lnot_too_big:
+ 3: cmp %o5, %o3
+ blu 2b
+ nop
+ be Ldo_single_div
+ nop
+ /* NB: these are commented out in the V8-Sparc manual as well */
+ /* (I do not understand this) */
+ ! %o5 > %o3: went too far: back up 1 step
+ ! srl %o5, 1, %o5
+ ! dec %g7
+ ! do single-bit divide steps
+ !
+ ! We have to be careful here. We know that %o3 >= %o5, so we can do the
+ ! first divide step without thinking. BUT, the others are conditional,
+ ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
+ ! order bit set in the first step, just falling into the regular
+ ! division loop will mess up the first time around.
+ ! So we unroll slightly...
+ Ldo_single_div:
+ subcc %g7, 1, %g7
+ bl Lend_regular_divide
+ nop
+ sub %o3, %o5, %o3
+ mov 1, %o2
+ b Lend_single_divloop
+ nop
+ Lsingle_divloop:
+ sll %o2, 1, %o2
+ bl 1f
+ srl %o5, 1, %o5
+ ! %o3 >= 0
+ sub %o3, %o5, %o3
+ b 2f
+ add %o2, 1, %o2
+ 1: ! %o3 < 0
+ add %o3, %o5, %o3
+ sub %o2, 1, %o2
+ 2:
+ Lend_single_divloop:
+ subcc %g7, 1, %g7
+ bge Lsingle_divloop
+ tst %o3
+ b,a Lend_regular_divide
+
+Lnot_really_big:
+1:
+ sll %o5, 4, %o5
+ cmp %o5, %o3
+ bleu 1b
+ addcc %o4, 1, %o4
+ be Lgot_result
+ sub %o4, 1, %o4
+
+ tst %o3 ! set up for initial iteration
+Ldivloop:
+ sll %o2, 4, %o2
+ ! depth 1, accumulated bits 0
+ bl L.1.16
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 2, accumulated bits 1
+ bl L.2.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 3
+ bl L.3.19
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 7
+ bl L.4.23
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2+1), %o2
+
+L.4.23:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2-1), %o2
+
+
+L.3.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 5
+ bl L.4.21
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2+1), %o2
+
+L.4.21:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2-1), %o2
+
+
+
+L.2.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 1
+ bl L.3.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 3
+ bl L.4.19
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2+1), %o2
+
+L.4.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2-1), %o2
+
+
+L.3.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 1
+ bl L.4.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2+1), %o2
+
+L.4.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2-1), %o2
+
+
+
+
+L.1.16:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 2, accumulated bits -1
+ bl L.2.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -1
+ bl L.3.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -1
+ bl L.4.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2+1), %o2
+
+L.4.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2-1), %o2
+
+
+L.3.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -3
+ bl L.4.13
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2+1), %o2
+
+L.4.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2-1), %o2
+
+
+
+L.2.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -3
+ bl L.3.13
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -5
+ bl L.4.11
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2+1), %o2
+
+L.4.11:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2-1), %o2
+
+
+L.3.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -7
+ bl L.4.9
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2+1), %o2
+
+L.4.9:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2-1), %o2
+
+
+
+
+ 9:
+Lend_regular_divide:
+ subcc %o4, 1, %o4
+ bge Ldivloop
+ tst %o3
+ bl,a Lgot_result
+ ! non-restoring fixup here (one instruction only!)
+ sub %o2, 1, %o2
+
+
+Lgot_result:
+
+ retl
+ mov %o2, %o0
diff --git a/sysdeps/sparc/udiv_qrnnd.S b/sysdeps/sparc/udiv_qrnnd.S
new file mode 100644
index 0000000..4cd4f05
--- /dev/null
+++ b/sysdeps/sparc/udiv_qrnnd.S
@@ -0,0 +1,143 @@
+! SPARC __udiv_qrnnd division support, used from longlong.h.
+
+! Copyright (C) 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! rem_ptr i0
+! n1 i1
+! n0 i2
+! d i3
+
+#include "sysdep.h"
+#undef ret /* Kludge for glibc */
+
+ .text
+ .align 8
+LC0: .double 0r4294967296
+LC1: .double 0r2147483648
+
+ .align 4
+ .global C_SYMBOL_NAME(__udiv_qrnnd)
+C_SYMBOL_NAME(__udiv_qrnnd):
+ !#PROLOGUE# 0
+ save %sp,-104,%sp
+ !#PROLOGUE# 1
+ st %i1,[%fp-8]
+ ld [%fp-8],%f10
+ sethi %hi(LC0),%o7
+ fitod %f10,%f4
+ ldd [%o7+%lo(LC0)],%f8
+ cmp %i1,0
+ bge L248
+ mov %i0,%i5
+ faddd %f4,%f8,%f4
+L248:
+ st %i2,[%fp-8]
+ ld [%fp-8],%f10
+ fmuld %f4,%f8,%f6
+ cmp %i2,0
+ bge L249
+ fitod %f10,%f2
+ faddd %f2,%f8,%f2
+L249:
+ st %i3,[%fp-8]
+ faddd %f6,%f2,%f2
+ ld [%fp-8],%f10
+ cmp %i3,0
+ bge L250
+ fitod %f10,%f4
+ faddd %f4,%f8,%f4
+L250:
+ fdivd %f2,%f4,%f2
+ sethi %hi(LC1),%o7
+ ldd [%o7+%lo(LC1)],%f4
+ fcmped %f2,%f4
+ nop
+ fbge,a L251
+ fsubd %f2,%f4,%f2
+ fdtoi %f2,%f2
+ st %f2,[%fp-8]
+ b L252
+ ld [%fp-8],%i4
+L251:
+ fdtoi %f2,%f2
+ st %f2,[%fp-8]
+ ld [%fp-8],%i4
+ sethi %hi(-2147483648),%g2
+ xor %i4,%g2,%i4
+L252:
+ wr %g0,%i4,%y
+ sra %i3,31,%g2
+ and %i4,%g2,%g2
+ andcc %g0,0,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,0,%g1
+ add %g1,%g2,%i0
+ rd %y,%g3
+ subcc %i2,%g3,%o7
+ subxcc %i1,%i0,%g0
+ be L253
+ cmp %o7,%i3
+
+ add %i4,-1,%i0
+ add %o7,%i3,%o7
+ st %o7,[%i5]
+ ret
+ restore
+L253:
+ blu L246
+ mov %i4,%i0
+ add %i4,1,%i0
+ sub %o7,%i3,%o7
+L246:
+ st %o7,[%i5]
+ ret
+ restore
diff --git a/sysdeps/sparc/umul.S b/sysdeps/sparc/umul.S
new file mode 100644
index 0000000..7a26c29
--- /dev/null
+++ b/sysdeps/sparc/umul.S
@@ -0,0 +1,153 @@
+/*
+ * Unsigned multiply. Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the
+ * upper 32 bits of the 64-bit product).
+ *
+ * This code optimizes short (less than 13-bit) multiplies. Short
+ * multiplies require 25 instruction cycles, and long ones require
+ * 45 instruction cycles.
+ *
+ * On return, overflow has occurred (%o1 is not zero) if and only if
+ * the Z condition code is clear, allowing, e.g., the following:
+ *
+ * call .umul
+ * nop
+ * bnz overflow (or tnz)
+ */
+
+#include "DEFS.h"
+FUNC(.umul)
+ or %o0, %o1, %o4
+ mov %o0, %y ! multiplier -> Y
+ andncc %o4, 0xfff, %g0 ! test bits 12..31 of *both* args
+ be Lmul_shortway ! if zero, can do it the short way
+ andcc %g0, %g0, %o4 ! zero the partial product and clear N and V
+
+ /*
+ * Long multiply. 32 steps, followed by a final shift step.
+ */
+ mulscc %o4, %o1, %o4 ! 1
+ mulscc %o4, %o1, %o4 ! 2
+ mulscc %o4, %o1, %o4 ! 3
+ mulscc %o4, %o1, %o4 ! 4
+ mulscc %o4, %o1, %o4 ! 5
+ mulscc %o4, %o1, %o4 ! 6
+ mulscc %o4, %o1, %o4 ! 7
+ mulscc %o4, %o1, %o4 ! 8
+ mulscc %o4, %o1, %o4 ! 9
+ mulscc %o4, %o1, %o4 ! 10
+ mulscc %o4, %o1, %o4 ! 11
+ mulscc %o4, %o1, %o4 ! 12
+ mulscc %o4, %o1, %o4 ! 13
+ mulscc %o4, %o1, %o4 ! 14
+ mulscc %o4, %o1, %o4 ! 15
+ mulscc %o4, %o1, %o4 ! 16
+ mulscc %o4, %o1, %o4 ! 17
+ mulscc %o4, %o1, %o4 ! 18
+ mulscc %o4, %o1, %o4 ! 19
+ mulscc %o4, %o1, %o4 ! 20
+ mulscc %o4, %o1, %o4 ! 21
+ mulscc %o4, %o1, %o4 ! 22
+ mulscc %o4, %o1, %o4 ! 23
+ mulscc %o4, %o1, %o4 ! 24
+ mulscc %o4, %o1, %o4 ! 25
+ mulscc %o4, %o1, %o4 ! 26
+ mulscc %o4, %o1, %o4 ! 27
+ mulscc %o4, %o1, %o4 ! 28
+ mulscc %o4, %o1, %o4 ! 29
+ mulscc %o4, %o1, %o4 ! 30
+ mulscc %o4, %o1, %o4 ! 31
+ mulscc %o4, %o1, %o4 ! 32
+ mulscc %o4, %g0, %o4 ! final shift
+
+
+ /*
+ * Normally, with the shift-and-add approach, if both numbers are
+ * positive you get the correct result. With 32-bit two's-complement
+ * numbers, -x is represented as
+ *
+ * x 32
+ * ( 2 - ------ ) mod 2 * 2
+ * 32
+ * 2
+ *
+ * (the `mod 2' subtracts 1 from 1.bbbb). To avoid lots of 2^32s,
+ * we can treat this as if the radix point were just to the left
+ * of the sign bit (multiply by 2^32), and get
+ *
+ * -x = (2 - x) mod 2
+ *
+ * Then, ignoring the `mod 2's for convenience:
+ *
+ * x * y = xy
+ * -x * y = 2y - xy
+ * x * -y = 2x - xy
+ * -x * -y = 4 - 2x - 2y + xy
+ *
+ * For signed multiplies, we subtract (x << 32) from the partial
+ * product to fix this problem for negative multipliers (see mul.s).
+ * Because of the way the shift into the partial product is calculated
+ * (N xor V), this term is automatically removed for the multiplicand,
+ * so we don't have to adjust.
+ *
+ * But for unsigned multiplies, the high order bit wasn't a sign bit,
+ * and the correction is wrong. So for unsigned multiplies where the
+ * high order bit is one, we end up with xy - (y << 32). To fix it
+ * we add y << 32.
+ */
+#if 0
+ tst %o1
+ bl,a 1f ! if %o1 < 0 (high order bit = 1),
+ add %o4, %o0, %o4 ! %o4 += %o0 (add y to upper half)
+1: rd %y, %o0 ! get lower half of product
+ retl
+ addcc %o4, %g0, %o1 ! put upper half in place and set Z for %o1==0
+#else
+ /* Faster code from tege@sics.se. */
+ sra %o1, 31, %o2 ! make mask from sign bit
+ and %o0, %o2, %o2 ! %o2 = 0 or %o0, depending on sign of %o1
+ rd %y, %o0 ! get lower half of product
+ retl
+ addcc %o4, %o2, %o1 ! add compensation and put upper half in place
+#endif
+
+Lmul_shortway:
+ /*
+ * Short multiply. 12 steps, followed by a final shift step.
+ * The resulting bits are off by 12 and (32-12) = 20 bit positions,
+ * but there is no problem with %o0 being negative (unlike above),
+ * and overflow is impossible (the answer is at most 24 bits long).
+ */
+ mulscc %o4, %o1, %o4 ! 1
+ mulscc %o4, %o1, %o4 ! 2
+ mulscc %o4, %o1, %o4 ! 3
+ mulscc %o4, %o1, %o4 ! 4
+ mulscc %o4, %o1, %o4 ! 5
+ mulscc %o4, %o1, %o4 ! 6
+ mulscc %o4, %o1, %o4 ! 7
+ mulscc %o4, %o1, %o4 ! 8
+ mulscc %o4, %o1, %o4 ! 9
+ mulscc %o4, %o1, %o4 ! 10
+ mulscc %o4, %o1, %o4 ! 11
+ mulscc %o4, %o1, %o4 ! 12
+ mulscc %o4, %g0, %o4 ! final shift
+
+ /*
+ * %o4 has 20 of the bits that should be in the result; %y has
+ * the bottom 12 (as %y's top 12). That is:
+ *
+ * %o4 %y
+ * +----------------+----------------+
+ * | -12- | -20- | -12- | -20- |
+ * +------(---------+------)---------+
+ * -----result-----
+ *
+ * The 12 bits of %o4 left of the `result' area are all zero;
+ * in fact, all top 20 bits of %o4 are zero.
+ */
+
+ rd %y, %o5
+ sll %o4, 12, %o0 ! shift middle bits left 12
+ srl %o5, 20, %o5 ! shift low bits right 20
+ or %o5, %o0, %o0
+ retl
+ addcc %g0, %g0, %o1 ! %o1 = zero, and set Z
diff --git a/sysdeps/sparc/urem.S b/sysdeps/sparc/urem.S
new file mode 100644
index 0000000..9f64c88
--- /dev/null
+++ b/sysdeps/sparc/urem.S
@@ -0,0 +1,348 @@
+ /* This file is generated from divrem.m4; DO NOT EDIT! */
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters:
+ * .urem name of function to generate
+ * rem rem=div => %o0 / %o1; rem=rem => %o0 % %o1
+ * false false=true => signed; false=false => unsigned
+ *
+ * Algorithm parameters:
+ * N how many bits per iteration we try to get (4)
+ * WORDSIZE total number of bits (32)
+ *
+ * Derived constants:
+ * TOPBITS number of bits in the top decade of a number
+ *
+ * Important variables:
+ * Q the partial quotient under development (initially 0)
+ * R the remainder so far, initially the dividend
+ * ITER number of main division loop iterations required;
+ * equal to ceil(log2(quotient) / N). Note that this
+ * is the log base (2^N) of the quotient.
+ * V the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ * Current estimate for non-large dividend is
+ * ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ * A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ * different path, as the upper bits of the quotient must be developed
+ * one bit at a time.
+ */
+
+
+
+#include "DEFS.h"
+#ifdef __svr4__
+#include <sys/trap.h>
+#else
+#include <machine/trap.h>
+#endif
+
+FUNC(.urem)
+
+ ! Ready to divide. Compute size of quotient; scale comparand.
+ orcc %o1, %g0, %o5
+ bne 1f
+ mov %o0, %o3
+
+ ! Divide by zero trap. If it returns, return 0 (about as
+ ! wrong as possible, but that is what SunOS does...).
+ ta ST_DIV0
+ retl
+ clr %o0
+
+1:
+ cmp %o3, %o5 ! if %o1 exceeds %o0, done
+ blu Lgot_result ! (and algorithm fails otherwise)
+ clr %o2
+ sethi %hi(1 << (32 - 4 - 1)), %g1
+ cmp %o3, %g1
+ blu Lnot_really_big
+ clr %o4
+
+ ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
+ ! as our usual N-at-a-shot divide step will cause overflow and havoc.
+ ! The number of bits in the result here is N*ITER+SC, where SC <= N.
+ ! Compute ITER in an unorthodox manner: know we need to shift V into
+ ! the top decade: so do not even bother to compare to R.
+ 1:
+ cmp %o5, %g1
+ bgeu 3f
+ mov 1, %g7
+ sll %o5, 4, %o5
+ b 1b
+ add %o4, 1, %o4
+
+ ! Now compute %g7.
+ 2: addcc %o5, %o5, %o5
+ bcc Lnot_too_big
+ add %g7, 1, %g7
+
+ ! We get here if the %o1 overflowed while shifting.
+ ! This means that %o3 has the high-order bit set.
+ ! Restore %o5 and subtract from %o3.
+ sll %g1, 4, %g1 ! high order bit
+ srl %o5, 1, %o5 ! rest of %o5
+ add %o5, %g1, %o5
+ b Ldo_single_div
+ sub %g7, 1, %g7
+
+ Lnot_too_big:
+ 3: cmp %o5, %o3
+ blu 2b
+ nop
+ be Ldo_single_div
+ nop
+ /* NB: these are commented out in the V8-Sparc manual as well */
+ /* (I do not understand this) */
+ ! %o5 > %o3: went too far: back up 1 step
+ ! srl %o5, 1, %o5
+ ! dec %g7
+ ! do single-bit divide steps
+ !
+ ! We have to be careful here. We know that %o3 >= %o5, so we can do the
+ ! first divide step without thinking. BUT, the others are conditional,
+ ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
+ ! order bit set in the first step, just falling into the regular
+ ! division loop will mess up the first time around.
+ ! So we unroll slightly...
+ Ldo_single_div:
+ subcc %g7, 1, %g7
+ bl Lend_regular_divide
+ nop
+ sub %o3, %o5, %o3
+ mov 1, %o2
+ b Lend_single_divloop
+ nop
+ Lsingle_divloop:
+ sll %o2, 1, %o2
+ bl 1f
+ srl %o5, 1, %o5
+ ! %o3 >= 0
+ sub %o3, %o5, %o3
+ b 2f
+ add %o2, 1, %o2
+ 1: ! %o3 < 0
+ add %o3, %o5, %o3
+ sub %o2, 1, %o2
+ 2:
+ Lend_single_divloop:
+ subcc %g7, 1, %g7
+ bge Lsingle_divloop
+ tst %o3
+ b,a Lend_regular_divide
+
+Lnot_really_big:
+1:
+ sll %o5, 4, %o5
+ cmp %o5, %o3
+ bleu 1b
+ addcc %o4, 1, %o4
+ be Lgot_result
+ sub %o4, 1, %o4
+
+ tst %o3 ! set up for initial iteration
+Ldivloop:
+ sll %o2, 4, %o2
+ ! depth 1, accumulated bits 0
+ bl L.1.16
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 2, accumulated bits 1
+ bl L.2.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 3
+ bl L.3.19
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 7
+ bl L.4.23
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2+1), %o2
+
+L.4.23:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2-1), %o2
+
+
+L.3.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 5
+ bl L.4.21
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2+1), %o2
+
+L.4.21:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2-1), %o2
+
+
+
+L.2.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 1
+ bl L.3.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 3
+ bl L.4.19
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2+1), %o2
+
+L.4.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2-1), %o2
+
+
+L.3.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 1
+ bl L.4.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2+1), %o2
+
+L.4.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2-1), %o2
+
+
+
+
+L.1.16:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 2, accumulated bits -1
+ bl L.2.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -1
+ bl L.3.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -1
+ bl L.4.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2+1), %o2
+
+L.4.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2-1), %o2
+
+
+L.3.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -3
+ bl L.4.13
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2+1), %o2
+
+L.4.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2-1), %o2
+
+
+
+L.2.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -3
+ bl L.3.13
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -5
+ bl L.4.11
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2+1), %o2
+
+L.4.11:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2-1), %o2
+
+
+L.3.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -7
+ bl L.4.9
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2+1), %o2
+
+L.4.9:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2-1), %o2
+
+
+
+
+ 9:
+Lend_regular_divide:
+ subcc %o4, 1, %o4
+ bge Ldivloop
+ tst %o3
+ bl,a Lgot_result
+ ! non-restoring fixup here (one instruction only!)
+ add %o3, %o1, %o3
+
+
+Lgot_result:
+
+ retl
+ mov %o3, %o0