diff options
Diffstat (limited to 'sysdeps/sparc')
29 files changed, 3465 insertions, 0 deletions
diff --git a/sysdeps/sparc/DEFS.h b/sysdeps/sparc/DEFS.h new file mode 100644 index 0000000..ef69663 --- /dev/null +++ b/sysdeps/sparc/DEFS.h @@ -0,0 +1,4 @@ +#define FUNC(name) \ + .global name; \ + .align 4; \ + name: diff --git a/sysdeps/sparc/Dist b/sysdeps/sparc/Dist new file mode 100644 index 0000000..49cd4dc --- /dev/null +++ b/sysdeps/sparc/Dist @@ -0,0 +1,4 @@ +DEFS.h +mul.S umul.S +divrem.m4 sdiv.S udiv.S rem.S urem.S +alloca.S diff --git a/sysdeps/sparc/Implies b/sysdeps/sparc/Implies new file mode 100644 index 0000000..da719e1 --- /dev/null +++ b/sysdeps/sparc/Implies @@ -0,0 +1,2 @@ +# SPARC uses IEEE 754 floating point. +ieee754 diff --git a/sysdeps/sparc/Makefile b/sysdeps/sparc/Makefile new file mode 100644 index 0000000..1154d05 --- /dev/null +++ b/sysdeps/sparc/Makefile @@ -0,0 +1,57 @@ +# Copyright (C) 1991, 1992, 1993, 1994, 1995 Free Software Foundation, Inc. +# This file is part of the GNU C Library. + +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Library General Public License +# as published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. + +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Library General Public License for more details. + +# You should have received a copy of the GNU Library General Public +# License along with the GNU C Library; see the file COPYING.LIB. If +# not, write to the Free Software Foundation, Inc., 675 Mass Ave, +# Cambridge, MA 02139, USA. + +ifeq ($(subdir),gnulib) +routines = mul umul $(divrem) alloca +endif # gnulib + +# We distribute these files, even though they are generated, +# so as to avoid the need for a functioning m4 to build the library. +divrem := sdiv udiv rem urem + ++divrem-NAME-sdiv := div ++divrem-NAME-udiv := udiv ++divrem-NAME-rem := rem ++divrem-NAME-urem := urem ++divrem-NAME = $(+divrem-NAME-$(basename $(notdir $@))) ++divrem-OP-div := div ++divrem-OP-udiv := div ++divrem-OP-rem := rem ++divrem-OP-urem := rem ++divrem-S-div := true ++divrem-S-rem := true ++divrem-S-udiv := false ++divrem-S-urem := false +$(divrem:%=$(sysdep_dir)/sparc/%.S): $(sysdep_dir)/sparc/divrem.m4 + (echo "define(NAME,\`.$(+divrem-NAME)')\ + define(OP,\`$(+divrem-OP-$(+divrem-NAME))')\ + define(S,\`$(+divrem-S-$(+divrem-NAME))')\ + /* This file is generated from divrem.m4; DO NOT EDIT! */"; \ + cat $<) | $(M4) > $@-tmp +# Make it unwritable so noone will edit it by mistake. + -chmod a-w $@-tmp + mv -f $@-tmp $@ + test -d CVS && cvs commit -m'Regenerated from $<' $@ + +sysdep-realclean := $(sysdep-realclean) $(divrem:%=sysdeps/sparc/%.S) + +ifeq ($(subdir),crypt) + +crypt := crypt.sparc # Use crypt/crypt.sparc.S. + +endif # crypt diff --git a/sysdeps/sparc/__longjmp.S b/sysdeps/sparc/__longjmp.S new file mode 100644 index 0000000..adff06e --- /dev/null +++ b/sysdeps/sparc/__longjmp.S @@ -0,0 +1,47 @@ +/* Copyright (C) 1991, 1993 Free Software Foundation, Inc. +This file is part of the GNU C Library. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. + +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 675 Mass Ave, +Cambridge, MA 02139, USA. */ + +#include <sysdep.h> +#ifdef __svr4__ +#include <sys/trap.h> +#else +#include <machine/trap.h> +#endif + +/* NOTE: This code depends on the definition of `__jmp_buf' in <jmp_buf.h>. */ + +ENTRY (__longjmp) + /* Do a "flush register windows trap". The trap handler in the + kernel writes all the register windows to their stack slots, and + marks them all as invalid (needing to be sucked up from the + stack when used). This ensures that all information needed to + unwind to these callers is in memory, not in the register + windows. */ + ta ST_FLUSH_WINDOWS + ld [%o0], %o7 /* Return PC. */ + ld [%o0 + 4], %fp /* Saved SP. */ + sub %fp, 64, %sp /* Allocate a register save area. */ + + /* if (%o1 == 0) %o1 = 1; */ + tst %o1 + be,a Ldone + mov 1, %o1 + +Ldone: retl + /* On the way out, put the return value in %o0. */ + restore %o1, 0, %o0 diff --git a/sysdeps/sparc/add_n.S b/sysdeps/sparc/add_n.S new file mode 100644 index 0000000..3be3e39 --- /dev/null +++ b/sysdeps/sparc/add_n.S @@ -0,0 +1,134 @@ +! sparc __mpn_add_n -- Add two limb vectors of the same length > 0 and store +! sum in a third limb vector. + +! Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + +! INPUT PARAMETERS +! res_ptr %o0 +! s1_ptr %o1 +! s2_ptr %o2 +! size %o3 + +#include "sysdep.h" + + .text + .align 4 + .global C_SYMBOL_NAME(__mpn_add_n) +C_SYMBOL_NAME(__mpn_add_n): + ld [%o1+0],%o4 ! read first limb from s1_ptr + srl %o3,4,%g1 + ld [%o2+0],%o5 ! read first limb from s2_ptr + + sub %g0,%o3,%o3 + andcc %o3,(16-1),%o3 + be Lzero + nop + + sll %o3,2,%o3 ! multiply by 4 + sub %o0,%o3,%o0 ! adjust res_ptr + sub %o1,%o3,%o1 ! adjust s1_ptr + sub %o2,%o3,%o2 ! adjust s2_ptr + + mov %o4,%g2 + + sethi %hi(Lbase),%g3 + or %g3,%lo(Lbase),%g3 + sll %o3,2,%o3 ! multiply by 4 + jmp %g3+%o3 + mov %o5,%g3 + +Loop: addxcc %g2,%g3,%o3 + add %o1,64,%o1 + st %o3,[%o0+60] + add %o2,64,%o2 + ld [%o1+0],%o4 + add %o0,64,%o0 + ld [%o2+0],%o5 +Lzero: sub %g1,1,%g1 ! add 0 + 16r limbs (adjust loop counter) +Lbase: ld [%o1+4],%g2 + addxcc %o4,%o5,%o3 + ld [%o2+4],%g3 + st %o3,[%o0+0] + ld [%o1+8],%o4 ! add 15 + 16r limbs + addxcc %g2,%g3,%o3 + ld [%o2+8],%o5 + st %o3,[%o0+4] + ld [%o1+12],%g2 ! add 14 + 16r limbs + addxcc %o4,%o5,%o3 + ld [%o2+12],%g3 + st %o3,[%o0+8] + ld [%o1+16],%o4 ! add 13 + 16r limbs + addxcc %g2,%g3,%o3 + ld [%o2+16],%o5 + st %o3,[%o0+12] + ld [%o1+20],%g2 ! add 12 + 16r limbs + addxcc %o4,%o5,%o3 + ld [%o2+20],%g3 + st %o3,[%o0+16] + ld [%o1+24],%o4 ! add 11 + 16r limbs + addxcc %g2,%g3,%o3 + ld [%o2+24],%o5 + st %o3,[%o0+20] + ld [%o1+28],%g2 ! add 10 + 16r limbs + addxcc %o4,%o5,%o3 + ld [%o2+28],%g3 + st %o3,[%o0+24] + ld [%o1+32],%o4 ! add 9 + 16r limbs + addxcc %g2,%g3,%o3 + ld [%o2+32],%o5 + st %o3,[%o0+28] + ld [%o1+36],%g2 ! add 8 + 16r limbs + addxcc %o4,%o5,%o3 + ld [%o2+36],%g3 + st %o3,[%o0+32] + ld [%o1+40],%o4 ! add 7 + 16r limbs + addxcc %g2,%g3,%o3 + ld [%o2+40],%o5 + st %o3,[%o0+36] + ld [%o1+44],%g2 ! add 6 + 16r limbs + addxcc %o4,%o5,%o3 + ld [%o2+44],%g3 + st %o3,[%o0+40] + ld [%o1+48],%o4 ! add 5 + 16r limbs + addxcc %g2,%g3,%o3 + ld [%o2+48],%o5 + st %o3,[%o0+44] + ld [%o1+52],%g2 ! add 4 + 16r limbs + addxcc %o4,%o5,%o3 + ld [%o2+52],%g3 + st %o3,[%o0+48] + ld [%o1+56],%o4 ! add 3 + 16r limbs + addxcc %g2,%g3,%o3 + ld [%o2+56],%o5 + st %o3,[%o0+52] + ld [%o1+60],%g2 ! add 2 + 16r limbs + addxcc %o4,%o5,%o3 + ld [%o2+60],%g3 + st %o3,[%o0+56] + addx %g0,%g0,%o4 + tst %g1 + bne Loop + subcc %g0,%o4,%g0 ! restore cy (delay slot) + + addxcc %g2,%g3,%o3 + st %o3,[%o0+60] ! store most significant limb + + retl + addx %g0,%g0,%o0 ! return carry-out from most sign. limb diff --git a/sysdeps/sparc/addmul_1.S b/sysdeps/sparc/addmul_1.S new file mode 100644 index 0000000..63e7db0 --- /dev/null +++ b/sysdeps/sparc/addmul_1.S @@ -0,0 +1,146 @@ +! SPARC __mpn_addmul_1 -- Multiply a limb vector with a limb and add +! the result to a second limb vector. + +! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + +! INPUT PARAMETERS +! res_ptr o0 +! s1_ptr o1 +! size o2 +! s2_limb o3 + +#include "sysdep.h" + +.text + .align 4 + .global C_SYMBOL_NAME(__mpn_addmul_1) +C_SYMBOL_NAME(__mpn_addmul_1): + ! Make S1_PTR and RES_PTR point at the end of their blocks + ! and put (- 4 x SIZE) in index/loop counter. + sll %o2,2,%o2 + add %o0,%o2,%o4 ! RES_PTR in o4 since o0 is retval + add %o1,%o2,%o1 + sub %g0,%o2,%o2 + + cmp %o3,0xfff + bgu Large + nop + + ld [%o1+%o2],%o5 + mov 0,%o0 + b L0 + add %o4,-4,%o4 +Loop0: + addcc %o5,%g1,%g1 + ld [%o1+%o2],%o5 + addx %o0,%g0,%o0 + st %g1,[%o4+%o2] +L0: wr %g0,%o3,%y + sra %o5,31,%g2 + and %o3,%g2,%g2 + andcc %g1,0,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,0,%g1 + sra %g1,20,%g4 + sll %g1,12,%g1 + rd %y,%g3 + srl %g3,20,%g3 + or %g1,%g3,%g1 + + addcc %g1,%o0,%g1 + addx %g2,%g4,%o0 ! add sign-compensation and cy to hi limb + addcc %o2,4,%o2 ! loop counter + bne Loop0 + ld [%o4+%o2],%o5 + + addcc %o5,%g1,%g1 + addx %o0,%g0,%o0 + retl + st %g1,[%o4+%o2] + + +Large: ld [%o1+%o2],%o5 + mov 0,%o0 + sra %o3,31,%g4 ! g4 = mask of ones iff S2_LIMB < 0 + b L1 + add %o4,-4,%o4 +Loop: + addcc %o5,%g3,%g3 + ld [%o1+%o2],%o5 + addx %o0,%g0,%o0 + st %g3,[%o4+%o2] +L1: wr %g0,%o5,%y + and %o5,%g4,%g2 + andcc %g0,%g0,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%g0,%g1 + rd %y,%g3 + addcc %g3,%o0,%g3 + addx %g2,%g1,%o0 + addcc %o2,4,%o2 + bne Loop + ld [%o4+%o2],%o5 + + addcc %o5,%g3,%g3 + addx %o0,%g0,%o0 + retl + st %g3,[%o4+%o2] diff --git a/sysdeps/sparc/alloca.S b/sysdeps/sparc/alloca.S new file mode 100644 index 0000000..0a8718d --- /dev/null +++ b/sysdeps/sparc/alloca.S @@ -0,0 +1,32 @@ +/* Copyright (C) 1994 Free Software Foundation, Inc. +This file is part of the GNU C Library. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. + +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 675 Mass Ave, +Cambridge, MA 02139, USA. */ + +#include "DEFS.h" + +/* Code produced by Sun's C compiler calls this function with two extra + arguments which it makes relocatable symbols but seem always to be + the constant 96; I have no idea what they are for. */ + +#ifndef NO_UNDERSCORES +#define __builtin_alloca ___builtin_alloca +#endif + +FUNC (__builtin_alloca) + sub %sp, %o0, %sp /* Push some stack space. */ + retl /* Return; the returned buffer leaves 96 */ + add %sp, 96, %o0 /* bytes of register save area at the top. */ diff --git a/sysdeps/sparc/bsd-_setjmp.S b/sysdeps/sparc/bsd-_setjmp.S new file mode 100644 index 0000000..5b685d5 --- /dev/null +++ b/sysdeps/sparc/bsd-_setjmp.S @@ -0,0 +1,26 @@ +/* BSD `_setjmp' entry point to `sigsetjmp (..., 0)'. Sparc version. +Copyright (C) 1994 Free Software Foundation, Inc. +This file is part of the GNU C Library. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. + +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 675 Mass Ave, +Cambridge, MA 02139, USA. */ + +#include <sysdep.h> + +ENTRY (setjmp) + sethi %hi(C_SYMBOL_NAME (__sigsetjmp)), %g1 + or %lo(C_SYMBOL_NAME (__sigsetjmp)), %g1, %g1 + jmp %g1 + mov %g0, %o1 /* Pass second argument of zero. */ diff --git a/sysdeps/sparc/bsd-setjmp.S b/sysdeps/sparc/bsd-setjmp.S new file mode 100644 index 0000000..b0a6326 --- /dev/null +++ b/sysdeps/sparc/bsd-setjmp.S @@ -0,0 +1,26 @@ +/* BSD `setjmp' entry point to `sigsetjmp (..., 1)'. Sparc version. +Copyright (C) 1994 Free Software Foundation, Inc. +This file is part of the GNU C Library. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. + +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 675 Mass Ave, +Cambridge, MA 02139, USA. */ + +#include <sysdep.h> + +ENTRY (setjmp) + sethi %hi(C_SYMBOL_NAME (__sigsetjmp)), %g1 + or %lo(C_SYMBOL_NAME (__sigsetjmp)), %g1, %g1 + jmp %g1 + mov 1, %o1 /* Pass second argument of one. */ diff --git a/sysdeps/sparc/bytesex.h b/sysdeps/sparc/bytesex.h new file mode 100644 index 0000000..f1a75c0 --- /dev/null +++ b/sysdeps/sparc/bytesex.h @@ -0,0 +1,3 @@ +/* SPARC is big-endian. */ + +#define __BYTE_ORDER __BIG_ENDIAN diff --git a/sysdeps/sparc/divrem.m4 b/sysdeps/sparc/divrem.m4 new file mode 100644 index 0000000..bde8a21 --- /dev/null +++ b/sysdeps/sparc/divrem.m4 @@ -0,0 +1,234 @@ +/* + * Division and remainder, from Appendix E of the Sparc Version 8 + * Architecture Manual, with fixes from Gordon Irlam. + */ + +/* + * Input: dividend and divisor in %o0 and %o1 respectively. + * + * m4 parameters: + * NAME name of function to generate + * OP OP=div => %o0 / %o1; OP=rem => %o0 % %o1 + * S S=true => signed; S=false => unsigned + * + * Algorithm parameters: + * N how many bits per iteration we try to get (4) + * WORDSIZE total number of bits (32) + * + * Derived constants: + * TOPBITS number of bits in the top `decade' of a number + * + * Important variables: + * Q the partial quotient under development (initially 0) + * R the remainder so far, initially the dividend + * ITER number of main division loop iterations required; + * equal to ceil(log2(quotient) / N). Note that this + * is the log base (2^N) of the quotient. + * V the current comparand, initially divisor*2^(ITER*N-1) + * + * Cost: + * Current estimate for non-large dividend is + * ceil(log2(quotient) / N) * (10 + 7N/2) + C + * A large dividend is one greater than 2^(31-TOPBITS) and takes a + * different path, as the upper bits of the quotient must be developed + * one bit at a time. + */ + +define(N, `4')dnl +define(WORDSIZE, `32')dnl +define(TOPBITS, eval(WORDSIZE - N*((WORDSIZE-1)/N)))dnl +dnl +define(dividend, `%o0')dnl +define(divisor, `%o1')dnl +define(Q, `%o2')dnl +define(R, `%o3')dnl +define(ITER, `%o4')dnl +define(V, `%o5')dnl +dnl +dnl m4 reminder: ifelse(a,b,c,d) => if a is b, then c, else d +define(T, `%g1')dnl +define(SC, `%g7')dnl +ifelse(S, `true', `define(SIGN, `%g6')')dnl + +dnl +dnl This is the recursive definition for developing quotient digits. +dnl +dnl Parameters: +dnl $1 the current depth, 1 <= $1 <= N +dnl $2 the current accumulation of quotient bits +dnl N max depth +dnl +dnl We add a new bit to $2 and either recurse or insert the bits in +dnl the quotient. R, Q, and V are inputs and outputs as defined above; +dnl the condition codes are expected to reflect the input R, and are +dnl modified to reflect the output R. +dnl +define(DEVELOP_QUOTIENT_BITS, +` ! depth $1, accumulated bits $2 + bl L.$1.eval(2**N+$2) + srl V,1,V + ! remainder is positive + subcc R,V,R + ifelse($1, N, + ` b 9f + add Q, ($2*2+1), Q + ', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2+1)')') +L.$1.eval(2**N+$2): + ! remainder is negative + addcc R,V,R + ifelse($1, N, + ` b 9f + add Q, ($2*2-1), Q + ', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2-1)')') + ifelse($1, 1, `9:')')dnl + +#include "DEFS.h" +#ifdef __svr4__ +#include <sys/trap.h> +#else +#include <machine/trap.h> +#endif + +FUNC(NAME) +ifelse(S, `true', +` ! compute sign of result; if neither is negative, no problem + orcc divisor, dividend, %g0 ! either negative? + bge 2f ! no, go do the divide +ifelse(OP, `div', +` xor divisor, dividend, SIGN ! compute sign in any case', +` mov dividend, SIGN ! sign of remainder matches dividend') + tst divisor + bge 1f + tst dividend + ! divisor is definitely negative; dividend might also be negative + bge 2f ! if dividend not negative... + sub %g0, divisor, divisor ! in any case, make divisor nonneg +1: ! dividend is negative, divisor is nonnegative + sub %g0, dividend, dividend ! make dividend nonnegative +2: +') + ! Ready to divide. Compute size of quotient; scale comparand. + orcc divisor, %g0, V + bne 1f + mov dividend, R + + ! Divide by zero trap. If it returns, return 0 (about as + ! wrong as possible, but that is what SunOS does...). + ta ST_DIV0 + retl + clr %o0 + +1: + cmp R, V ! if divisor exceeds dividend, done + blu Lgot_result ! (and algorithm fails otherwise) + clr Q + sethi %hi(1 << (WORDSIZE - TOPBITS - 1)), T + cmp R, T + blu Lnot_really_big + clr ITER + + ! `Here the dividend is >= 2**(31-N) or so. We must be careful here, + ! as our usual N-at-a-shot divide step will cause overflow and havoc. + ! The number of bits in the result here is N*ITER+SC, where SC <= N. + ! Compute ITER in an unorthodox manner: know we need to shift V into + ! the top decade: so do not even bother to compare to R.' + 1: + cmp V, T + bgeu 3f + mov 1, SC + sll V, N, V + b 1b + add ITER, 1, ITER + + ! Now compute SC. + 2: addcc V, V, V + bcc Lnot_too_big + add SC, 1, SC + + ! We get here if the divisor overflowed while shifting. + ! This means that R has the high-order bit set. + ! Restore V and subtract from R. + sll T, TOPBITS, T ! high order bit + srl V, 1, V ! rest of V + add V, T, V + b Ldo_single_div + sub SC, 1, SC + + Lnot_too_big: + 3: cmp V, R + blu 2b + nop + be Ldo_single_div + nop + /* NB: these are commented out in the V8-Sparc manual as well */ + /* (I do not understand this) */ + ! V > R: went too far: back up 1 step + ! srl V, 1, V + ! dec SC + ! do single-bit divide steps + ! + ! We have to be careful here. We know that R >= V, so we can do the + ! first divide step without thinking. BUT, the others are conditional, + ! and are only done if R >= 0. Because both R and V may have the high- + ! order bit set in the first step, just falling into the regular + ! division loop will mess up the first time around. + ! So we unroll slightly... + Ldo_single_div: + subcc SC, 1, SC + bl Lend_regular_divide + nop + sub R, V, R + mov 1, Q + b Lend_single_divloop + nop + Lsingle_divloop: + sll Q, 1, Q + bl 1f + srl V, 1, V + ! R >= 0 + sub R, V, R + b 2f + add Q, 1, Q + 1: ! R < 0 + add R, V, R + sub Q, 1, Q + 2: + Lend_single_divloop: + subcc SC, 1, SC + bge Lsingle_divloop + tst R + b,a Lend_regular_divide + +Lnot_really_big: +1: + sll V, N, V + cmp V, R + bleu 1b + addcc ITER, 1, ITER + be Lgot_result + sub ITER, 1, ITER + + tst R ! set up for initial iteration +Ldivloop: + sll Q, N, Q + DEVELOP_QUOTIENT_BITS(1, 0) +Lend_regular_divide: + subcc ITER, 1, ITER + bge Ldivloop + tst R + bl,a Lgot_result + ! non-restoring fixup here (one instruction only!) +ifelse(OP, `div', +` sub Q, 1, Q +', ` add R, divisor, R +') + +Lgot_result: +ifelse(S, `true', +` ! check to see if answer should be < 0 + tst SIGN + bl,a 1f + ifelse(OP, `div', `sub %g0, Q, Q', `sub %g0, R, R') +1:') + retl + ifelse(OP, `div', `mov Q, %o0', `mov R, %o0') diff --git a/sysdeps/sparc/jmp_buf.h b/sysdeps/sparc/jmp_buf.h new file mode 100644 index 0000000..a5a592f --- /dev/null +++ b/sysdeps/sparc/jmp_buf.h @@ -0,0 +1,14 @@ +/* Define the machine-dependent type `jmp_buf'. SPARC version. */ + +/* NOTE: The assembly code in __longjmp.S and setjmp.S knows the layout + of this structure. You must hack the assembly code if you want to change + the order of the members. */ + +typedef struct + { + /* Return PC (register o7). */ + __ptr_t __pc; + + /* Saved FP. */ + __ptr_t __fp; + } __jmp_buf[1]; diff --git a/sysdeps/sparc/memcopy.h b/sysdeps/sparc/memcopy.h new file mode 100644 index 0000000..3fd4f3d --- /dev/null +++ b/sysdeps/sparc/memcopy.h @@ -0,0 +1,21 @@ +/* Copyright (C) 1991 Free Software Foundation, Inc. +This file is part of the GNU C Library. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. + +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 675 Mass Ave, +Cambridge, MA 02139, USA. */ + +#include <sysdeps/generic/memcopy.h> +#undef reg_char +#define reg_char int diff --git a/sysdeps/sparc/mul_1.S b/sysdeps/sparc/mul_1.S new file mode 100644 index 0000000..84aa12b --- /dev/null +++ b/sysdeps/sparc/mul_1.S @@ -0,0 +1,198 @@ +! SPARC __mpn_mul_1 -- Multiply a limb vector with a limb and store +! the result in a second limb vector. + +! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + +! INPUT PARAMETERS +! res_ptr o0 +! s1_ptr o1 +! size o2 +! s2_limb o3 + +! ADD CODE FOR SMALL MULTIPLIERS! +!1: ld +! st +! +!2: ld ,a +! addxcc a,a,x +! st x, +! +!3_unrolled: +! ld ,a +! addxcc a,a,x1 ! 2a + cy +! addx %g0,%g0,x2 +! addcc a,x1,x ! 3a + c +! st x, +! +! ld ,a +! addxcc a,a,y1 +! addx %g0,%g0,y2 +! addcc a,y1,x +! st x, +! +!4_unrolled: +! ld ,a +! srl a,2,x1 ! 4a +! addxcc y2,x1,x +! sll a,30,x2 +! st x, +! +! ld ,a +! srl a,2,y1 +! addxcc x2,y1,y +! sll a,30,y2 +! st x, +! +!5_unrolled: +! ld ,a +! srl a,2,x1 ! 4a +! addxcc a,x1,x ! 5a + c +! sll a,30,x2 +! addx %g0,x2,x2 +! st x, +! +! ld ,a +! srl a,2,y1 +! addxcc a,y1,x +! sll a,30,y2 +! addx %g0,y2,y2 +! st x, +! +!8_unrolled: +! ld ,a +! srl a,3,x1 ! 8a +! addxcc y2,x1,x +! sll a,29,x2 +! st x, +! +! ld ,a +! srl a,3,y1 +! addxcc x2,y1,y +! sll a,29,y2 +! st x, + +#include "sysdep.h" + +.text + .align 4 + .global C_SYMBOL_NAME(__mpn_mul_1) +C_SYMBOL_NAME(__mpn_mul_1): + ! Make S1_PTR and RES_PTR point at the end of their blocks + ! and put (- 4 x SIZE) in index/loop counter. + sll %o2,2,%o2 + add %o0,%o2,%o4 ! RES_PTR in o4 since o0 is retval + add %o1,%o2,%o1 + sub %g0,%o2,%o2 + + cmp %o3,0xfff + bgu Large + nop + + ld [%o1+%o2],%o5 + mov 0,%o0 + b L0 + add %o4,-4,%o4 +Loop0: + st %g1,[%o4+%o2] +L0: wr %g0,%o3,%y + sra %o5,31,%g2 + and %o3,%g2,%g2 + andcc %g1,0,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,0,%g1 + sra %g1,20,%g4 + sll %g1,12,%g1 + rd %y,%g3 + srl %g3,20,%g3 + or %g1,%g3,%g1 + + addcc %g1,%o0,%g1 + addx %g2,%g4,%o0 ! add sign-compensation and cy to hi limb + addcc %o2,4,%o2 ! loop counter + bne,a Loop0 + ld [%o1+%o2],%o5 + + retl + st %g1,[%o4+%o2] + + +Large: ld [%o1+%o2],%o5 + mov 0,%o0 + sra %o3,31,%g4 ! g4 = mask of ones iff S2_LIMB < 0 + b L1 + add %o4,-4,%o4 +Loop: + st %g3,[%o4+%o2] +L1: wr %g0,%o5,%y + and %o5,%g4,%g2 ! g2 = S1_LIMB iff S2_LIMB < 0, else 0 + andcc %g0,%g0,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%g0,%g1 + rd %y,%g3 + addcc %g3,%o0,%g3 + addx %g2,%g1,%o0 ! add sign-compensation and cy to hi limb + addcc %o2,4,%o2 ! loop counter + bne,a Loop + ld [%o1+%o2],%o5 + + retl + st %g3,[%o4+%o2] diff --git a/sysdeps/sparc/rem.S b/sysdeps/sparc/rem.S new file mode 100644 index 0000000..17662f7 --- /dev/null +++ b/sysdeps/sparc/rem.S @@ -0,0 +1,365 @@ + /* This file is generated from divrem.m4; DO NOT EDIT! */ +/* + * Division and remainder, from Appendix E of the Sparc Version 8 + * Architecture Manual, with fixes from Gordon Irlam. + */ + +/* + * Input: dividend and divisor in %o0 and %o1 respectively. + * + * m4 parameters: + * .rem name of function to generate + * rem rem=div => %o0 / %o1; rem=rem => %o0 % %o1 + * true true=true => signed; true=false => unsigned + * + * Algorithm parameters: + * N how many bits per iteration we try to get (4) + * WORDSIZE total number of bits (32) + * + * Derived constants: + * TOPBITS number of bits in the top decade of a number + * + * Important variables: + * Q the partial quotient under development (initially 0) + * R the remainder so far, initially the dividend + * ITER number of main division loop iterations required; + * equal to ceil(log2(quotient) / N). Note that this + * is the log base (2^N) of the quotient. + * V the current comparand, initially divisor*2^(ITER*N-1) + * + * Cost: + * Current estimate for non-large dividend is + * ceil(log2(quotient) / N) * (10 + 7N/2) + C + * A large dividend is one greater than 2^(31-TOPBITS) and takes a + * different path, as the upper bits of the quotient must be developed + * one bit at a time. + */ + + + +#include "DEFS.h" +#ifdef __svr4__ +#include <sys/trap.h> +#else +#include <machine/trap.h> +#endif + +FUNC(.rem) + ! compute sign of result; if neither is negative, no problem + orcc %o1, %o0, %g0 ! either negative? + bge 2f ! no, go do the divide + mov %o0, %g6 ! sign of remainder matches %o0 + tst %o1 + bge 1f + tst %o0 + ! %o1 is definitely negative; %o0 might also be negative + bge 2f ! if %o0 not negative... + sub %g0, %o1, %o1 ! in any case, make %o1 nonneg +1: ! %o0 is negative, %o1 is nonnegative + sub %g0, %o0, %o0 ! make %o0 nonnegative +2: + + ! Ready to divide. Compute size of quotient; scale comparand. + orcc %o1, %g0, %o5 + bne 1f + mov %o0, %o3 + + ! Divide by zero trap. If it returns, return 0 (about as + ! wrong as possible, but that is what SunOS does...). + ta ST_DIV0 + retl + clr %o0 + +1: + cmp %o3, %o5 ! if %o1 exceeds %o0, done + blu Lgot_result ! (and algorithm fails otherwise) + clr %o2 + sethi %hi(1 << (32 - 4 - 1)), %g1 + cmp %o3, %g1 + blu Lnot_really_big + clr %o4 + + ! Here the dividend is >= 2**(31-N) or so. We must be careful here, + ! as our usual N-at-a-shot divide step will cause overflow and havoc. + ! The number of bits in the result here is N*ITER+SC, where SC <= N. + ! Compute ITER in an unorthodox manner: know we need to shift V into + ! the top decade: so do not even bother to compare to R. + 1: + cmp %o5, %g1 + bgeu 3f + mov 1, %g7 + sll %o5, 4, %o5 + b 1b + add %o4, 1, %o4 + + ! Now compute %g7. + 2: addcc %o5, %o5, %o5 + bcc Lnot_too_big + add %g7, 1, %g7 + + ! We get here if the %o1 overflowed while shifting. + ! This means that %o3 has the high-order bit set. + ! Restore %o5 and subtract from %o3. + sll %g1, 4, %g1 ! high order bit + srl %o5, 1, %o5 ! rest of %o5 + add %o5, %g1, %o5 + b Ldo_single_div + sub %g7, 1, %g7 + + Lnot_too_big: + 3: cmp %o5, %o3 + blu 2b + nop + be Ldo_single_div + nop + /* NB: these are commented out in the V8-Sparc manual as well */ + /* (I do not understand this) */ + ! %o5 > %o3: went too far: back up 1 step + ! srl %o5, 1, %o5 + ! dec %g7 + ! do single-bit divide steps + ! + ! We have to be careful here. We know that %o3 >= %o5, so we can do the + ! first divide step without thinking. BUT, the others are conditional, + ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- + ! order bit set in the first step, just falling into the regular + ! division loop will mess up the first time around. + ! So we unroll slightly... + Ldo_single_div: + subcc %g7, 1, %g7 + bl Lend_regular_divide + nop + sub %o3, %o5, %o3 + mov 1, %o2 + b Lend_single_divloop + nop + Lsingle_divloop: + sll %o2, 1, %o2 + bl 1f + srl %o5, 1, %o5 + ! %o3 >= 0 + sub %o3, %o5, %o3 + b 2f + add %o2, 1, %o2 + 1: ! %o3 < 0 + add %o3, %o5, %o3 + sub %o2, 1, %o2 + 2: + Lend_single_divloop: + subcc %g7, 1, %g7 + bge Lsingle_divloop + tst %o3 + b,a Lend_regular_divide + +Lnot_really_big: +1: + sll %o5, 4, %o5 + cmp %o5, %o3 + bleu 1b + addcc %o4, 1, %o4 + be Lgot_result + sub %o4, 1, %o4 + + tst %o3 ! set up for initial iteration +Ldivloop: + sll %o2, 4, %o2 + ! depth 1, accumulated bits 0 + bl L.1.16 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 2, accumulated bits 1 + bl L.2.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 3, accumulated bits 3 + bl L.3.19 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits 7 + bl L.4.23 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (7*2+1), %o2 + +L.4.23: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (7*2-1), %o2 + + +L.3.19: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits 5 + bl L.4.21 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (5*2+1), %o2 + +L.4.21: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (5*2-1), %o2 + + + +L.2.17: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 3, accumulated bits 1 + bl L.3.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits 3 + bl L.4.19 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (3*2+1), %o2 + +L.4.19: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (3*2-1), %o2 + + +L.3.17: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits 1 + bl L.4.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (1*2+1), %o2 + +L.4.17: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (1*2-1), %o2 + + + + +L.1.16: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 2, accumulated bits -1 + bl L.2.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 3, accumulated bits -1 + bl L.3.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits -1 + bl L.4.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2+1), %o2 + +L.4.15: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2-1), %o2 + + +L.3.15: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits -3 + bl L.4.13 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2+1), %o2 + +L.4.13: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2-1), %o2 + + + +L.2.15: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 3, accumulated bits -3 + bl L.3.13 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits -5 + bl L.4.11 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2+1), %o2 + +L.4.11: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2-1), %o2 + + +L.3.13: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits -7 + bl L.4.9 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2+1), %o2 + +L.4.9: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2-1), %o2 + + + + + 9: +Lend_regular_divide: + subcc %o4, 1, %o4 + bge Ldivloop + tst %o3 + bl,a Lgot_result + ! non-restoring fixup here (one instruction only!) + add %o3, %o1, %o3 + + +Lgot_result: + ! check to see if answer should be < 0 + tst %g6 + bl,a 1f + sub %g0, %o3, %o3 +1: + retl + mov %o3, %o0 diff --git a/sysdeps/sparc/sdiv.S b/sysdeps/sparc/sdiv.S new file mode 100644 index 0000000..dadbb36 --- /dev/null +++ b/sysdeps/sparc/sdiv.S @@ -0,0 +1,365 @@ + /* This file is generated from divrem.m4; DO NOT EDIT! */ +/* + * Division and remainder, from Appendix E of the Sparc Version 8 + * Architecture Manual, with fixes from Gordon Irlam. + */ + +/* + * Input: dividend and divisor in %o0 and %o1 respectively. + * + * m4 parameters: + * .div name of function to generate + * div div=div => %o0 / %o1; div=rem => %o0 % %o1 + * true true=true => signed; true=false => unsigned + * + * Algorithm parameters: + * N how many bits per iteration we try to get (4) + * WORDSIZE total number of bits (32) + * + * Derived constants: + * TOPBITS number of bits in the top decade of a number + * + * Important variables: + * Q the partial quotient under development (initially 0) + * R the remainder so far, initially the dividend + * ITER number of main division loop iterations required; + * equal to ceil(log2(quotient) / N). Note that this + * is the log base (2^N) of the quotient. + * V the current comparand, initially divisor*2^(ITER*N-1) + * + * Cost: + * Current estimate for non-large dividend is + * ceil(log2(quotient) / N) * (10 + 7N/2) + C + * A large dividend is one greater than 2^(31-TOPBITS) and takes a + * different path, as the upper bits of the quotient must be developed + * one bit at a time. + */ + + + +#include "DEFS.h" +#ifdef __svr4__ +#include <sys/trap.h> +#else +#include <machine/trap.h> +#endif + +FUNC(.div) + ! compute sign of result; if neither is negative, no problem + orcc %o1, %o0, %g0 ! either negative? + bge 2f ! no, go do the divide + xor %o1, %o0, %g6 ! compute sign in any case + tst %o1 + bge 1f + tst %o0 + ! %o1 is definitely negative; %o0 might also be negative + bge 2f ! if %o0 not negative... + sub %g0, %o1, %o1 ! in any case, make %o1 nonneg +1: ! %o0 is negative, %o1 is nonnegative + sub %g0, %o0, %o0 ! make %o0 nonnegative +2: + + ! Ready to divide. Compute size of quotient; scale comparand. + orcc %o1, %g0, %o5 + bne 1f + mov %o0, %o3 + + ! Divide by zero trap. If it returns, return 0 (about as + ! wrong as possible, but that is what SunOS does...). + ta ST_DIV0 + retl + clr %o0 + +1: + cmp %o3, %o5 ! if %o1 exceeds %o0, done + blu Lgot_result ! (and algorithm fails otherwise) + clr %o2 + sethi %hi(1 << (32 - 4 - 1)), %g1 + cmp %o3, %g1 + blu Lnot_really_big + clr %o4 + + ! Here the dividend is >= 2**(31-N) or so. We must be careful here, + ! as our usual N-at-a-shot divide step will cause overflow and havoc. + ! The number of bits in the result here is N*ITER+SC, where SC <= N. + ! Compute ITER in an unorthodox manner: know we need to shift V into + ! the top decade: so do not even bother to compare to R. + 1: + cmp %o5, %g1 + bgeu 3f + mov 1, %g7 + sll %o5, 4, %o5 + b 1b + add %o4, 1, %o4 + + ! Now compute %g7. + 2: addcc %o5, %o5, %o5 + bcc Lnot_too_big + add %g7, 1, %g7 + + ! We get here if the %o1 overflowed while shifting. + ! This means that %o3 has the high-order bit set. + ! Restore %o5 and subtract from %o3. + sll %g1, 4, %g1 ! high order bit + srl %o5, 1, %o5 ! rest of %o5 + add %o5, %g1, %o5 + b Ldo_single_div + sub %g7, 1, %g7 + + Lnot_too_big: + 3: cmp %o5, %o3 + blu 2b + nop + be Ldo_single_div + nop + /* NB: these are commented out in the V8-Sparc manual as well */ + /* (I do not understand this) */ + ! %o5 > %o3: went too far: back up 1 step + ! srl %o5, 1, %o5 + ! dec %g7 + ! do single-bit divide steps + ! + ! We have to be careful here. We know that %o3 >= %o5, so we can do the + ! first divide step without thinking. BUT, the others are conditional, + ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- + ! order bit set in the first step, just falling into the regular + ! division loop will mess up the first time around. + ! So we unroll slightly... + Ldo_single_div: + subcc %g7, 1, %g7 + bl Lend_regular_divide + nop + sub %o3, %o5, %o3 + mov 1, %o2 + b Lend_single_divloop + nop + Lsingle_divloop: + sll %o2, 1, %o2 + bl 1f + srl %o5, 1, %o5 + ! %o3 >= 0 + sub %o3, %o5, %o3 + b 2f + add %o2, 1, %o2 + 1: ! %o3 < 0 + add %o3, %o5, %o3 + sub %o2, 1, %o2 + 2: + Lend_single_divloop: + subcc %g7, 1, %g7 + bge Lsingle_divloop + tst %o3 + b,a Lend_regular_divide + +Lnot_really_big: +1: + sll %o5, 4, %o5 + cmp %o5, %o3 + bleu 1b + addcc %o4, 1, %o4 + be Lgot_result + sub %o4, 1, %o4 + + tst %o3 ! set up for initial iteration +Ldivloop: + sll %o2, 4, %o2 + ! depth 1, accumulated bits 0 + bl L.1.16 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 2, accumulated bits 1 + bl L.2.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 3, accumulated bits 3 + bl L.3.19 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits 7 + bl L.4.23 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (7*2+1), %o2 + +L.4.23: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (7*2-1), %o2 + + +L.3.19: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits 5 + bl L.4.21 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (5*2+1), %o2 + +L.4.21: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (5*2-1), %o2 + + + +L.2.17: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 3, accumulated bits 1 + bl L.3.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits 3 + bl L.4.19 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (3*2+1), %o2 + +L.4.19: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (3*2-1), %o2 + + +L.3.17: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits 1 + bl L.4.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (1*2+1), %o2 + +L.4.17: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (1*2-1), %o2 + + + + +L.1.16: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 2, accumulated bits -1 + bl L.2.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 3, accumulated bits -1 + bl L.3.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits -1 + bl L.4.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2+1), %o2 + +L.4.15: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2-1), %o2 + + +L.3.15: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits -3 + bl L.4.13 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2+1), %o2 + +L.4.13: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2-1), %o2 + + + +L.2.15: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 3, accumulated bits -3 + bl L.3.13 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits -5 + bl L.4.11 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2+1), %o2 + +L.4.11: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2-1), %o2 + + +L.3.13: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits -7 + bl L.4.9 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2+1), %o2 + +L.4.9: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2-1), %o2 + + + + + 9: +Lend_regular_divide: + subcc %o4, 1, %o4 + bge Ldivloop + tst %o3 + bl,a Lgot_result + ! non-restoring fixup here (one instruction only!) + sub %o2, 1, %o2 + + +Lgot_result: + ! check to see if answer should be < 0 + tst %g6 + bl,a 1f + sub %g0, %o2, %o2 +1: + retl + mov %o2, %o0 diff --git a/sysdeps/sparc/setjmp.S b/sysdeps/sparc/setjmp.S new file mode 100644 index 0000000..3c9c18d --- /dev/null +++ b/sysdeps/sparc/setjmp.S @@ -0,0 +1,31 @@ +/* Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc. +This file is part of the GNU C Library. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. + +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 675 Mass Ave, +Cambridge, MA 02139, USA. */ + +#include <sysdep.h> + +/* NOTE: This code depends on the definition of `__jmp_buf' in <jmp_buf.h>. */ + +ENTRY (__sigsetjmp) + /* Save our return PC and SP (second store in the jmp delay slot). */ + st %o7, [%o0] + /* Save the signal mask if requested. We do this as a tail-call + for simplicity; it always returns zero. */ + sethi %hi(C_SYMBOL_NAME (__sigjmp_save)), %g1 + or %lo(C_SYMBOL_NAME (__sigjmp_save)), %g1, %g1 + jmp %g1 + st %sp, [%o0 + 4] diff --git a/sysdeps/sparc/sparc8/addmul_1.S b/sysdeps/sparc/sparc8/addmul_1.S new file mode 100644 index 0000000..fbaacfd --- /dev/null +++ b/sysdeps/sparc/sparc8/addmul_1.S @@ -0,0 +1,116 @@ +! SPARC v8 __mpn_addmul_1 -- Multiply a limb vector with a limb and +! add the result to a second limb vector. + +! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + +! INPUT PARAMETERS +! res_ptr o0 +! s1_ptr o1 +! size o2 +! s2_limb o3 + +#include "sysdep.h" + +.text + .align 4 + .global C_SYMBOL_NAME(__mpn_addmul_1) +C_SYMBOL_NAME(__mpn_addmul_1): + orcc %g0,%g0,%g2 + ld [%o1+0],%o4 ! 1 + + sll %o2,4,%g1 + and %g1,(4-1)<<4,%g1 + sethi %hi(LL),%g3 + or %g3,%lo(LL),%g3 + jmp %g3+%g1 + nop +LL: +LL00: add %o0,-4,%o0 + b Loop00 /* 4, 8, 12, ... */ + add %o1,-4,%o1 + nop +LL01: b Loop01 /* 1, 5, 9, ... */ + nop + nop + nop +LL10: add %o0,-12,%o0 /* 2, 6, 10, ... */ + b Loop10 + add %o1,4,%o1 + nop +LL11: add %o0,-8,%o0 /* 3, 7, 11, ... */ + b Loop11 + add %o1,-8,%o1 + nop + +1: addcc %g3,%g2,%g3 ! 1 + ld [%o1+4],%o4 ! 2 + rd %y,%g2 ! 1 + addx %g0,%g2,%g2 + ld [%o0+0],%g1 ! 2 + addcc %g1,%g3,%g3 + st %g3,[%o0+0] ! 1 +Loop00: umul %o4,%o3,%g3 ! 2 + ld [%o0+4],%g1 ! 2 + addxcc %g3,%g2,%g3 ! 2 + ld [%o1+8],%o4 ! 3 + rd %y,%g2 ! 2 + addx %g0,%g2,%g2 + nop + addcc %g1,%g3,%g3 + st %g3,[%o0+4] ! 2 +Loop11: umul %o4,%o3,%g3 ! 3 + addxcc %g3,%g2,%g3 ! 3 + ld [%o1+12],%o4 ! 4 + rd %y,%g2 ! 3 + add %o1,16,%o1 + addx %g0,%g2,%g2 + ld [%o0+8],%g1 ! 2 + addcc %g1,%g3,%g3 + st %g3,[%o0+8] ! 3 +Loop10: umul %o4,%o3,%g3 ! 4 + addxcc %g3,%g2,%g3 ! 4 + ld [%o1+0],%o4 ! 1 + rd %y,%g2 ! 4 + addx %g0,%g2,%g2 + ld [%o0+12],%g1 ! 2 + addcc %g1,%g3,%g3 + st %g3,[%o0+12] ! 4 + add %o0,16,%o0 + addx %g0,%g2,%g2 +Loop01: addcc %o2,-4,%o2 + bg 1b + umul %o4,%o3,%g3 ! 1 + + addcc %g3,%g2,%g3 ! 4 + rd %y,%g2 ! 4 + addx %g0,%g2,%g2 + ld [%o0+0],%g1 ! 2 + addcc %g1,%g3,%g3 + st %g3,[%o0+0] ! 4 + addx %g0,%g2,%o0 + + retl + nop + + +! umul, ld, addxcc, rd, st + +! umul, ld, addxcc, rd, ld, addcc, st, addx + diff --git a/sysdeps/sparc/sparc8/mul_1.S b/sysdeps/sparc/sparc8/mul_1.S new file mode 100644 index 0000000..9c21768 --- /dev/null +++ b/sysdeps/sparc/sparc8/mul_1.S @@ -0,0 +1,91 @@ +! SPARC v8 __mpn_mul_1 -- Multiply a limb vector with a single limb and +! store the product in a second limb vector. + +! Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + +! INPUT PARAMETERS +! res_ptr o0 +! s1_ptr o1 +! size o2 +! s2_limb o3 + +#include "sysdep.h" + +.text + .align 8 + .global C_SYMBOL_NAME(__mpn_mul_1) +C_SYMBOL_NAME(__mpn_mul_1): + sll %o2,4,%g1 + and %g1,(4-1)<<4,%g1 + sethi %hi(LL),%g3 + or %g3,%lo(LL),%g3 + jmp %g3+%g1 + ld [%o1+0],%o4 ! 1 +LL: +LL00: add %o0,-4,%o0 + add %o1,-4,%o1 + b Loop00 /* 4, 8, 12, ... */ + orcc %g0,%g0,%g2 +LL01: b Loop01 /* 1, 5, 9, ... */ + orcc %g0,%g0,%g2 + nop + nop +LL10: add %o0,-12,%o0 /* 2, 6, 10, ... */ + add %o1,4,%o1 + b Loop10 + orcc %g0,%g0,%g2 + nop +LL11: add %o0,-8,%o0 /* 3, 7, 11, ... */ + add %o1,-8,%o1 + b Loop11 + orcc %g0,%g0,%g2 + +Loop: addcc %g3,%g2,%g3 ! 1 + ld [%o1+4],%o4 ! 2 + st %g3,[%o0+0] ! 1 + rd %y,%g2 ! 1 +Loop00: umul %o4,%o3,%g3 ! 2 + addxcc %g3,%g2,%g3 ! 2 + ld [%o1+8],%o4 ! 3 + st %g3,[%o0+4] ! 2 + rd %y,%g2 ! 2 +Loop11: umul %o4,%o3,%g3 ! 3 + addxcc %g3,%g2,%g3 ! 3 + ld [%o1+12],%o4 ! 4 + add %o1,16,%o1 + st %g3,[%o0+8] ! 3 + rd %y,%g2 ! 3 +Loop10: umul %o4,%o3,%g3 ! 4 + addxcc %g3,%g2,%g3 ! 4 + ld [%o1+0],%o4 ! 1 + st %g3,[%o0+12] ! 4 + add %o0,16,%o0 + rd %y,%g2 ! 4 + addx %g0,%g2,%g2 +Loop01: addcc %o2,-4,%o2 + bg Loop + umul %o4,%o3,%g3 ! 1 + + addcc %g3,%g2,%g3 ! 4 + st %g3,[%o0+0] ! 4 + rd %y,%g2 ! 4 + + retl + addx %g0,%g2,%o0 diff --git a/sysdeps/sparc/sparc8/submul_1.S b/sysdeps/sparc/sparc8/submul_1.S new file mode 100644 index 0000000..5c4d688 --- /dev/null +++ b/sysdeps/sparc/sparc8/submul_1.S @@ -0,0 +1,57 @@ +! SPARC v8 __mpn_submul_1 -- Multiply a limb vector with a limb and +! subtract the result from a second limb vector. + +! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + +! INPUT PARAMETERS +! res_ptr o0 +! s1_ptr o1 +! size o2 +! s2_limb o3 + +#include "sysdep.h" + +.text + .align 4 + .global C_SYMBOL_NAME(__mpn_submul_1) +C_SYMBOL_NAME(__mpn_submul_1): + sub %g0,%o2,%o2 ! negate ... + sll %o2,2,%o2 ! ... and scale size + sub %o1,%o2,%o1 ! o1 is offset s1_ptr + sub %o0,%o2,%g1 ! g1 is offset res_ptr + + mov 0,%o0 ! clear cy_limb + +Loop: ld [%o1+%o2],%o4 + ld [%g1+%o2],%g2 + umul %o4,%o3,%o5 + rd %y,%g3 + addcc %o5,%o0,%o5 + addx %g3,0,%o0 + subcc %g2,%o5,%g2 + addx %o0,0,%o0 + st %g2,[%g1+%o2] + + addcc %o2,4,%o2 + bne Loop + nop + + retl + nop diff --git a/sysdeps/sparc/sparc8/udiv_qrnnd.S b/sysdeps/sparc/sparc8/udiv_qrnnd.S new file mode 100644 index 0000000..49c2398 --- /dev/null +++ b/sysdeps/sparc/sparc8/udiv_qrnnd.S @@ -0,0 +1,186 @@ +! SPARC __udiv_qrnnd division support, used from longlong.h. + +! Copyright (C) 1993, 1994 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + +! INPUT PARAMETERS +! rem_ptr o0 +! n1 o1 +! n0 o2 +! d o3 + +#include "sysdep.h" + + .text + .align 4 + .global C_SYMBOL_NAME(__udiv_qrnnd) +C_SYMBOL_NAME(__udiv_qrnnd): + tst %o3 + bneg Largedivisor + mov 8,%g1 + + b Lp1 + addxcc %o2,%o2,%o2 + +Lplop: bcc Ln1 + addxcc %o2,%o2,%o2 +Lp1: addx %o1,%o1,%o1 + subcc %o1,%o3,%o4 + bcc Ln2 + addxcc %o2,%o2,%o2 +Lp2: addx %o1,%o1,%o1 + subcc %o1,%o3,%o4 + bcc Ln3 + addxcc %o2,%o2,%o2 +Lp3: addx %o1,%o1,%o1 + subcc %o1,%o3,%o4 + bcc Ln4 + addxcc %o2,%o2,%o2 +Lp4: addx %o1,%o1,%o1 + addcc %g1,-1,%g1 + bne Lplop + subcc %o1,%o3,%o4 + bcc Ln5 + addxcc %o2,%o2,%o2 +Lp5: st %o1,[%o0] + retl + xnor %g0,%o2,%o0 + +Lnlop: bcc Lp1 + addxcc %o2,%o2,%o2 +Ln1: addx %o4,%o4,%o4 + subcc %o4,%o3,%o1 + bcc Lp2 + addxcc %o2,%o2,%o2 +Ln2: addx %o4,%o4,%o4 + subcc %o4,%o3,%o1 + bcc Lp3 + addxcc %o2,%o2,%o2 +Ln3: addx %o4,%o4,%o4 + subcc %o4,%o3,%o1 + bcc Lp4 + addxcc %o2,%o2,%o2 +Ln4: addx %o4,%o4,%o4 + addcc %g1,-1,%g1 + bne Lnlop + subcc %o4,%o3,%o1 + bcc Lp5 + addxcc %o2,%o2,%o2 +Ln5: st %o4,[%o0] + retl + xnor %g0,%o2,%o0 + +Largedivisor: + and %o2,1,%o5 ! %o5 = n0 & 1 + + srl %o2,1,%o2 + sll %o1,31,%g2 + or %g2,%o2,%o2 ! %o2 = lo(n1n0 >> 1) + srl %o1,1,%o1 ! %o1 = hi(n1n0 >> 1) + + and %o3,1,%g2 + srl %o3,1,%g3 ! %g3 = floor(d / 2) + add %g3,%g2,%g3 ! %g3 = ceil(d / 2) + + b LLp1 + addxcc %o2,%o2,%o2 + +LLplop: bcc LLn1 + addxcc %o2,%o2,%o2 +LLp1: addx %o1,%o1,%o1 + subcc %o1,%g3,%o4 + bcc LLn2 + addxcc %o2,%o2,%o2 +LLp2: addx %o1,%o1,%o1 + subcc %o1,%g3,%o4 + bcc LLn3 + addxcc %o2,%o2,%o2 +LLp3: addx %o1,%o1,%o1 + subcc %o1,%g3,%o4 + bcc LLn4 + addxcc %o2,%o2,%o2 +LLp4: addx %o1,%o1,%o1 + addcc %g1,-1,%g1 + bne LLplop + subcc %o1,%g3,%o4 + bcc LLn5 + addxcc %o2,%o2,%o2 +LLp5: add %o1,%o1,%o1 ! << 1 + tst %g2 + bne Oddp + add %o5,%o1,%o1 + st %o1,[%o0] + retl + xnor %g0,%o2,%o0 + +LLnlop: bcc LLp1 + addxcc %o2,%o2,%o2 +LLn1: addx %o4,%o4,%o4 + subcc %o4,%g3,%o1 + bcc LLp2 + addxcc %o2,%o2,%o2 +LLn2: addx %o4,%o4,%o4 + subcc %o4,%g3,%o1 + bcc LLp3 + addxcc %o2,%o2,%o2 +LLn3: addx %o4,%o4,%o4 + subcc %o4,%g3,%o1 + bcc LLp4 + addxcc %o2,%o2,%o2 +LLn4: addx %o4,%o4,%o4 + addcc %g1,-1,%g1 + bne LLnlop + subcc %o4,%g3,%o1 + bcc LLp5 + addxcc %o2,%o2,%o2 +LLn5: add %o4,%o4,%o4 ! << 1 + tst %g2 + bne Oddn + add %o5,%o4,%o4 + st %o4,[%o0] + retl + xnor %g0,%o2,%o0 + +Oddp: xnor %g0,%o2,%o2 + ! q' in %o2. r' in %o1 + addcc %o1,%o2,%o1 + bcc LLp6 + addx %o2,0,%o2 + sub %o1,%o3,%o1 +LLp6: subcc %o1,%o3,%g0 + bcs LLp7 + subx %o2,-1,%o2 + sub %o1,%o3,%o1 +LLp7: st %o1,[%o0] + retl + mov %o2,%o0 + +Oddn: xnor %g0,%o2,%o2 + ! q' in %o2. r' in %o4 + addcc %o4,%o2,%o4 + bcc LLn6 + addx %o2,0,%o2 + sub %o4,%o3,%o4 +LLn6: subcc %o4,%o3,%g0 + bcs LLn7 + subx %o2,-1,%o2 + sub %o4,%o3,%o4 +LLn7: st %o4,[%o0] + retl + mov %o2,%o0 diff --git a/sysdeps/sparc/sqrt.c b/sysdeps/sparc/sqrt.c new file mode 100644 index 0000000..bff46e1 --- /dev/null +++ b/sysdeps/sparc/sqrt.c @@ -0,0 +1,34 @@ +/* Copyright (C) 1991, 1992 Free Software Foundation, Inc. +This file is part of the GNU C Library. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. + +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 675 Mass Ave, +Cambridge, MA 02139, USA. */ + +#include <ansidecl.h> +#include <errno.h> +#include <math.h> + +#ifndef __GNUC__ + #error This file uses GNU C extensions; you must compile with GCC. +#endif + +/* Return the square root of X. */ +double +DEFUN(sqrt, (x), double x) +{ + register double result; + asm("fsqrtd %1, %0" : "=f" (result) : "f" (x)); + return result; +} diff --git a/sysdeps/sparc/sub_n.S b/sysdeps/sparc/sub_n.S new file mode 100644 index 0000000..7a167b2 --- /dev/null +++ b/sysdeps/sparc/sub_n.S @@ -0,0 +1,134 @@ +! sparc __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and +! store difference in a third limb vector. + +! Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + +! INPUT PARAMETERS +! res_ptr %o0 +! s1_ptr %o1 +! s2_ptr %o2 +! size %o3 + +#include "sysdep.h" + + .text + .align 4 + .global C_SYMBOL_NAME(__mpn_sub_n) +C_SYMBOL_NAME(__mpn_sub_n): + ld [%o1+0],%o4 ! read first limb from s1_ptr + srl %o3,4,%g1 + ld [%o2+0],%o5 ! read first limb from s2_ptr + + sub %g0,%o3,%o3 + andcc %o3,(16-1),%o3 + be Lzero + nop + + sll %o3,2,%o3 ! multiply by 4 + sub %o0,%o3,%o0 ! adjust res_ptr + sub %o1,%o3,%o1 ! adjust s1_ptr + sub %o2,%o3,%o2 ! adjust s2_ptr + + mov %o4,%g2 + + sethi %hi(Lbase),%g3 + or %g3,%lo(Lbase),%g3 + sll %o3,2,%o3 ! multiply by 4 + jmp %g3+%o3 + mov %o5,%g3 + +Loop: subxcc %g2,%g3,%o3 + add %o1,64,%o1 + st %o3,[%o0+60] + add %o2,64,%o2 + ld [%o1+0],%o4 + add %o0,64,%o0 + ld [%o2+0],%o5 +Lzero: sub %g1,1,%g1 ! add 0 + 16r limbs (adjust loop counter) +Lbase: ld [%o1+4],%g2 + subxcc %o4,%o5,%o3 + ld [%o2+4],%g3 + st %o3,[%o0+0] + ld [%o1+8],%o4 ! add 15 + 16r limbs + subxcc %g2,%g3,%o3 + ld [%o2+8],%o5 + st %o3,[%o0+4] + ld [%o1+12],%g2 ! add 14 + 16r limbs + subxcc %o4,%o5,%o3 + ld [%o2+12],%g3 + st %o3,[%o0+8] + ld [%o1+16],%o4 ! add 13 + 16r limbs + subxcc %g2,%g3,%o3 + ld [%o2+16],%o5 + st %o3,[%o0+12] + ld [%o1+20],%g2 ! add 12 + 16r limbs + subxcc %o4,%o5,%o3 + ld [%o2+20],%g3 + st %o3,[%o0+16] + ld [%o1+24],%o4 ! add 11 + 16r limbs + subxcc %g2,%g3,%o3 + ld [%o2+24],%o5 + st %o3,[%o0+20] + ld [%o1+28],%g2 ! add 10 + 16r limbs + subxcc %o4,%o5,%o3 + ld [%o2+28],%g3 + st %o3,[%o0+24] + ld [%o1+32],%o4 ! add 9 + 16r limbs + subxcc %g2,%g3,%o3 + ld [%o2+32],%o5 + st %o3,[%o0+28] + ld [%o1+36],%g2 ! add 8 + 16r limbs + subxcc %o4,%o5,%o3 + ld [%o2+36],%g3 + st %o3,[%o0+32] + ld [%o1+40],%o4 ! add 7 + 16r limbs + subxcc %g2,%g3,%o3 + ld [%o2+40],%o5 + st %o3,[%o0+36] + ld [%o1+44],%g2 ! add 6 + 16r limbs + subxcc %o4,%o5,%o3 + ld [%o2+44],%g3 + st %o3,[%o0+40] + ld [%o1+48],%o4 ! add 5 + 16r limbs + subxcc %g2,%g3,%o3 + ld [%o2+48],%o5 + st %o3,[%o0+44] + ld [%o1+52],%g2 ! add 4 + 16r limbs + subxcc %o4,%o5,%o3 + ld [%o2+52],%g3 + st %o3,[%o0+48] + ld [%o1+56],%o4 ! add 3 + 16r limbs + subxcc %g2,%g3,%o3 + ld [%o2+56],%o5 + st %o3,[%o0+52] + ld [%o1+60],%g2 ! add 2 + 16r limbs + subxcc %o4,%o5,%o3 + ld [%o2+60],%g3 + st %o3,[%o0+56] + subx %g0,%g0,%o4 + tst %g1 + bne Loop + subcc %g0,%o4,%g0 ! restore cy (delay slot) + + subxcc %g2,%g3,%o3 + st %o3,[%o0+60] ! store most significant limb + + retl + addx %g0,%g0,%o0 ! return carry-out from most sign. limb diff --git a/sysdeps/sparc/submul_1.S b/sysdeps/sparc/submul_1.S new file mode 100644 index 0000000..ed67c99 --- /dev/null +++ b/sysdeps/sparc/submul_1.S @@ -0,0 +1,146 @@ +! SPARC __mpn_submul_1 -- Multiply a limb vector with a limb and subtract +! the result from a second limb vector. + +! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + +! INPUT PARAMETERS +! res_ptr o0 +! s1_ptr o1 +! size o2 +! s2_limb o3 + +#include "sysdep.h" + +.text + .align 4 + .global C_SYMBOL_NAME(__mpn_submul_1) +C_SYMBOL_NAME(__mpn_submul_1): + ! Make S1_PTR and RES_PTR point at the end of their blocks + ! and put (- 4 x SIZE) in index/loop counter. + sll %o2,2,%o2 + add %o0,%o2,%o4 ! RES_PTR in o4 since o0 is retval + add %o1,%o2,%o1 + sub %g0,%o2,%o2 + + cmp %o3,0xfff + bgu Large + nop + + ld [%o1+%o2],%o5 + mov 0,%o0 + b L0 + add %o4,-4,%o4 +Loop0: + subcc %o5,%g1,%g1 + ld [%o1+%o2],%o5 + addx %o0,%g0,%o0 + st %g1,[%o4+%o2] +L0: wr %g0,%o3,%y + sra %o5,31,%g2 + and %o3,%g2,%g2 + andcc %g1,0,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,0,%g1 + sra %g1,20,%g4 + sll %g1,12,%g1 + rd %y,%g3 + srl %g3,20,%g3 + or %g1,%g3,%g1 + + addcc %g1,%o0,%g1 + addx %g2,%g4,%o0 ! add sign-compensation and cy to hi limb + addcc %o2,4,%o2 ! loop counter + bne Loop0 + ld [%o4+%o2],%o5 + + subcc %o5,%g1,%g1 + addx %o0,%g0,%o0 + retl + st %g1,[%o4+%o2] + + +Large: ld [%o1+%o2],%o5 + mov 0,%o0 + sra %o3,31,%g4 ! g4 = mask of ones iff S2_LIMB < 0 + b L1 + add %o4,-4,%o4 +Loop: + subcc %o5,%g3,%g3 + ld [%o1+%o2],%o5 + addx %o0,%g0,%o0 + st %g3,[%o4+%o2] +L1: wr %g0,%o5,%y + and %o5,%g4,%g2 + andcc %g0,%g0,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%g0,%g1 + rd %y,%g3 + addcc %g3,%o0,%g3 + addx %g2,%g1,%o0 + addcc %o2,4,%o2 + bne Loop + ld [%o4+%o2],%o5 + + subcc %o5,%g3,%g3 + addx %o0,%g0,%o0 + retl + st %g3,[%o4+%o2] diff --git a/sysdeps/sparc/udiv.S b/sysdeps/sparc/udiv.S new file mode 100644 index 0000000..826d011 --- /dev/null +++ b/sysdeps/sparc/udiv.S @@ -0,0 +1,348 @@ + /* This file is generated from divrem.m4; DO NOT EDIT! */ +/* + * Division and remainder, from Appendix E of the Sparc Version 8 + * Architecture Manual, with fixes from Gordon Irlam. + */ + +/* + * Input: dividend and divisor in %o0 and %o1 respectively. + * + * m4 parameters: + * .udiv name of function to generate + * div div=div => %o0 / %o1; div=rem => %o0 % %o1 + * false false=true => signed; false=false => unsigned + * + * Algorithm parameters: + * N how many bits per iteration we try to get (4) + * WORDSIZE total number of bits (32) + * + * Derived constants: + * TOPBITS number of bits in the top decade of a number + * + * Important variables: + * Q the partial quotient under development (initially 0) + * R the remainder so far, initially the dividend + * ITER number of main division loop iterations required; + * equal to ceil(log2(quotient) / N). Note that this + * is the log base (2^N) of the quotient. + * V the current comparand, initially divisor*2^(ITER*N-1) + * + * Cost: + * Current estimate for non-large dividend is + * ceil(log2(quotient) / N) * (10 + 7N/2) + C + * A large dividend is one greater than 2^(31-TOPBITS) and takes a + * different path, as the upper bits of the quotient must be developed + * one bit at a time. + */ + + + +#include "DEFS.h" +#ifdef __svr4__ +#include <sys/trap.h> +#else +#include <machine/trap.h> +#endif + +FUNC(.udiv) + + ! Ready to divide. Compute size of quotient; scale comparand. + orcc %o1, %g0, %o5 + bne 1f + mov %o0, %o3 + + ! Divide by zero trap. If it returns, return 0 (about as + ! wrong as possible, but that is what SunOS does...). + ta ST_DIV0 + retl + clr %o0 + +1: + cmp %o3, %o5 ! if %o1 exceeds %o0, done + blu Lgot_result ! (and algorithm fails otherwise) + clr %o2 + sethi %hi(1 << (32 - 4 - 1)), %g1 + cmp %o3, %g1 + blu Lnot_really_big + clr %o4 + + ! Here the dividend is >= 2**(31-N) or so. We must be careful here, + ! as our usual N-at-a-shot divide step will cause overflow and havoc. + ! The number of bits in the result here is N*ITER+SC, where SC <= N. + ! Compute ITER in an unorthodox manner: know we need to shift V into + ! the top decade: so do not even bother to compare to R. + 1: + cmp %o5, %g1 + bgeu 3f + mov 1, %g7 + sll %o5, 4, %o5 + b 1b + add %o4, 1, %o4 + + ! Now compute %g7. + 2: addcc %o5, %o5, %o5 + bcc Lnot_too_big + add %g7, 1, %g7 + + ! We get here if the %o1 overflowed while shifting. + ! This means that %o3 has the high-order bit set. + ! Restore %o5 and subtract from %o3. + sll %g1, 4, %g1 ! high order bit + srl %o5, 1, %o5 ! rest of %o5 + add %o5, %g1, %o5 + b Ldo_single_div + sub %g7, 1, %g7 + + Lnot_too_big: + 3: cmp %o5, %o3 + blu 2b + nop + be Ldo_single_div + nop + /* NB: these are commented out in the V8-Sparc manual as well */ + /* (I do not understand this) */ + ! %o5 > %o3: went too far: back up 1 step + ! srl %o5, 1, %o5 + ! dec %g7 + ! do single-bit divide steps + ! + ! We have to be careful here. We know that %o3 >= %o5, so we can do the + ! first divide step without thinking. BUT, the others are conditional, + ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- + ! order bit set in the first step, just falling into the regular + ! division loop will mess up the first time around. + ! So we unroll slightly... + Ldo_single_div: + subcc %g7, 1, %g7 + bl Lend_regular_divide + nop + sub %o3, %o5, %o3 + mov 1, %o2 + b Lend_single_divloop + nop + Lsingle_divloop: + sll %o2, 1, %o2 + bl 1f + srl %o5, 1, %o5 + ! %o3 >= 0 + sub %o3, %o5, %o3 + b 2f + add %o2, 1, %o2 + 1: ! %o3 < 0 + add %o3, %o5, %o3 + sub %o2, 1, %o2 + 2: + Lend_single_divloop: + subcc %g7, 1, %g7 + bge Lsingle_divloop + tst %o3 + b,a Lend_regular_divide + +Lnot_really_big: +1: + sll %o5, 4, %o5 + cmp %o5, %o3 + bleu 1b + addcc %o4, 1, %o4 + be Lgot_result + sub %o4, 1, %o4 + + tst %o3 ! set up for initial iteration +Ldivloop: + sll %o2, 4, %o2 + ! depth 1, accumulated bits 0 + bl L.1.16 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 2, accumulated bits 1 + bl L.2.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 3, accumulated bits 3 + bl L.3.19 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits 7 + bl L.4.23 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (7*2+1), %o2 + +L.4.23: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (7*2-1), %o2 + + +L.3.19: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits 5 + bl L.4.21 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (5*2+1), %o2 + +L.4.21: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (5*2-1), %o2 + + + +L.2.17: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 3, accumulated bits 1 + bl L.3.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits 3 + bl L.4.19 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (3*2+1), %o2 + +L.4.19: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (3*2-1), %o2 + + +L.3.17: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits 1 + bl L.4.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (1*2+1), %o2 + +L.4.17: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (1*2-1), %o2 + + + + +L.1.16: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 2, accumulated bits -1 + bl L.2.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 3, accumulated bits -1 + bl L.3.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits -1 + bl L.4.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2+1), %o2 + +L.4.15: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2-1), %o2 + + +L.3.15: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits -3 + bl L.4.13 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2+1), %o2 + +L.4.13: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2-1), %o2 + + + +L.2.15: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 3, accumulated bits -3 + bl L.3.13 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits -5 + bl L.4.11 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2+1), %o2 + +L.4.11: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2-1), %o2 + + +L.3.13: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits -7 + bl L.4.9 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2+1), %o2 + +L.4.9: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2-1), %o2 + + + + + 9: +Lend_regular_divide: + subcc %o4, 1, %o4 + bge Ldivloop + tst %o3 + bl,a Lgot_result + ! non-restoring fixup here (one instruction only!) + sub %o2, 1, %o2 + + +Lgot_result: + + retl + mov %o2, %o0 diff --git a/sysdeps/sparc/udiv_qrnnd.S b/sysdeps/sparc/udiv_qrnnd.S new file mode 100644 index 0000000..4cd4f05 --- /dev/null +++ b/sysdeps/sparc/udiv_qrnnd.S @@ -0,0 +1,143 @@ +! SPARC __udiv_qrnnd division support, used from longlong.h. + +! Copyright (C) 1993, 1994 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + +! INPUT PARAMETERS +! rem_ptr i0 +! n1 i1 +! n0 i2 +! d i3 + +#include "sysdep.h" +#undef ret /* Kludge for glibc */ + + .text + .align 8 +LC0: .double 0r4294967296 +LC1: .double 0r2147483648 + + .align 4 + .global C_SYMBOL_NAME(__udiv_qrnnd) +C_SYMBOL_NAME(__udiv_qrnnd): + !#PROLOGUE# 0 + save %sp,-104,%sp + !#PROLOGUE# 1 + st %i1,[%fp-8] + ld [%fp-8],%f10 + sethi %hi(LC0),%o7 + fitod %f10,%f4 + ldd [%o7+%lo(LC0)],%f8 + cmp %i1,0 + bge L248 + mov %i0,%i5 + faddd %f4,%f8,%f4 +L248: + st %i2,[%fp-8] + ld [%fp-8],%f10 + fmuld %f4,%f8,%f6 + cmp %i2,0 + bge L249 + fitod %f10,%f2 + faddd %f2,%f8,%f2 +L249: + st %i3,[%fp-8] + faddd %f6,%f2,%f2 + ld [%fp-8],%f10 + cmp %i3,0 + bge L250 + fitod %f10,%f4 + faddd %f4,%f8,%f4 +L250: + fdivd %f2,%f4,%f2 + sethi %hi(LC1),%o7 + ldd [%o7+%lo(LC1)],%f4 + fcmped %f2,%f4 + nop + fbge,a L251 + fsubd %f2,%f4,%f2 + fdtoi %f2,%f2 + st %f2,[%fp-8] + b L252 + ld [%fp-8],%i4 +L251: + fdtoi %f2,%f2 + st %f2,[%fp-8] + ld [%fp-8],%i4 + sethi %hi(-2147483648),%g2 + xor %i4,%g2,%i4 +L252: + wr %g0,%i4,%y + sra %i3,31,%g2 + and %i4,%g2,%g2 + andcc %g0,0,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,0,%g1 + add %g1,%g2,%i0 + rd %y,%g3 + subcc %i2,%g3,%o7 + subxcc %i1,%i0,%g0 + be L253 + cmp %o7,%i3 + + add %i4,-1,%i0 + add %o7,%i3,%o7 + st %o7,[%i5] + ret + restore +L253: + blu L246 + mov %i4,%i0 + add %i4,1,%i0 + sub %o7,%i3,%o7 +L246: + st %o7,[%i5] + ret + restore diff --git a/sysdeps/sparc/umul.S b/sysdeps/sparc/umul.S new file mode 100644 index 0000000..7a26c29 --- /dev/null +++ b/sysdeps/sparc/umul.S @@ -0,0 +1,153 @@ +/* + * Unsigned multiply. Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the + * upper 32 bits of the 64-bit product). + * + * This code optimizes short (less than 13-bit) multiplies. Short + * multiplies require 25 instruction cycles, and long ones require + * 45 instruction cycles. + * + * On return, overflow has occurred (%o1 is not zero) if and only if + * the Z condition code is clear, allowing, e.g., the following: + * + * call .umul + * nop + * bnz overflow (or tnz) + */ + +#include "DEFS.h" +FUNC(.umul) + or %o0, %o1, %o4 + mov %o0, %y ! multiplier -> Y + andncc %o4, 0xfff, %g0 ! test bits 12..31 of *both* args + be Lmul_shortway ! if zero, can do it the short way + andcc %g0, %g0, %o4 ! zero the partial product and clear N and V + + /* + * Long multiply. 32 steps, followed by a final shift step. + */ + mulscc %o4, %o1, %o4 ! 1 + mulscc %o4, %o1, %o4 ! 2 + mulscc %o4, %o1, %o4 ! 3 + mulscc %o4, %o1, %o4 ! 4 + mulscc %o4, %o1, %o4 ! 5 + mulscc %o4, %o1, %o4 ! 6 + mulscc %o4, %o1, %o4 ! 7 + mulscc %o4, %o1, %o4 ! 8 + mulscc %o4, %o1, %o4 ! 9 + mulscc %o4, %o1, %o4 ! 10 + mulscc %o4, %o1, %o4 ! 11 + mulscc %o4, %o1, %o4 ! 12 + mulscc %o4, %o1, %o4 ! 13 + mulscc %o4, %o1, %o4 ! 14 + mulscc %o4, %o1, %o4 ! 15 + mulscc %o4, %o1, %o4 ! 16 + mulscc %o4, %o1, %o4 ! 17 + mulscc %o4, %o1, %o4 ! 18 + mulscc %o4, %o1, %o4 ! 19 + mulscc %o4, %o1, %o4 ! 20 + mulscc %o4, %o1, %o4 ! 21 + mulscc %o4, %o1, %o4 ! 22 + mulscc %o4, %o1, %o4 ! 23 + mulscc %o4, %o1, %o4 ! 24 + mulscc %o4, %o1, %o4 ! 25 + mulscc %o4, %o1, %o4 ! 26 + mulscc %o4, %o1, %o4 ! 27 + mulscc %o4, %o1, %o4 ! 28 + mulscc %o4, %o1, %o4 ! 29 + mulscc %o4, %o1, %o4 ! 30 + mulscc %o4, %o1, %o4 ! 31 + mulscc %o4, %o1, %o4 ! 32 + mulscc %o4, %g0, %o4 ! final shift + + + /* + * Normally, with the shift-and-add approach, if both numbers are + * positive you get the correct result. With 32-bit two's-complement + * numbers, -x is represented as + * + * x 32 + * ( 2 - ------ ) mod 2 * 2 + * 32 + * 2 + * + * (the `mod 2' subtracts 1 from 1.bbbb). To avoid lots of 2^32s, + * we can treat this as if the radix point were just to the left + * of the sign bit (multiply by 2^32), and get + * + * -x = (2 - x) mod 2 + * + * Then, ignoring the `mod 2's for convenience: + * + * x * y = xy + * -x * y = 2y - xy + * x * -y = 2x - xy + * -x * -y = 4 - 2x - 2y + xy + * + * For signed multiplies, we subtract (x << 32) from the partial + * product to fix this problem for negative multipliers (see mul.s). + * Because of the way the shift into the partial product is calculated + * (N xor V), this term is automatically removed for the multiplicand, + * so we don't have to adjust. + * + * But for unsigned multiplies, the high order bit wasn't a sign bit, + * and the correction is wrong. So for unsigned multiplies where the + * high order bit is one, we end up with xy - (y << 32). To fix it + * we add y << 32. + */ +#if 0 + tst %o1 + bl,a 1f ! if %o1 < 0 (high order bit = 1), + add %o4, %o0, %o4 ! %o4 += %o0 (add y to upper half) +1: rd %y, %o0 ! get lower half of product + retl + addcc %o4, %g0, %o1 ! put upper half in place and set Z for %o1==0 +#else + /* Faster code from tege@sics.se. */ + sra %o1, 31, %o2 ! make mask from sign bit + and %o0, %o2, %o2 ! %o2 = 0 or %o0, depending on sign of %o1 + rd %y, %o0 ! get lower half of product + retl + addcc %o4, %o2, %o1 ! add compensation and put upper half in place +#endif + +Lmul_shortway: + /* + * Short multiply. 12 steps, followed by a final shift step. + * The resulting bits are off by 12 and (32-12) = 20 bit positions, + * but there is no problem with %o0 being negative (unlike above), + * and overflow is impossible (the answer is at most 24 bits long). + */ + mulscc %o4, %o1, %o4 ! 1 + mulscc %o4, %o1, %o4 ! 2 + mulscc %o4, %o1, %o4 ! 3 + mulscc %o4, %o1, %o4 ! 4 + mulscc %o4, %o1, %o4 ! 5 + mulscc %o4, %o1, %o4 ! 6 + mulscc %o4, %o1, %o4 ! 7 + mulscc %o4, %o1, %o4 ! 8 + mulscc %o4, %o1, %o4 ! 9 + mulscc %o4, %o1, %o4 ! 10 + mulscc %o4, %o1, %o4 ! 11 + mulscc %o4, %o1, %o4 ! 12 + mulscc %o4, %g0, %o4 ! final shift + + /* + * %o4 has 20 of the bits that should be in the result; %y has + * the bottom 12 (as %y's top 12). That is: + * + * %o4 %y + * +----------------+----------------+ + * | -12- | -20- | -12- | -20- | + * +------(---------+------)---------+ + * -----result----- + * + * The 12 bits of %o4 left of the `result' area are all zero; + * in fact, all top 20 bits of %o4 are zero. + */ + + rd %y, %o5 + sll %o4, 12, %o0 ! shift middle bits left 12 + srl %o5, 20, %o5 ! shift low bits right 20 + or %o5, %o0, %o0 + retl + addcc %g0, %g0, %o1 ! %o1 = zero, and set Z diff --git a/sysdeps/sparc/urem.S b/sysdeps/sparc/urem.S new file mode 100644 index 0000000..9f64c88 --- /dev/null +++ b/sysdeps/sparc/urem.S @@ -0,0 +1,348 @@ + /* This file is generated from divrem.m4; DO NOT EDIT! */ +/* + * Division and remainder, from Appendix E of the Sparc Version 8 + * Architecture Manual, with fixes from Gordon Irlam. + */ + +/* + * Input: dividend and divisor in %o0 and %o1 respectively. + * + * m4 parameters: + * .urem name of function to generate + * rem rem=div => %o0 / %o1; rem=rem => %o0 % %o1 + * false false=true => signed; false=false => unsigned + * + * Algorithm parameters: + * N how many bits per iteration we try to get (4) + * WORDSIZE total number of bits (32) + * + * Derived constants: + * TOPBITS number of bits in the top decade of a number + * + * Important variables: + * Q the partial quotient under development (initially 0) + * R the remainder so far, initially the dividend + * ITER number of main division loop iterations required; + * equal to ceil(log2(quotient) / N). Note that this + * is the log base (2^N) of the quotient. + * V the current comparand, initially divisor*2^(ITER*N-1) + * + * Cost: + * Current estimate for non-large dividend is + * ceil(log2(quotient) / N) * (10 + 7N/2) + C + * A large dividend is one greater than 2^(31-TOPBITS) and takes a + * different path, as the upper bits of the quotient must be developed + * one bit at a time. + */ + + + +#include "DEFS.h" +#ifdef __svr4__ +#include <sys/trap.h> +#else +#include <machine/trap.h> +#endif + +FUNC(.urem) + + ! Ready to divide. Compute size of quotient; scale comparand. + orcc %o1, %g0, %o5 + bne 1f + mov %o0, %o3 + + ! Divide by zero trap. If it returns, return 0 (about as + ! wrong as possible, but that is what SunOS does...). + ta ST_DIV0 + retl + clr %o0 + +1: + cmp %o3, %o5 ! if %o1 exceeds %o0, done + blu Lgot_result ! (and algorithm fails otherwise) + clr %o2 + sethi %hi(1 << (32 - 4 - 1)), %g1 + cmp %o3, %g1 + blu Lnot_really_big + clr %o4 + + ! Here the dividend is >= 2**(31-N) or so. We must be careful here, + ! as our usual N-at-a-shot divide step will cause overflow and havoc. + ! The number of bits in the result here is N*ITER+SC, where SC <= N. + ! Compute ITER in an unorthodox manner: know we need to shift V into + ! the top decade: so do not even bother to compare to R. + 1: + cmp %o5, %g1 + bgeu 3f + mov 1, %g7 + sll %o5, 4, %o5 + b 1b + add %o4, 1, %o4 + + ! Now compute %g7. + 2: addcc %o5, %o5, %o5 + bcc Lnot_too_big + add %g7, 1, %g7 + + ! We get here if the %o1 overflowed while shifting. + ! This means that %o3 has the high-order bit set. + ! Restore %o5 and subtract from %o3. + sll %g1, 4, %g1 ! high order bit + srl %o5, 1, %o5 ! rest of %o5 + add %o5, %g1, %o5 + b Ldo_single_div + sub %g7, 1, %g7 + + Lnot_too_big: + 3: cmp %o5, %o3 + blu 2b + nop + be Ldo_single_div + nop + /* NB: these are commented out in the V8-Sparc manual as well */ + /* (I do not understand this) */ + ! %o5 > %o3: went too far: back up 1 step + ! srl %o5, 1, %o5 + ! dec %g7 + ! do single-bit divide steps + ! + ! We have to be careful here. We know that %o3 >= %o5, so we can do the + ! first divide step without thinking. BUT, the others are conditional, + ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- + ! order bit set in the first step, just falling into the regular + ! division loop will mess up the first time around. + ! So we unroll slightly... + Ldo_single_div: + subcc %g7, 1, %g7 + bl Lend_regular_divide + nop + sub %o3, %o5, %o3 + mov 1, %o2 + b Lend_single_divloop + nop + Lsingle_divloop: + sll %o2, 1, %o2 + bl 1f + srl %o5, 1, %o5 + ! %o3 >= 0 + sub %o3, %o5, %o3 + b 2f + add %o2, 1, %o2 + 1: ! %o3 < 0 + add %o3, %o5, %o3 + sub %o2, 1, %o2 + 2: + Lend_single_divloop: + subcc %g7, 1, %g7 + bge Lsingle_divloop + tst %o3 + b,a Lend_regular_divide + +Lnot_really_big: +1: + sll %o5, 4, %o5 + cmp %o5, %o3 + bleu 1b + addcc %o4, 1, %o4 + be Lgot_result + sub %o4, 1, %o4 + + tst %o3 ! set up for initial iteration +Ldivloop: + sll %o2, 4, %o2 + ! depth 1, accumulated bits 0 + bl L.1.16 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 2, accumulated bits 1 + bl L.2.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 3, accumulated bits 3 + bl L.3.19 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits 7 + bl L.4.23 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (7*2+1), %o2 + +L.4.23: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (7*2-1), %o2 + + +L.3.19: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits 5 + bl L.4.21 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (5*2+1), %o2 + +L.4.21: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (5*2-1), %o2 + + + +L.2.17: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 3, accumulated bits 1 + bl L.3.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits 3 + bl L.4.19 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (3*2+1), %o2 + +L.4.19: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (3*2-1), %o2 + + +L.3.17: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits 1 + bl L.4.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (1*2+1), %o2 + +L.4.17: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (1*2-1), %o2 + + + + +L.1.16: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 2, accumulated bits -1 + bl L.2.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 3, accumulated bits -1 + bl L.3.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits -1 + bl L.4.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2+1), %o2 + +L.4.15: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2-1), %o2 + + +L.3.15: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits -3 + bl L.4.13 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2+1), %o2 + +L.4.13: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2-1), %o2 + + + +L.2.15: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 3, accumulated bits -3 + bl L.3.13 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits -5 + bl L.4.11 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2+1), %o2 + +L.4.11: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2-1), %o2 + + +L.3.13: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits -7 + bl L.4.9 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2+1), %o2 + +L.4.9: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2-1), %o2 + + + + + 9: +Lend_regular_divide: + subcc %o4, 1, %o4 + bge Ldivloop + tst %o3 + bl,a Lgot_result + ! non-restoring fixup here (one instruction only!) + add %o3, %o1, %o3 + + +Lgot_result: + + retl + mov %o3, %o0 |