From 8f5ca04bc7fd53741d80117df992995ace8f6d2d Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Mon, 16 Oct 1995 01:37:51 +0000 Subject: Sat Oct 14 02:52:36 1995 Ulrich Drepper * malloc/malloc.c (_malloc_internal): Performance fix. Move if statement out of loop. * stdio/_itoa.c, stdio/_itoa.h: Complete rewrite. Much faster implementation using GMP functions. Contributed by Torbjorn Granlund and Ulrich Drepper. * stdio/test_rdwr.c: Include . * sysdeps/i386/i586/Implies: New file. New highly optimized string functions for i[345]86. * sysdeps/i386/memchr.S, sysdeps/i386/memcmp.S: New files. * sysdeps/i386/stpcpy.S, sysdeps/i386/stpncpy.S: New files. * sysdeps/i386/strchr.S, sysdeps/i386/strcspn.S: New files. * sysdeps/i386/strpbrk.S, sysdeps/i386/strrchr.S: New files. * sysdeps/i386/strspn.S, sysdeps/i386/i486/strcat.S: New files. * sysdeps/i386/i486/strlen.S, sysdeps/i386/i586/strchr.S: New files. * sysdeps/i386/i586/strlen.S: New file. * sysdeps/i386/memchr.c: Removed. There is now an assembler version. * sysdeps/i386/i586/memcopy.h (WORD_COPY_BWD): Parameters did not correspond to used values. * sysdeps/unix/sysv/linux/nfs/nfs.h: New file. Simply a wrapper around a kernel header file. * sysdeps/unix/sysv/linux/Dist: Add it. * sysdeps/unix/sysv/linux/Makefile [$(subdir)=sunrpc] (headers): Likewise. * sysdeps/unix/sysv/linux/local_lim.h: Rewrite. Instead of defining ourself we use a kernel header file. * sysdeps/unix/sysv/linux/i386/sysdep.h (DO_CALL): Optimize system call handler for i586. * sysdeps/unix/sysv/linux/sys/param.h: Add copyright and clean up. Sat Oct 14 02:52:36 1995 Ulrich Drepper * malloc/malloc.c (_malloc_internal): Performance fix. Move if statement out of loop. * stdio/_itoa.c, stdio/_itoa.h: Complete rewrite. Much faster implementation using GMP functions. Contributed by Torbjorn Granlund and Ulrich Drepper. * stdio/test_rdwr.c: Include . * sysdeps/i386/i586/Implies: New file. New highly optimized string functions for i[345]86. * sysdeps/i386/memchr.S, sysdeps/i386/memcmp.S: New files. * sysdeps/i386/stpcpy.S, sysdeps/i386/stpncpy.S: New files. * sysdeps/i386/strchr.S, sysdeps/i386/strcspn.S: New files. * sysdeps/i386/strpbrk.S, sysdeps/i386/strrchr.S: New files. * sysdeps/i386/strspn.S, sysdeps/i386/i486/strcat.S: New files. * sysdeps/i386/i486/strlen.S, sysdeps/i386/i586/strchr.S: New files. * sysdeps/i386/i586/strlen.S: New file. * sysdeps/i386/memchr.c: Removed. There is now an assembler version. * sysdeps/i386/i586/memcopy.h (WORD_COPY_BWD): Parameters did not correspond to used values. * sysdeps/unix/sysv/linux/nfs/nfs.h: New file. Simply a wrapper around a kernel header file. * sysdeps/unix/sysv/linux/Dist: Add it. * sysdeps/unix/sysv/linux/Makefile [$(subdir)=sunrpc] (headers): Likewise. * sysdeps/unix/sysv/linux/local_lim.h: Rewrite. Instead of defining ourself we use a kernel header file. * sysdeps/unix/sysv/linux/i386/sysdep.h (DO_CALL): Optimize system call handler for i586. * sysdeps/unix/sysv/linux/sys/param.h: Add copyright and clean up. --- stdlib/gmp-impl.h | 66 ++++++++++++---- stdlib/gmp.h | 40 ++++++---- stdlib/longlong.h | 229 +++++++++++++++++++++++++++++++++--------------------- 3 files changed, 220 insertions(+), 115 deletions(-) (limited to 'stdlib') diff --git a/stdlib/gmp-impl.h b/stdlib/gmp-impl.h index ccffe7b..48d3af9 100644 --- a/stdlib/gmp-impl.h +++ b/stdlib/gmp-impl.h @@ -19,11 +19,17 @@ along with the GNU MP Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #if ! defined (alloca) -#if defined (__GNUC__) || defined (__sparc__) || defined (sparc) +#if defined (__GNUC__) #define alloca __builtin_alloca #endif #endif +#if ! defined (alloca) +#if defined (__sparc__) || defined (sparc) || defined (__sgi) +#include +#endif +#endif + #ifndef NULL #define NULL 0L #endif @@ -168,6 +174,7 @@ void _mp_default_free (); else \ ____mpn_sqr_n (prodp, up, size, tspace); \ } while (0); +#define assert(trueval) do {if (!(trueval)) abort ();} while (0) /* Structure for conversion between internal binary format and strings in base 2..36. */ @@ -197,9 +204,11 @@ struct bases extern const struct bases __mp_bases[]; extern mp_size_t __gmp_default_fp_limb_precision; -/* Divide the two-limb number in (NH,,NL) by D, with DI being a 32 bit - approximation to (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB). - Put the quotient in Q and the remainder in R. */ +/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest + limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB). + If this would yield overflow, DI should be the largest possible number + (i.e., only ones). For correct operation, the most significant bit of D + has to be set. Put the quotient in Q and the remainder in R. */ #define udiv_qrnnd_preinv(q, r, nh, nl, d, di) \ do { \ mp_limb _q, _ql, _r; \ @@ -226,6 +235,8 @@ extern mp_size_t __gmp_default_fp_limb_precision; (r) = _r; \ (q) = _q; \ } while (0) +/* Like udiv_qrnnd_preinv, but for for any value D. DNORM is D shifted left + so that its most significant bit is set. LGUP is ceil(log2(D)). */ #define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup) \ do { \ mp_limb n2, n10, n1, nadj, q1; \ @@ -243,6 +254,8 @@ extern mp_size_t __gmp_default_fp_limb_precision; (r) = _xl + ((d) & _xh); \ (q) = _xh - q1; \ } while (0) +/* Exactly like udiv_qrnnd_preinv, but branch-free. It is not clear which + version to use. */ #define udiv_qrnnd_preinv2norm(q, r, nh, nl, d, di) \ do { \ mp_limb n2, n10, n1, nadj, q1; \ @@ -262,22 +275,49 @@ extern mp_size_t __gmp_default_fp_limb_precision; } while (0) #if defined (__GNUC__) -/* Define stuff for longlong.h asm macros. */ -#if __GNUC_NEW_ATTR_MODE_SYNTAX -typedef unsigned int UQItype __attribute__ ((mode ("QI"))); -typedef int SItype __attribute__ ((mode ("SI"))); -typedef unsigned int USItype __attribute__ ((mode ("SI"))); -typedef int DItype __attribute__ ((mode ("DI"))); -typedef unsigned int UDItype __attribute__ ((mode ("DI"))); -#else +/* Define stuff for longlong.h. */ typedef unsigned int UQItype __attribute__ ((mode (QI))); typedef int SItype __attribute__ ((mode (SI))); typedef unsigned int USItype __attribute__ ((mode (SI))); typedef int DItype __attribute__ ((mode (DI))); typedef unsigned int UDItype __attribute__ ((mode (DI))); -#endif +#else +typedef unsigned char UQItype; +typedef long SItype; +typedef unsigned long USItype; #endif typedef mp_limb UWtype; typedef unsigned int UHWtype; #define W_TYPE_SIZE BITS_PER_MP_LIMB + + +#ifndef IEEE_DOUBLE_BIG_ENDIAN +#define IEEE_DOUBLE_BIG_ENDIAN 1 +#endif + +#if IEEE_DOUBLE_BIG_ENDIAN +union ieee_double_extract +{ + struct + { + unsigned long sig:1; + unsigned long exp:11; + unsigned long manh:20; + unsigned long manl:32; + } s; + double d; +}; +#else +union ieee_double_extract +{ + struct + { + unsigned long manl:32; + unsigned long manh:20; + unsigned long exp:11; + unsigned long sig:1; + } s; + double d; +}; +#endif diff --git a/stdlib/gmp.h b/stdlib/gmp.h index 95c2f1b..0b2cb29 100644 --- a/stdlib/gmp.h +++ b/stdlib/gmp.h @@ -24,13 +24,13 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #define __need_size_t #include -#ifdef __STDC__ +#if defined (__STDC__) #define __gmp_const const #else #define __gmp_const #endif -#ifdef __GNUC__ +#if defined (__GNUC__) #define __gmp_inline inline #else #define __gmp_inline @@ -40,9 +40,14 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ typedef unsigned int mp_limb; typedef int mp_limb_signed; #else +#if _LONG_LONG_LIMB +typedef unsigned long long int mp_limb; +typedef long long int mp_limb_signed; +#else typedef unsigned long int mp_limb; typedef long int mp_limb_signed; #endif +#endif typedef mp_limb * mp_ptr; typedef __gmp_const mp_limb * mp_srcptr; @@ -52,9 +57,9 @@ typedef long int mp_exp_t; #ifndef __MP_SMALL__ typedef struct { - long int alloc; /* Number of *limbs* allocated and pointed + mp_size_t alloc; /* Number of *limbs* allocated and pointed to by the D field. */ - long int size; /* abs(SIZE) is the number of limbs + mp_size_t size; /* abs(SIZE) is the number of limbs the last field points to. If SIZE is negative this is a negative number. */ @@ -130,12 +135,16 @@ typedef __mpf_struct *mpf_ptr; typedef __gmp_const __mpq_struct *mpq_srcptr; typedef __mpq_struct *mpq_ptr; -#ifdef __STDC__ +#if defined (__STDC__) #define _PROTO(x) x #else #define _PROTO(x) () #endif +#if defined (FILE) || defined (_STDIO_H_) || defined (__STDIO_H__) || defined (H_STDIO) +#define _GMP_H_HAVE_FILE 1 +#endif + void mp_set_memory_functions _PROTO((void *(*) (size_t), void *(*) (void *, size_t, size_t), void (*) (void *, size_t))); @@ -165,7 +174,7 @@ unsigned long int mpz_get_ui _PROTO ((mpz_srcptr)); mp_limb mpz_getlimbn _PROTO ((mpz_srcptr, mp_size_t)); mp_size_t mpz_hamdist _PROTO ((mpz_srcptr, mpz_srcptr)); void mpz_init _PROTO ((mpz_ptr)); -#ifdef FILE +#ifdef _GMP_H_HAVE_FILE void mpz_inp_raw _PROTO ((mpz_ptr, FILE *)); int mpz_inp_str _PROTO ((mpz_ptr, FILE *, int)); #endif @@ -180,7 +189,7 @@ void mpz_mul _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); void mpz_mul_2exp _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); void mpz_mul_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); void mpz_neg _PROTO ((mpz_ptr, mpz_srcptr)); -#ifdef FILE +#ifdef _GMP_H_HAVE_FILE void mpz_out_raw _PROTO ((FILE *, mpz_srcptr)); void mpz_out_str _PROTO ((FILE *, int, mpz_srcptr)); #endif @@ -218,6 +227,8 @@ void mpz_tdiv_qr_ui _PROTO((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int)); void mpz_tdiv_r _PROTO((mpz_ptr, mpz_srcptr, mpz_srcptr)); void mpz_tdiv_r_ui _PROTO((mpz_ptr, mpz_srcptr, unsigned long int)); +void mpz_array_init (mpz_ptr, size_t, mp_size_t); + /**************** Rational (i.e. Q) routines. ****************/ void mpq_init _PROTO ((mpq_ptr)); @@ -253,7 +264,7 @@ void mpf_dump _PROTO ((mpf_srcptr)); char *mpf_get_str _PROTO ((char *, mp_exp_t *, int, size_t, mpf_srcptr)); void mpf_init _PROTO ((mpf_ptr)); void mpf_init2 _PROTO ((mpf_ptr, mp_size_t)); -#ifdef FILE +#ifdef _GMP_H_HAVE_FILE void mpf_inp_str _PROTO ((mpf_ptr, FILE *, int)); #endif void mpf_init_set _PROTO ((mpf_ptr, mpf_srcptr)); @@ -265,7 +276,7 @@ void mpf_mul _PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr)); void mpf_mul_2exp _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int)); void mpf_mul_ui _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int)); void mpf_neg _PROTO ((mpf_ptr, mpf_srcptr)); -#ifdef FILE +#ifdef _GMP_H_HAVE_FILE void mpf_out_str _PROTO ((mpf_ptr, int, size_t, FILE *)); #endif void mpf_set _PROTO ((mpf_ptr, mpf_srcptr)); @@ -335,7 +346,7 @@ mp_limb __mpn_gcd_1 _PROTO ((mp_srcptr, mp_size_t, mp_limb)); static __gmp_inline mp_limb -#if __STDC__ +#if defined (__STDC__) __mpn_add_1 (register mp_ptr res_ptr, register mp_srcptr s1_ptr, register mp_size_t s1_size, @@ -377,7 +388,7 @@ __mpn_add_1 (res_ptr, s1_ptr, s1_size, s2_limb) } static __gmp_inline mp_limb -#if __STDC__ +#if defined (__STDC__) __mpn_add (register mp_ptr res_ptr, register mp_srcptr s1_ptr, register mp_size_t s1_size, @@ -406,7 +417,7 @@ __mpn_add (res_ptr, s1_ptr, s1_size, s2_ptr, s2_size) } static __gmp_inline mp_limb -#if __STDC__ +#if defined (__STDC__) __mpn_sub_1 (register mp_ptr res_ptr, register mp_srcptr s1_ptr, register mp_size_t s1_size, @@ -448,7 +459,7 @@ __mpn_sub_1 (res_ptr, s1_ptr, s1_size, s2_limb) } static __gmp_inline mp_limb -#if __STDC__ +#if defined (__STDC__) __mpn_sub (register mp_ptr res_ptr, register mp_srcptr s1_ptr, register mp_size_t s1_size, @@ -477,7 +488,7 @@ __mpn_sub (res_ptr, s1_ptr, s1_size, s2_ptr, s2_size) } static __gmp_inline mp_size_t -#if __STDC__ +#if defined (__STDC__) __mpn_normal_size (mp_srcptr ptr, mp_size_t size) #else __mpn_normal_size (ptr, size) @@ -512,7 +523,6 @@ __mpn_normal_size (ptr, size) /* Useful synonyms, but not quite compatible with GMP 1. */ #define mpz_div mpz_fdiv_q #define mpz_divmod mpz_fdiv_qr -#define mpz_mod mpz_fdiv_r #define mpz_div_ui mpz_fdiv_q_ui #define mpz_divmod_ui mpz_fdiv_qr_ui #define mpz_mod_ui mpz_fdiv_r_ui diff --git a/stdlib/longlong.h b/stdlib/longlong.h index 97c469d..bbb92e3 100644 --- a/stdlib/longlong.h +++ b/stdlib/longlong.h @@ -97,7 +97,7 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #define __AND_CLOBBER_CC , "cc" #endif /* __GNUC__ < 2 */ -#if (defined (__a29k__) || defined (___AM29K__)) && W_TYPE_SIZE == 32 +#if (defined (__a29k__) || defined (_AM29K)) && W_TYPE_SIZE == 32 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("add %1,%4,%5 addc %0,%2,%3" \ @@ -152,6 +152,7 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ (pl) = __m0 * __m1; \ } while (0) #define UMUL_TIME 46 +#ifndef LONGLONG_STANDALONE #define udiv_qrnnd(q, r, n1, n0, d) \ do { UDItype __r; \ (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \ @@ -159,12 +160,13 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ } while (0) extern UDItype __udiv_qrnnd (); #define UDIV_TIME 220 -#endif +#endif /* LONGLONG_STANDALONE */ +#endif /* __alpha__ */ #if defined (__arm__) && W_TYPE_SIZE == 32 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ - __asm__ ("adds %1,%4,%5 - adc %0,%2,%3" \ + __asm__ ("adds %1, %4, %5 + adc %0, %2, %3" \ : "=r" ((USItype)(sh)), \ "=&r" ((USItype)(sl)) \ : "%r" ((USItype)(ah)), \ @@ -172,8 +174,8 @@ extern UDItype __udiv_qrnnd (); "%r" ((USItype)(al)), \ "rI" ((USItype)(bl))) #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ - __asm__ ("subs %1,%4,%5 - sbc %0,%2,%3" \ + __asm__ ("subs %1, %4, %5 + sbc %0, %2, %3" \ : "=r" ((USItype)(sh)), \ "=&r" ((USItype)(sl)) \ : "r" ((USItype)(ah)), \ @@ -181,19 +183,19 @@ extern UDItype __udiv_qrnnd (); "r" ((USItype)(al)), \ "rI" ((USItype)(bl))) #define umul_ppmm(xh, xl, a, b) \ - __asm__ ("; Inlined umul_ppmm - mov r0,%2 lsr 16 - mov r2,%3 lsr 16 - bic r1,%2,r0 lsl 16 - bic r2,%3,r2 lsl 16 - mul %1,r1,r2 - mul r2,r0,r2 - mul r1,%0,r1 - mul %0,r0,%0 - adds r1,r2,r1 - addcs %0,%0,0x10000 - adds %1,%1,r1 lsl 16 - adc %0,%0,r1 lsr 16" \ + __asm__ ("%@ Inlined umul_ppmm + mov %|r0, %2, lsr #16 + mov %|r2, %3, lsr #16 + bic %|r1, %2, %|r0, lsl #16 + bic %|r2, %3, %|r2, lsl #16 + mul %1, %|r1, %|r2 + mul %|r2, %|r0, %|r2 + mul %|r1, %0, %|r1 + mul %0, %|r0, %0 + adds %|r1, %|r2, %|r1 + addcs %0, %0, #65536 + adds %1, %1, %|r1, lsl #16 + adc %0, %0, %|r1, lsr #16" \ : "=&r" ((USItype)(xh)), \ "=r" ((USItype)(xl)) \ : "r" ((USItype)(a)), \ @@ -296,9 +298,9 @@ extern UDItype __udiv_qrnnd (); struct {USItype __h, __l;} __i; \ } __xx; \ __asm__ ("xmpyu %1,%2,%0" \ - : "=x" (__xx.__ll) \ - : "x" ((USItype)(u)), \ - "x" ((USItype)(v))); \ + : "=fx" (__xx.__ll) \ + : "fx" ((USItype)(u)), \ + "fx" ((USItype)(v))); \ (wh) = __xx.__i.__h; \ (wl) = __xx.__i.__l; \ } while (0) @@ -308,12 +310,14 @@ extern UDItype __udiv_qrnnd (); #define UMUL_TIME 40 #define UDIV_TIME 80 #endif +#ifndef LONGLONG_STANDALONE #define udiv_qrnnd(q, r, n1, n0, d) \ do { USItype __r; \ (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \ (r) = __r; \ } while (0) extern USItype __udiv_qrnnd (); +#endif /* LONGLONG_STANDALONE */ #define count_leading_zeros(count, x) \ do { \ USItype __tmp; \ @@ -419,8 +423,12 @@ extern USItype __udiv_qrnnd (); } while (0) #define count_trailing_zeros(count, x) \ __asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x))) +#ifndef UMUL_TIME #define UMUL_TIME 40 +#endif +#ifndef UDIV_TIME #define UDIV_TIME 40 +#endif #endif /* 80x86 */ #if defined (__i960__) && W_TYPE_SIZE == 32 @@ -442,7 +450,7 @@ extern USItype __udiv_qrnnd (); __w; }) #endif /* __i960__ */ -#if defined (__mc68000__) && W_TYPE_SIZE == 32 +#if (defined (__mc68000__) || defined (__mc68020__) || defined (__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("add%.l %5,%1 addx%.l %3,%0" \ @@ -489,38 +497,34 @@ extern USItype __udiv_qrnnd (); : "=d" ((USItype)(count)) \ : "od" ((USItype)(x)), "n" (0)) #else /* not mc68020 */ -#define umul_ppmm(xh, xl, a, b) \ - __asm__ ("| Inlined umul_ppmm - move%.l %2,%/d0 - move%.l %3,%/d1 - move%.l %/d0,%/d2 - swap %/d0 - move%.l %/d1,%/d3 - swap %/d1 - move%.w %/d2,%/d4 - mulu %/d3,%/d4 - mulu %/d1,%/d2 - mulu %/d0,%/d3 - mulu %/d0,%/d1 - move%.l %/d4,%/d0 - eor%.w %/d0,%/d0 - swap %/d0 - add%.l %/d0,%/d2 - add%.l %/d3,%/d2 +#define umul_ppmmxx(xh, xl, a, b) \ + do { USItype __umul_tmp1, __umul_tmp2; \ + __asm__ ("| Inlined umul_ppmm + move%.l %5,%3 + move%.l %2,%0 + move%.w %3,%1 + swap %3 + swap %0 + mulu %2,%1 + mulu %3,%0 + mulu %2,%3 + swap %2 + mulu %5,%2 + add%.l %3,%2 jcc 1f - add%.l #65536,%/d1 -1: swap %/d2 - moveq #0,%/d0 - move%.w %/d2,%/d0 - move%.w %/d4,%/d2 - move%.l %/d2,%1 - add%.l %/d1,%/d0 - move%.l %/d0,%0" \ - : "=g" ((USItype)(xh)), \ - "=g" ((USItype)(xl)) \ - : "g" ((USItype)(a)), \ - "g" ((USItype)(b)) \ - : "d0", "d1", "d2", "d3", "d4") + add%.l %#0x10000,%0 +1: move%.l %2,%3 + clr%.w %2 + swap %2 + swap %3 + clr%.w %3 + add%.l %3,%1 + addx%.l %2,%0 + | End inlined umul_ppmm" \ + : "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \ + "=d" (__umul_tmp1), "=&d" (__umul_tmp2) \ + : "%2" ((USItype)(a)), "d" ((USItype)(b))); \ + } while (0) #define UMUL_TIME 100 #define UDIV_TIME 400 #endif /* not mc68020 */ @@ -553,7 +557,7 @@ extern USItype __udiv_qrnnd (); : "r" ((USItype)(x))); \ (count) = __cbtmp ^ 31; \ } while (0) -#if defined (__mc88110__) +#if defined (__m88110__) #define umul_ppmm(wh, wl, u, v) \ do { \ union {UDItype __ll; \ @@ -582,10 +586,18 @@ extern USItype __udiv_qrnnd (); #else #define UMUL_TIME 17 #define UDIV_TIME 150 -#endif /* __mc88110__ */ +#endif /* __m88110__ */ #endif /* __m88000__ */ #if defined (__mips__) && W_TYPE_SIZE == 32 +#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7 +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("multu %2,%3" \ + : "=l" ((USItype)(w0)), \ + "=h" ((USItype)(w1)) \ + : "d" ((USItype)(u)), \ + "d" ((USItype)(v))) +#else #define umul_ppmm(w1, w0, u, v) \ __asm__ ("multu %2,%3 mflo %0 @@ -594,11 +606,20 @@ extern USItype __udiv_qrnnd (); "=d" ((USItype)(w1)) \ : "d" ((USItype)(u)), \ "d" ((USItype)(v))) +#endif #define UMUL_TIME 10 #define UDIV_TIME 100 #endif /* __mips__ */ #if (defined (__mips) && __mips >= 3) && W_TYPE_SIZE == 64 +#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7 +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("dmultu %2,%3" \ + : "=l" ((UDItype)(w0)), \ + "=h" ((UDItype)(w1)) \ + : "d" ((UDItype)(u)), \ + "d" ((UDItype)(v))) +#else #define umul_ppmm(w1, w0, u, v) \ __asm__ ("dmultu %2,%3 mflo %0 @@ -607,8 +628,9 @@ extern USItype __udiv_qrnnd (); "=d" ((UDItype)(w1)) \ : "d" ((UDItype)(u)), \ "d" ((UDItype)(v))) -#define UMUL_TIME 10 -#define UDIV_TIME 100 +#endif +#define UMUL_TIME 20 +#define UDIV_TIME 140 #endif /* __mips__ */ #if defined (__ns32000__) && W_TYPE_SIZE == 32 @@ -647,7 +669,7 @@ extern USItype __udiv_qrnnd (); } while (0) #endif /* __ns32000__ */ -#if (defined (__powerpc__) || defined (___IBMR2__)) && W_TYPE_SIZE == 32 +#if (defined (_ARCH_PPC) || defined (_IBMR2)) && W_TYPE_SIZE == 32 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ do { \ if (__builtin_constant_p (bh) && (bh) == 0) \ @@ -676,14 +698,14 @@ extern USItype __udiv_qrnnd (); #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ do { \ if (__builtin_constant_p (ah) && (ah) == 0) \ - __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \ : "=r" ((USItype)(sh)), \ "=&r" ((USItype)(sl)) \ : "r" ((USItype)(bh)), \ "rI" ((USItype)(al)), \ "r" ((USItype)(bl))); \ else if (__builtin_constant_p (ah) && (ah) ==~(USItype) 0) \ - __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \ : "=r" ((USItype)(sh)), \ "=&r" ((USItype)(sl)) \ : "r" ((USItype)(bh)), \ @@ -716,7 +738,7 @@ extern USItype __udiv_qrnnd (); __asm__ ("{cntlz|cntlzw} %0,%1" \ : "=r" ((USItype)(count)) \ : "r" ((USItype)(x))) -#if defined (__powerpc__) +#if defined (_ARCH_PPC) #define umul_ppmm(ph, pl, m0, m1) \ do { \ USItype __m0 = (m0), __m1 = (m1); \ @@ -785,16 +807,15 @@ extern USItype __udiv_qrnnd (); "g" ((USItype)(bh)), \ "1" ((USItype)(al)), \ "g" ((USItype)(bl))) -/* This insn doesn't work on ancient pyramids. */ +/* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP. */ #define umul_ppmm(w1, w0, u, v) \ ({union {UDItype __ll; \ struct {USItype __h, __l;} __i; \ } __xx; \ - __xx.__i.__l = u; \ - __asm__ ("uemul %3,%0" \ - : "=r" (__xx.__i.__h), \ - "=r" (__xx.__i.__l) \ - : "1" (__xx.__i.__l), \ + __asm__ ("movw %1,%R0 + uemul %2,%0" \ + : "=&r" (__xx.__ll) \ + : "g" ((USItype) (u)), \ "g" ((USItype)(v))); \ (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;}) #endif /* __pyr__ */ @@ -868,6 +889,20 @@ extern USItype __udiv_qrnnd (); } while (0) #endif +#if defined (__sh2__) && W_TYPE_SIZE == 32 +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ( \ + "dmulu.l %2,%3 + sts macl,%1 + sts mach,%0" \ + : "=r" ((USItype)(w1)), \ + "=r" ((USItype)(w0)) \ + : "r" ((USItype)(u)), \ + "r" ((USItype)(v)) \ + : "macl", "mach") +#define UMUL_TIME 5 +#endif + #if defined (__sparc__) && W_TYPE_SIZE == 32 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("addcc %r4,%5,%1 @@ -901,17 +936,21 @@ extern USItype __udiv_qrnnd (); : "r" ((USItype)(u)), \ "r" ((USItype)(v))) #define UMUL_TIME 5 -/* We might want to leave this undefined for `SuperSPARC (tm)' since - its implementation is crippled and often traps. */ +#ifndef SUPERSPARC /* SuperSPARC's udiv only handles 53 bit dividends */ #define udiv_qrnnd(q, r, n1, n0, d) \ - __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\ - : "=&r" ((USItype)(q)), \ - "=&r" ((USItype)(r)) \ - : "r" ((USItype)(n1)), \ - "r" ((USItype)(n0)), \ - "r" ((USItype)(d))) + do { \ + USItype __q; \ + __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \ + : "=r" ((USItype)(__q)) \ + : "r" ((USItype)(n1)), \ + "r" ((USItype)(n0)), \ + "r" ((USItype)(d))); \ + (r) = (n0) - __q * (d); \ + (q) = __q; \ + } while (0) #define UDIV_TIME 25 -#else +#endif /* SUPERSPARC */ +#else /* ! __sparc_v8__ */ #if defined (__sparclite__) /* This has hardware multiply but not divide. It also has two additional instructions scan (ffs from high bit) and divscc. */ @@ -973,9 +1012,10 @@ extern USItype __udiv_qrnnd (); __asm__ ("scan %1,0,%0" \ : "=r" ((USItype)(x)) \ : "r" ((USItype)(count))) -#else -/* SPARC without integer multiplication and divide instructions. - (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */ +#endif /* __sparclite__ */ +#endif /* __sparc_v8__ */ +/* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */ +#ifndef umul_ppmm #define umul_ppmm(w1, w0, u, v) \ __asm__ ("! Inlined umul_ppmm wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr @@ -1023,6 +1063,9 @@ extern USItype __udiv_qrnnd (); "r" ((USItype)(v)) \ : "%g1", "%g2" __AND_CLOBBER_CC) #define UMUL_TIME 39 /* 39 instructions */ +#endif +#ifndef udiv_qrnnd +#ifndef LONGLONG_STANDALONE #define udiv_qrnnd(q, r, n1, n0, d) \ do { USItype __r; \ (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \ @@ -1030,8 +1073,8 @@ extern USItype __udiv_qrnnd (); } while (0) extern USItype __udiv_qrnnd (); #define UDIV_TIME 140 -#endif /* __sparclite__ */ -#endif /* __sparc_v8__ */ +#endif /* LONGLONG_STANDALONE */ +#endif /* udiv_qrnnd */ #endif /* __sparc__ */ #if defined (__vax__) && W_TYPE_SIZE == 32 @@ -1075,7 +1118,7 @@ extern USItype __udiv_qrnnd (); __xx.__i.__h = n1; __xx.__i.__l = n0; \ __asm__ ("ediv %3,%2,%0,%1" \ : "=g" (q), "=g" (r) \ - : "g" (__n1n0.ll), "g" (d)); \ + : "g" (__xx.ll), "g" (d)); \ } while (0) #endif /* __vax__ */ @@ -1173,11 +1216,12 @@ extern USItype __udiv_qrnnd (); do { \ UWtype __x0, __x1, __x2, __x3; \ UHWtype __ul, __vl, __uh, __vh; \ + UWtype __u = (u), __v = (v); \ \ - __ul = __ll_lowpart (u); \ - __uh = __ll_highpart (u); \ - __vl = __ll_lowpart (v); \ - __vh = __ll_highpart (v); \ + __ul = __ll_lowpart (__u); \ + __uh = __ll_highpart (__u); \ + __vl = __ll_lowpart (__v); \ + __vh = __ll_highpart (__v); \ \ __x0 = (UWtype) __ul * __vl; \ __x1 = (UWtype) __ul * __vh; \ @@ -1194,6 +1238,17 @@ extern USItype __udiv_qrnnd (); } while (0) #endif +#if !defined (umul_ppmm) +#define smul_ppmm(w1, w0, u, v) \ + do { \ + UWtype __w1; \ + UWtype __m0 = (u), __m1 = (v); \ + umul_ppmm (__w1, w0, __m0, __m1); \ + (w1) = __w1 - (-(__m0 >> (W_TYPE_SIZE - 1)) & __m1) \ + - (-(__m1 >> (W_TYPE_SIZE - 1)) & __m0); \ + } while (0) +#endif + /* Define this unconditionally, so it can be used for debugging. */ #define __udiv_qrnnd_c(q, r, n1, n0, d) \ do { \ -- cgit v1.1