From 6b628d3634559ddd84148ed860a0e1e967b5d59c Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 1 Mar 1996 18:45:35 +0000 Subject: * stdlib/Makefile (mpn-stuff): New target. (copy-mpn): Use it. * Code copied from GMP updated to 1.937 version. * stdlib/strtod.c (HAVE_ALLOCA): Define this for gmp headers. --- sysdeps/alpha/addmul_1.s | 23 +-- sysdeps/alpha/alphaev5/add_n.s | 175 +++++++++------- sysdeps/alpha/alphaev5/lshift.s | 30 ++- sysdeps/alpha/alphaev5/rshift.s | 28 ++- sysdeps/alpha/alphaev5/sub_n.s | 148 ++++++++++++++ sysdeps/alpha/lshift.s | 14 +- sysdeps/alpha/mul_1.s | 2 +- sysdeps/alpha/rshift.s | 16 +- sysdeps/alpha/submul_1.s | 23 +-- sysdeps/alpha/udiv_qrnnd.S | 34 ++-- sysdeps/generic/add_n.c | 8 +- sysdeps/generic/addmul_1.c | 6 +- sysdeps/generic/cmp.c | 8 +- sysdeps/generic/divmod_1.c | 8 +- sysdeps/generic/lshift.c | 8 +- sysdeps/generic/mod_1.c | 8 +- sysdeps/generic/mul.c | 36 ++-- sysdeps/generic/mul_1.c | 6 +- sysdeps/generic/mul_n.c | 110 +++++------ sysdeps/generic/rshift.c | 8 +- sysdeps/generic/sub_n.c | 8 +- sysdeps/generic/submul_1.c | 6 +- sysdeps/i386/gmp-mparam.h | 2 +- sysdeps/i386/i586/add_n.S | 133 ++++++------- sysdeps/i386/i586/addmul_1.S | 34 ++-- sysdeps/i386/i586/mul_1.S | 20 +- sysdeps/i386/i586/sub_n.S | 133 ++++++------- sysdeps/i386/i586/submul_1.S | 16 +- sysdeps/m68k/add_n.S | 75 +++---- sysdeps/m68k/lshift.S | 150 ++++++++++++++ sysdeps/m68k/m68020/addmul_1.S | 75 +++---- sysdeps/m68k/m68020/mul_1.S | 83 ++++---- sysdeps/m68k/m68020/submul_1.S | 75 +++---- sysdeps/m68k/rshift.S | 149 ++++++++++++++ sysdeps/m68k/sub_n.S | 75 +++---- sysdeps/m88k/add_n.s | 2 +- sysdeps/m88k/m88110/add_n.S | 199 +++++++++++++++++++ sysdeps/m88k/m88110/addmul_1.s | 60 ++++++ sysdeps/m88k/m88110/mul_1.s | 28 +-- sysdeps/m88k/m88110/sub_n.S | 275 ++++++++++++++++++++++++++ sysdeps/m88k/mul_1.s | 74 ++++--- sysdeps/m88k/sub_n.s | 7 +- sysdeps/mips/addmul_1.s | 4 +- sysdeps/mips/mips3/addmul_1.s | 2 +- sysdeps/mips/mips3/mul_1.s | 2 +- 
sysdeps/mips/mips3/submul_1.s | 2 +- sysdeps/mips/mul_1.s | 4 +- sysdeps/mips/submul_1.s | 4 +- sysdeps/rs6000/add_n.s | 2 +- sysdeps/rs6000/sub_n.s | 2 +- sysdeps/sparc/add_n.S | 42 ++-- sysdeps/sparc/lshift.S | 6 +- sysdeps/sparc/rshift.S | 6 +- sysdeps/sparc/sparc64/add_n.s | 57 ++++++ sysdeps/sparc/sparc64/addmul_1.s | 88 +++++++++ sysdeps/sparc/sparc64/gmp-mparam.h | 26 +++ sysdeps/sparc/sparc64/lshift.s | 95 +++++++++ sysdeps/sparc/sparc64/mul_1.s | 85 ++++++++ sysdeps/sparc/sparc64/rshift.s | 92 +++++++++ sysdeps/sparc/sparc64/sub_n.s | 57 ++++++ sysdeps/sparc/sparc64/submul_1.s | 88 +++++++++ sysdeps/sparc/sparc8/addmul_1.S | 2 +- sysdeps/sparc/sparc8/mul_1.S | 2 +- sysdeps/sparc/sub_n.S | 12 +- sysdeps/unix/sysv/linux/m68k/profil-counter.h | 24 +++ sysdeps/vax/gmp-mparam.h | 2 +- sysdeps/z8000/mul_1.s | 2 +- 67 files changed, 2329 insertions(+), 757 deletions(-) create mode 100644 sysdeps/alpha/alphaev5/sub_n.s create mode 100644 sysdeps/m68k/lshift.S create mode 100644 sysdeps/m68k/rshift.S create mode 100644 sysdeps/m88k/m88110/add_n.S create mode 100644 sysdeps/m88k/m88110/addmul_1.s create mode 100644 sysdeps/m88k/m88110/sub_n.S create mode 100644 sysdeps/sparc/sparc64/add_n.s create mode 100644 sysdeps/sparc/sparc64/addmul_1.s create mode 100644 sysdeps/sparc/sparc64/gmp-mparam.h create mode 100644 sysdeps/sparc/sparc64/lshift.s create mode 100644 sysdeps/sparc/sparc64/mul_1.s create mode 100644 sysdeps/sparc/sparc64/rshift.s create mode 100644 sysdeps/sparc/sparc64/sub_n.s create mode 100644 sysdeps/sparc/sparc64/submul_1.s create mode 100644 sysdeps/unix/sysv/linux/m68k/profil-counter.h (limited to 'sysdeps') diff --git a/sysdeps/alpha/addmul_1.s b/sysdeps/alpha/addmul_1.s index 46d277d..8b168cb 100644 --- a/sysdeps/alpha/addmul_1.s +++ b/sysdeps/alpha/addmul_1.s @@ -26,16 +26,7 @@ # size r18 # s2_limb r19 - # This code runs at 42 cycles/limb on the 21064. 
- - # To improve performance for long multiplications, we would use - # 'fetch' for S1 and 'fetch_m' for RES. It's not obvious how to use - # these instructions without slowing down the general code: 1. We can - # only have two prefetches in operation at any time in the Alpha - # architecture. 2. There will seldom be any special alignment - # between RES_PTR and S1_PTR. Maybe we can simply divide the current - # loop into an inner and outer loop, having the inner loop handle - # exactly one prefetch block? + # This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5. .set noreorder .set noat @@ -52,7 +43,7 @@ __mpn_addmul_1: mulq $2,$19,$3 # $3 = prod_low ldq $5,0($16) # $5 = *res_ptr umulh $2,$19,$0 # $0 = prod_high - beq $18,Lend1 # jump if size was == 1 + beq $18,.Lend1 # jump if size was == 1 ldq $2,0($17) # $2 = s1_limb addq $17,8,$17 # s1_ptr++ subq $18,1,$18 # size-- @@ -60,10 +51,10 @@ __mpn_addmul_1: cmpult $3,$5,$4 stq $3,0($16) addq $16,8,$16 # res_ptr++ - beq $18,Lend2 # jump if size was == 2 + beq $18,.Lend2 # jump if size was == 2 .align 3 -Loop: mulq $2,$19,$3 # $3 = prod_low +.Loop: mulq $2,$19,$3 # $3 = prod_low ldq $5,0($16) # $5 = *res_ptr addq $4,$0,$0 # cy_limb = cy_limb + 'cy' subq $18,1,$18 # size-- @@ -77,9 +68,9 @@ Loop: mulq $2,$19,$3 # $3 = prod_low stq $3,0($16) addq $16,8,$16 # res_ptr++ addq $5,$0,$0 # combine carries - bne $18,Loop + bne $18,.Loop -Lend2: mulq $2,$19,$3 # $3 = prod_low +.Lend2: mulq $2,$19,$3 # $3 = prod_low ldq $5,0($16) # $5 = *res_ptr addq $4,$0,$0 # cy_limb = cy_limb + 'cy' umulh $2,$19,$4 # $4 = cy_limb @@ -91,7 +82,7 @@ Lend2: mulq $2,$19,$3 # $3 = prod_low addq $5,$0,$0 # combine carries addq $4,$0,$0 # cy_limb = prod_high + cy ret $31,($26),1 -Lend1: addq $5,$3,$3 +.Lend1: addq $5,$3,$3 cmpult $3,$5,$5 stq $3,0($16) addq $0,$5,$0 diff --git a/sysdeps/alpha/alphaev5/add_n.s b/sysdeps/alpha/alphaev5/add_n.s index 2aaf041..66cf82b 100644 --- a/sysdeps/alpha/alphaev5/add_n.s +++ 
b/sysdeps/alpha/alphaev5/add_n.s @@ -35,84 +35,113 @@ __mpn_add_n: .frame $30,0,$26,0 - ldq $3,0($17) - ldq $4,0($18) - - subq $19,1,$19 - and $19,4-1,$2 # number of limbs in first loop - bis $31,$31,$0 - beq $2,.L0 # if multiple of 4 limbs, skip first loop - - subq $19,$2,$19 - -.Loop0: subq $2,1,$2 + or $31,$31,$25 # clear cy + subq $19,4,$19 # decr loop cnt + blt $19,.Lend2 # if less than 4 limbs, goto 2nd loop + # Start software pipeline for 1st loop + ldq $0,0($18) + ldq $1,8($18) + ldq $4,0($17) ldq $5,8($17) - addq $4,$0,$4 - ldq $6,8($18) - cmpult $4,$0,$1 - addq $3,$4,$4 - cmpult $4,$3,$0 - stq $4,0($16) - or $0,$1,$0 - - addq $17,8,$17 - addq $18,8,$18 - bis $5,$5,$3 - bis $6,$6,$4 - addq $16,8,$16 - bne $2,.Loop0 - -.L0: beq $19,.Lend - + addq $17,32,$17 # update s1_ptr + ldq $2,16($18) + addq $0,$4,$20 # 1st main add + ldq $3,24($18) + subq $19,4,$19 # decr loop cnt + ldq $6,-16($17) + cmpult $20,$0,$25 # compute cy from last add + ldq $7,-8($17) + addq $1,$25,$28 # cy add + addq $18,32,$18 # update s2_ptr + addq $5,$28,$21 # 2nd main add + cmpult $28,$25,$8 # compute cy from last add + blt $19,.Lend1 # if less than 4 limbs remain, jump + # 1st loop handles groups of 4 limbs in a software pipeline .align 4 -.Loop: subq $19,4,$19 - unop - - ldq $6,8($18) - addq $4,$0,$0 +.Loop: cmpult $21,$28,$25 # compute cy from last add + ldq $0,0($18) + or $8,$25,$25 # combine cy from the two adds + ldq $1,8($18) + addq $2,$25,$28 # cy add + ldq $4,0($17) + addq $28,$6,$22 # 3rd main add ldq $5,8($17) - cmpult $0,$4,$1 - ldq $4,16($18) - addq $3,$0,$20 - cmpult $20,$3,$0 - ldq $3,16($17) - or $0,$1,$0 - addq $6,$0,$0 - cmpult $0,$6,$1 - ldq $6,24($18) - addq $5,$0,$21 - cmpult $21,$5,$0 - ldq $5,24($17) - or $0,$1,$0 - addq $4,$0,$0 - cmpult $0,$4,$1 - ldq $4,32($18) - addq $3,$0,$22 - cmpult $22,$3,$0 - ldq $3,32($17) - or $0,$1,$0 - addq $6,$0,$0 - cmpult $0,$6,$1 - addq $5,$0,$23 - cmpult $23,$5,$0 - or $0,$1,$0 - + cmpult $28,$25,$8 # compute cy from last add + 
cmpult $22,$28,$25 # compute cy from last add stq $20,0($16) + or $8,$25,$25 # combine cy from the two adds stq $21,8($16) - stq $22,16($16) - stq $23,24($16) - - addq $17,32,$17 - addq $18,32,$18 - addq $16,32,$16 - bne $19,.Loop + addq $3,$25,$28 # cy add + addq $28,$7,$23 # 4th main add + cmpult $28,$25,$8 # compute cy from last add + cmpult $23,$28,$25 # compute cy from last add + addq $17,32,$17 # update s1_ptr + or $8,$25,$25 # combine cy from the two adds + addq $16,32,$16 # update res_ptr + addq $0,$25,$28 # cy add + ldq $2,16($18) + addq $4,$28,$20 # 1st main add + ldq $3,24($18) + cmpult $28,$25,$8 # compute cy from last add + ldq $6,-16($17) + cmpult $20,$28,$25 # compute cy from last add + ldq $7,-8($17) + or $8,$25,$25 # combine cy from the two adds + subq $19,4,$19 # decr loop cnt + stq $22,-16($16) + addq $1,$25,$28 # cy add + stq $23,-8($16) + addq $5,$28,$21 # 2nd main add + addq $18,32,$18 # update s2_ptr + cmpult $28,$25,$8 # compute cy from last add + bge $19,.Loop + # Finish software pipeline for 1st loop +.Lend1: cmpult $21,$28,$25 # compute cy from last add + or $8,$25,$25 # combine cy from the two adds + addq $2,$25,$28 # cy add + addq $28,$6,$22 # 3rd main add + cmpult $28,$25,$8 # compute cy from last add + cmpult $22,$28,$25 # compute cy from last add + stq $20,0($16) + or $8,$25,$25 # combine cy from the two adds + stq $21,8($16) + addq $3,$25,$28 # cy add + addq $28,$7,$23 # 4th main add + cmpult $28,$25,$8 # compute cy from last add + cmpult $23,$28,$25 # compute cy from last add + or $8,$25,$25 # combine cy from the two adds + addq $16,32,$16 # update res_ptr + stq $22,-16($16) + stq $23,-8($16) +.Lend2: addq $19,4,$19 # restore loop cnt + beq $19,.Lret + # Start software pipeline for 2nd loop + ldq $0,0($18) + ldq $4,0($17) + subq $19,1,$19 + beq $19,.Lend0 + # 2nd loop handles remaining 1-3 limbs + .align 4 +.Loop0: addq $0,$25,$28 # cy add + ldq $0,8($18) + addq $4,$28,$20 # main add + ldq $4,8($17) + addq $18,8,$18 + cmpult 
$28,$25,$8 # compute cy from last add + addq $17,8,$17 + stq $20,0($16) + cmpult $20,$28,$25 # compute cy from last add + subq $19,1,$19 # decr loop cnt + or $8,$25,$25 # combine cy from the two adds + addq $16,8,$16 + bne $19,.Loop0 +.Lend0: addq $0,$25,$28 # cy add + addq $4,$28,$20 # main add + cmpult $28,$25,$8 # compute cy from last add + cmpult $20,$28,$25 # compute cy from last add + stq $20,0($16) + or $8,$25,$25 # combine cy from the two adds -.Lend: addq $4,$0,$4 - cmpult $4,$0,$1 - addq $3,$4,$4 - cmpult $4,$3,$0 - stq $4,0($16) - or $0,$1,$0 +.Lret: or $25,$31,$0 # return cy ret $31,($26),1 - .end __mpn_add_n diff --git a/sysdeps/alpha/alphaev5/lshift.s b/sysdeps/alpha/alphaev5/lshift.s index fdb0895..392b424 100644 --- a/sysdeps/alpha/alphaev5/lshift.s +++ b/sysdeps/alpha/alphaev5/lshift.s @@ -25,7 +25,7 @@ # size r18 # cnt r19 - # This code runs at 4.25 cycles/limb on the EV5. + # This code runs at 3.25 cycles/limb on the EV5. .set noreorder .set noat @@ -44,11 +44,11 @@ __mpn_lshift: and $18,4-1,$28 # number of limbs in first loop srl $4,$20,$0 # compute function result - beq $28,L0 + beq $28,.L0 subq $18,$28,$18 .align 3 -Loop0: ldq $3,-16($17) +.Loop0: ldq $3,-16($17) subq $16,8,$16 sll $4,$19,$5 subq $17,8,$17 @@ -57,17 +57,17 @@ Loop0: ldq $3,-16($17) or $3,$3,$4 or $5,$6,$8 stq $8,0($16) - bne $28,Loop0 + bne $28,.Loop0 -L0: sll $4,$19,$24 - beq $18,Lend +.L0: sll $4,$19,$24 + beq $18,.Lend # warm up phase 1 ldq $1,-16($17) subq $18,4,$18 ldq $2,-24($17) ldq $3,-32($17) ldq $4,-40($17) - beq $18,Lcool1 + beq $18,.Lend1 # warm up phase 2 srl $1,$20,$7 sll $1,$19,$21 @@ -84,10 +84,10 @@ L0: sll $4,$19,$24 sll $4,$19,$24 ldq $4,-72($17) subq $18,4,$18 - beq $18,Lcool1 + beq $18,.Lend2 .align 4 # main loop -Loop: stq $7,-8($16) +.Loop: stq $7,-8($16) or $5,$22,$5 stq $8,-16($16) or $6,$23,$6 @@ -113,16 +113,14 @@ Loop: stq $7,-8($16) subq $16,32,$16 srl $4,$20,$6 - ldq $3,-96($17 + ldq $3,-96($17) sll $4,$19,$24 ldq $4,-104($17) subq $17,32,$17 - 
bne $18,Loop - unop - unop + bne $18,.Loop # cool down phase 2/1 -Lcool1: stq $7,-8($16) +.Lend2: stq $7,-8($16) or $5,$22,$5 stq $8,-16($16) or $6,$23,$6 @@ -150,7 +148,7 @@ Lcool1: stq $7,-8($16) ret $31,($26),1 # cool down phase 1/1 -Lcool1: srl $1,$20,$7 +.Lend1: srl $1,$20,$7 sll $1,$19,$21 srl $2,$20,$8 sll $2,$19,$22 @@ -170,6 +168,6 @@ Lcool1: srl $1,$20,$7 stq $24,-40($16) ret $31,($26),1 -Lend stq $24,-8($16) +.Lend: stq $24,-8($16) ret $31,($26),1 .end __mpn_lshift diff --git a/sysdeps/alpha/alphaev5/rshift.s b/sysdeps/alpha/alphaev5/rshift.s index 1da9960..d20dde3 100644 --- a/sysdeps/alpha/alphaev5/rshift.s +++ b/sysdeps/alpha/alphaev5/rshift.s @@ -25,7 +25,7 @@ # size r18 # cnt r19 - # This code runs at 4.25 cycles/limb on the EV5. + # This code runs at 3.25 cycles/limb on the EV5. .set noreorder .set noat @@ -42,11 +42,11 @@ __mpn_rshift: and $18,4-1,$28 # number of limbs in first loop sll $4,$20,$0 # compute function result - beq $28,L0 + beq $28,.L0 subq $18,$28,$18 .align 3 -Loop0: ldq $3,8($17) +.Loop0: ldq $3,8($17) addq $16,8,$16 srl $4,$19,$5 addq $17,8,$17 @@ -55,17 +55,17 @@ Loop0: ldq $3,8($17) or $3,$3,$4 or $5,$6,$8 stq $8,-8($16) - bne $28,Loop0 + bne $28,.Loop0 -L0: srl $4,$19,$24 - beq $18,Lend +.L0: srl $4,$19,$24 + beq $18,.Lend # warm up phase 1 ldq $1,8($17) subq $18,4,$18 ldq $2,16($17) ldq $3,24($17) ldq $4,32($17) - beq $18,Lcool1 + beq $18,.Lend1 # warm up phase 2 sll $1,$20,$7 srl $1,$19,$21 @@ -82,10 +82,10 @@ L0: srl $4,$19,$24 srl $4,$19,$24 ldq $4,64($17) subq $18,4,$18 - beq $18,Lcool2 + beq $18,.Lend2 .align 4 # main loop -Loop: stq $7,0($16) +.Loop: stq $7,0($16) or $5,$22,$5 stq $8,8($16) or $6,$23,$6 @@ -116,11 +116,9 @@ Loop: stq $7,0($16) ldq $4,96($17) addq $17,32,$17 - bne $18,Loop - unop - unop + bne $18,.Loop # cool down phase 2/1 -Lcool2: stq $7,0($16) +.Lend2: stq $7,0($16) or $5,$22,$5 stq $8,8($16) or $6,$23,$6 @@ -148,7 +146,7 @@ Lcool2: stq $7,0($16) ret $31,($26),1 # cool down phase 1/1 -Lcool1: sll 
$1,$20,$7 +.Lend1: sll $1,$20,$7 srl $1,$19,$21 sll $2,$20,$8 srl $2,$19,$22 @@ -168,6 +166,6 @@ Lcool1: sll $1,$20,$7 stq $24,32($16) ret $31,($26),1 -Lend: stq $24,0($16) +.Lend: stq $24,0($16) ret $31,($26),1 .end __mpn_rshift diff --git a/sysdeps/alpha/alphaev5/sub_n.s b/sysdeps/alpha/alphaev5/sub_n.s new file mode 100644 index 0000000..c9f3a4e --- /dev/null +++ b/sysdeps/alpha/alphaev5/sub_n.s @@ -0,0 +1,148 @@ + # Alpha __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and + # store difference in a third limb vector. + + # Copyright (C) 1995 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Library General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU Library General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ + + # INPUT PARAMETERS + # res_ptr $16 + # s1_ptr $17 + # s2_ptr $18 + # size $19 + + .set noreorder + .set noat +.text + .align 3 + .globl __mpn_sub_n + .ent __mpn_sub_n +__mpn_sub_n: + .frame $30,0,$26,0 + + or $31,$31,$25 # clear cy + subq $19,4,$19 # decr loop cnt + blt $19,.Lend2 # if less than 4 limbs, goto 2nd loop + # Start software pipeline for 1st loop + ldq $0,0($18) + ldq $1,8($18) + ldq $4,0($17) + ldq $5,8($17) + addq $17,32,$17 # update s1_ptr + ldq $2,16($18) + subq $4,$0,$20 # 1st main sub + ldq $3,24($18) + subq $19,4,$19 # decr loop cnt + ldq $6,-16($17) + cmpult $4,$20,$25 # compute cy from last sub + ldq $7,-8($17) + addq $1,$25,$28 # cy add + addq $18,32,$18 # update s2_ptr + subq $5,$28,$21 # 2nd main sub + cmpult $28,$25,$8 # compute cy from last add + blt $19,.Lend1 # if less than 4 limbs remain, jump + # 1st loop handles groups of 4 limbs in a software pipeline + .align 4 +.Loop: cmpult $5,$21,$25 # compute cy from last add + ldq $0,0($18) + or $8,$25,$25 # combine cy from the two adds + ldq $1,8($18) + addq $2,$25,$28 # cy add + ldq $4,0($17) + subq $6,$28,$22 # 3rd main sub + ldq $5,8($17) + cmpult $28,$25,$8 # compute cy from last add + cmpult $6,$22,$25 # compute cy from last add + stq $20,0($16) + or $8,$25,$25 # combine cy from the two adds + stq $21,8($16) + addq $3,$25,$28 # cy add + subq $7,$28,$23 # 4th main sub + cmpult $28,$25,$8 # compute cy from last add + cmpult $7,$23,$25 # compute cy from last add + addq $17,32,$17 # update s1_ptr + or $8,$25,$25 # combine cy from the two adds + addq $16,32,$16 # update res_ptr + addq $0,$25,$28 # cy add + ldq $2,16($18) + subq $4,$28,$20 # 1st main sub + ldq $3,24($18) + cmpult $28,$25,$8 # compute cy from last add + ldq $6,-16($17) + cmpult $4,$20,$25 # compute cy from last add + ldq $7,-8($17) + or $8,$25,$25 # combine cy from the two adds + subq $19,4,$19 # decr loop cnt + stq $22,-16($16) + addq $1,$25,$28 # cy add + stq $23,-8($16) + subq $5,$28,$21 # 2nd main sub + addq $18,32,$18 
# update s2_ptr + cmpult $28,$25,$8 # compute cy from last add + bge $19,.Loop + # Finish software pipeline for 1st loop +.Lend1: cmpult $5,$21,$25 # compute cy from last add + or $8,$25,$25 # combine cy from the two adds + addq $2,$25,$28 # cy add + subq $6,$28,$22 # 3rd main sub + cmpult $28,$25,$8 # compute cy from last add + cmpult $6,$22,$25 # compute cy from last add + stq $20,0($16) + or $8,$25,$25 # combine cy from the two adds + stq $21,8($16) + addq $3,$25,$28 # cy add + subq $7,$28,$23 # 4th main sub + cmpult $28,$25,$8 # compute cy from last add + cmpult $7,$23,$25 # compute cy from last add + or $8,$25,$25 # combine cy from the two adds + addq $16,32,$16 # update res_ptr + stq $22,-16($16) + stq $23,-8($16) +.Lend2: addq $19,4,$19 # restore loop cnt + beq $19,.Lret + # Start software pipeline for 2nd loop + ldq $0,0($18) + ldq $4,0($17) + subq $19,1,$19 + beq $19,.Lend0 + # 2nd loop handles remaining 1-3 limbs + .align 4 +.Loop0: addq $0,$25,$28 # cy add + ldq $0,8($18) + subq $4,$28,$20 # main sub + ldq $1,8($17) + addq $18,8,$18 + cmpult $28,$25,$8 # compute cy from last add + addq $17,8,$17 + stq $20,0($16) + cmpult $4,$20,$25 # compute cy from last add + subq $19,1,$19 # decr loop cnt + or $8,$25,$25 # combine cy from the two adds + addq $16,8,$16 + or $1,$31,$4 + bne $19,.Loop0 +.Lend0: addq $0,$25,$28 # cy add + subq $4,$28,$20 # main sub + cmpult $28,$25,$8 # compute cy from last add + cmpult $4,$20,$25 # compute cy from last add + stq $20,0($16) + or $8,$25,$25 # combine cy from the two adds + +.Lret: or $25,$31,$0 # return cy + ret $31,($26),1 + .end __mpn_sub_n diff --git a/sysdeps/alpha/lshift.s b/sysdeps/alpha/lshift.s index c284349..aa8417b 100644 --- a/sysdeps/alpha/lshift.s +++ b/sysdeps/alpha/lshift.s @@ -53,11 +53,11 @@ __mpn_lshift: and $18,4-1,$20 # number of limbs in first loop srl $4,$7,$0 # compute function result - beq $20,L0 + beq $20,.L0 subq $18,$20,$18 .align 3 -Loop0: +.Loop0: ldq $3,-8($17) subq $16,8,$16 subq $17,8,$17 @@ 
-67,12 +67,12 @@ Loop0: bis $3,$3,$4 bis $5,$6,$8 stq $8,0($16) - bne $20,Loop0 + bne $20,.Loop0 -L0: beq $18,Lend +.L0: beq $18,.Lend .align 3 -Loop: ldq $3,-8($17) +.Loop: ldq $3,-8($17) subq $16,32,$16 subq $18,4,$18 sll $4,$19,$5 @@ -100,9 +100,9 @@ Loop: ldq $3,-8($17) bis $1,$2,$8 stq $8,0($16) - bgt $18,Loop + bgt $18,.Loop -Lend: sll $4,$19,$8 +.Lend: sll $4,$19,$8 stq $8,-8($16) ret $31,($26),1 .end __mpn_lshift diff --git a/sysdeps/alpha/mul_1.s b/sysdeps/alpha/mul_1.s index 3ef194d..58a63df 100644 --- a/sysdeps/alpha/mul_1.s +++ b/sysdeps/alpha/mul_1.s @@ -1,7 +1,7 @@ # Alpha 21064 __mpn_mul_1 -- Multiply a limb vector with a limb and store # the result in a second limb vector. - # Copyright (C) 1992, 1994 Free Software Foundation, Inc. + # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. # This file is part of the GNU MP Library. diff --git a/sysdeps/alpha/rshift.s b/sysdeps/alpha/rshift.s index 74eab04..037b776 100644 --- a/sysdeps/alpha/rshift.s +++ b/sysdeps/alpha/rshift.s @@ -34,7 +34,7 @@ # 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay. # 2. Only aligned instruction pairs can be paired. # 3. The store buffer or silo might not be able to deal with the bandwidth. 
- + .set noreorder .set noat .text @@ -51,11 +51,11 @@ __mpn_rshift: and $18,4-1,$20 # number of limbs in first loop sll $4,$7,$0 # compute function result - beq $20,L0 + beq $20,.L0 subq $18,$20,$18 .align 3 -Loop0: +.Loop0: ldq $3,0($17) addq $16,8,$16 addq $17,8,$17 @@ -65,12 +65,12 @@ Loop0: bis $3,$3,$4 bis $5,$6,$8 stq $8,-8($16) - bne $20,Loop0 + bne $20,.Loop0 -L0: beq $18,Lend +.L0: beq $18,.Lend .align 3 -Loop: ldq $3,0($17) +.Loop: ldq $3,0($17) addq $16,32,$16 subq $18,4,$18 srl $4,$19,$5 @@ -98,9 +98,9 @@ Loop: ldq $3,0($17) bis $1,$2,$8 stq $8,-8($16) - bgt $18,Loop + bgt $18,.Loop -Lend: srl $4,$19,$8 +.Lend: srl $4,$19,$8 stq $8,0($16) ret $31,($26),1 .end __mpn_rshift diff --git a/sysdeps/alpha/submul_1.s b/sysdeps/alpha/submul_1.s index acaa11c..292b2c1 100644 --- a/sysdeps/alpha/submul_1.s +++ b/sysdeps/alpha/submul_1.s @@ -26,16 +26,7 @@ # size r18 # s2_limb r19 - # This code runs at 42 cycles/limb on the 21064. - - # To improve performance for long multiplications, we would use - # 'fetch' for S1 and 'fetch_m' for RES. It's not obvious how to use - # these instructions without slowing down the general code: 1. We can - # only have two prefetches in operation at any time in the Alpha - # architecture. 2. There will seldom be any special alignment - # between RES_PTR and S1_PTR. Maybe we can simply divide the current - # loop into an inner and outer loop, having the inner loop handle - # exactly one prefetch block? + # This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5. 
.set noreorder .set noat @@ -52,7 +43,7 @@ __mpn_submul_1: mulq $2,$19,$3 # $3 = prod_low ldq $5,0($16) # $5 = *res_ptr umulh $2,$19,$0 # $0 = prod_high - beq $18,Lend1 # jump if size was == 1 + beq $18,.Lend1 # jump if size was == 1 ldq $2,0($17) # $2 = s1_limb addq $17,8,$17 # s1_ptr++ subq $18,1,$18 # size-- @@ -60,10 +51,10 @@ __mpn_submul_1: cmpult $5,$3,$4 stq $3,0($16) addq $16,8,$16 # res_ptr++ - beq $18,Lend2 # jump if size was == 2 + beq $18,.Lend2 # jump if size was == 2 .align 3 -Loop: mulq $2,$19,$3 # $3 = prod_low +.Loop: mulq $2,$19,$3 # $3 = prod_low ldq $5,0($16) # $5 = *res_ptr addq $4,$0,$0 # cy_limb = cy_limb + 'cy' subq $18,1,$18 # size-- @@ -77,9 +68,9 @@ Loop: mulq $2,$19,$3 # $3 = prod_low stq $3,0($16) addq $16,8,$16 # res_ptr++ addq $5,$0,$0 # combine carries - bne $18,Loop + bne $18,.Loop -Lend2: mulq $2,$19,$3 # $3 = prod_low +.Lend2: mulq $2,$19,$3 # $3 = prod_low ldq $5,0($16) # $5 = *res_ptr addq $4,$0,$0 # cy_limb = cy_limb + 'cy' umulh $2,$19,$4 # $4 = cy_limb @@ -91,7 +82,7 @@ Lend2: mulq $2,$19,$3 # $3 = prod_low addq $5,$0,$0 # combine carries addq $4,$0,$0 # cy_limb = prod_high + cy ret $31,($26),1 -Lend1: subq $5,$3,$3 +.Lend1: subq $5,$3,$3 cmpult $5,$3,$5 stq $3,0($16) addq $0,$5,$0 diff --git a/sysdeps/alpha/udiv_qrnnd.S b/sysdeps/alpha/udiv_qrnnd.S index bafafd6..ce590ed 100644 --- a/sysdeps/alpha/udiv_qrnnd.S +++ b/sysdeps/alpha/udiv_qrnnd.S @@ -1,6 +1,6 @@ # Alpha 21064 __udiv_qrnnd - # Copyright (C) 1992, 1994 Free Software Foundation, Inc. + # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. # This file is part of the GNU MP Library. 
@@ -21,13 +21,11 @@ .set noreorder .set noat - .text - .align 3 - .globl __udiv_qrnnd - .ent __udiv_qrnnd 0 + .align 3 + .globl __udiv_qrnnd + .ent __udiv_qrnnd __udiv_qrnnd: -__udiv_qrnnd..ng: .frame $30,0,$26,0 .prologue 0 #define cnt $2 @@ -39,9 +37,9 @@ __udiv_qrnnd..ng: #define qb $20 ldiq cnt,16 - blt d,Largedivisor + blt d,.Largedivisor -Loop1: cmplt n0,0,tmp +.Loop1: cmplt n0,0,tmp addq n1,n1,n1 bis n1,tmp,n1 addq n0,n0,n0 @@ -74,12 +72,12 @@ Loop1: cmplt n0,0,tmp cmovne qb,tmp,n1 bis n0,qb,n0 subq cnt,1,cnt - bgt cnt,Loop1 + bgt cnt,.Loop1 stq n1,0(rem_ptr) bis $31,n0,$0 ret $31,($26),1 -Largedivisor: +.Largedivisor: and n0,1,$4 srl n0,1,n0 @@ -91,7 +89,7 @@ Largedivisor: srl d,1,$5 addq $5,$6,$5 -Loop2: cmplt n0,0,tmp +.Loop2: cmplt n0,0,tmp addq n1,n1,n1 bis n1,tmp,n1 addq n0,n0,n0 @@ -124,27 +122,27 @@ Loop2: cmplt n0,0,tmp cmovne qb,tmp,n1 bis n0,qb,n0 subq cnt,1,cnt - bgt cnt,Loop2 + bgt cnt,.Loop2 addq n1,n1,n1 addq $4,n1,n1 - bne $6,Odd + bne $6,.LOdd stq n1,0(rem_ptr) bis $31,n0,$0 ret $31,($26),1 -Odd: +.LOdd: /* q' in n0. r' in n1 */ addq n1,n0,n1 cmpult n1,n0,tmp # tmp := carry from addq - beq tmp,LLp6 + beq tmp,.LLp6 addq n0,1,n0 subq n1,d,n1 -LLp6: cmpult n1,d,tmp - bne tmp,LLp7 +.LLp6: cmpult n1,d,tmp + bne tmp,.LLp7 addq n0,1,n0 subq n1,d,n1 -LLp7: +.LLp7: stq n1,0(rem_ptr) bis $31,n0,$0 ret $31,($26),1 diff --git a/sysdeps/generic/add_n.c b/sysdeps/generic/add_n.c index 6989ab0..647548d 100644 --- a/sysdeps/generic/add_n.c +++ b/sysdeps/generic/add_n.c @@ -1,6 +1,6 @@ -/* __mpn_add_n -- Add two limb vectors of equal, non-zero length. +/* mpn_add_n -- Add two limb vectors of equal, non-zero length. -Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc. +Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -23,9 +23,9 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ mp_limb #if __STDC__ -__mpn_add_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size) +mpn_add_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size) #else -__mpn_add_n (res_ptr, s1_ptr, s2_ptr, size) +mpn_add_n (res_ptr, s1_ptr, s2_ptr, size) register mp_ptr res_ptr; register mp_srcptr s1_ptr; register mp_srcptr s2_ptr; diff --git a/sysdeps/generic/addmul_1.c b/sysdeps/generic/addmul_1.c index fdf3541..6156cab 100644 --- a/sysdeps/generic/addmul_1.c +++ b/sysdeps/generic/addmul_1.c @@ -1,9 +1,9 @@ -/* __mpn_addmul_1 -- multiply the S1_SIZE long limb vector pointed to by S1_PTR +/* mpn_addmul_1 -- multiply the S1_SIZE long limb vector pointed to by S1_PTR by S2_LIMB, add the S1_SIZE least significant limbs of the product to the limb vector pointed to by RES_PTR. Return the most significant limb of the product, adjusted for carry-out from the addition. -Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc. +Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -26,7 +26,7 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include "longlong.h" mp_limb -__mpn_addmul_1 (res_ptr, s1_ptr, s1_size, s2_limb) +mpn_addmul_1 (res_ptr, s1_ptr, s1_size, s2_limb) register mp_ptr res_ptr; register mp_srcptr s1_ptr; mp_size_t s1_size; diff --git a/sysdeps/generic/cmp.c b/sysdeps/generic/cmp.c index 144c885..e499b1e 100644 --- a/sysdeps/generic/cmp.c +++ b/sysdeps/generic/cmp.c @@ -1,6 +1,6 @@ -/* __mpn_cmp -- Compare two low-level natural-number integers. +/* mpn_cmp -- Compare two low-level natural-number integers. -Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc. +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -28,9 +28,9 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ int #if __STDC__ -__mpn_cmp (mp_srcptr op1_ptr, mp_srcptr op2_ptr, mp_size_t size) +mpn_cmp (mp_srcptr op1_ptr, mp_srcptr op2_ptr, mp_size_t size) #else -__mpn_cmp (op1_ptr, op2_ptr, size) +mpn_cmp (op1_ptr, op2_ptr, size) mp_srcptr op1_ptr; mp_srcptr op2_ptr; mp_size_t size; diff --git a/sysdeps/generic/divmod_1.c b/sysdeps/generic/divmod_1.c index 2989d36..c040327 100644 --- a/sysdeps/generic/divmod_1.c +++ b/sysdeps/generic/divmod_1.c @@ -1,4 +1,4 @@ -/* __mpn_divmod_1(quot_ptr, dividend_ptr, dividend_size, divisor_limb) -- +/* mpn_divmod_1(quot_ptr, dividend_ptr, dividend_size, divisor_limb) -- Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB. Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR. Return the single-limb remainder. @@ -6,7 +6,7 @@ QUOT_PTR and DIVIDEND_PTR might point to the same limb. -Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc. +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -41,11 +41,11 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ mp_limb #if __STDC__ -__mpn_divmod_1 (mp_ptr quot_ptr, +mpn_divmod_1 (mp_ptr quot_ptr, mp_srcptr dividend_ptr, mp_size_t dividend_size, mp_limb divisor_limb) #else -__mpn_divmod_1 (quot_ptr, dividend_ptr, dividend_size, divisor_limb) +mpn_divmod_1 (quot_ptr, dividend_ptr, dividend_size, divisor_limb) mp_ptr quot_ptr; mp_srcptr dividend_ptr; mp_size_t dividend_size; diff --git a/sysdeps/generic/lshift.c b/sysdeps/generic/lshift.c index 1ba0903..35794e4 100644 --- a/sysdeps/generic/lshift.c +++ b/sysdeps/generic/lshift.c @@ -1,6 +1,6 @@ -/* __mpn_lshift -- Shift left low level. +/* mpn_lshift -- Shift left low level. -Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc. +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -32,11 +32,11 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ mp_limb #if __STDC__ -__mpn_lshift (register mp_ptr wp, +mpn_lshift (register mp_ptr wp, register mp_srcptr up, mp_size_t usize, register unsigned int cnt) #else -__mpn_lshift (wp, up, usize, cnt) +mpn_lshift (wp, up, usize, cnt) register mp_ptr wp; register mp_srcptr up; mp_size_t usize; diff --git a/sysdeps/generic/mod_1.c b/sysdeps/generic/mod_1.c index 8a49fb4..0842f6b 100644 --- a/sysdeps/generic/mod_1.c +++ b/sysdeps/generic/mod_1.c @@ -1,4 +1,4 @@ -/* __mpn_mod_1(dividend_ptr, dividend_size, divisor_limb) -- +/* mpn_mod_1(dividend_ptr, dividend_size, divisor_limb) -- Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB. Return the single-limb remainder. There are no constraints on the value of the divisor. @@ -38,10 +38,10 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ mp_limb #if __STDC__ -__mpn_mod_1 (mp_srcptr dividend_ptr, mp_size_t dividend_size, - mp_limb divisor_limb) +mpn_mod_1 (mp_srcptr dividend_ptr, mp_size_t dividend_size, + mp_limb divisor_limb) #else -__mpn_mod_1 (dividend_ptr, dividend_size, divisor_limb) +mpn_mod_1 (dividend_ptr, dividend_size, divisor_limb) mp_srcptr dividend_ptr; mp_size_t dividend_size; mp_limb divisor_limb; diff --git a/sysdeps/generic/mul.c b/sysdeps/generic/mul.c index cd2acb5..3f3f41e 100644 --- a/sysdeps/generic/mul.c +++ b/sysdeps/generic/mul.c @@ -1,6 +1,6 @@ -/* __mpn_mul -- Multiply two natural numbers. +/* mpn_mul -- Multiply two natural numbers. -Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc. +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -43,11 +43,11 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ mp_limb #if __STDC__ -__mpn_mul (mp_ptr prodp, - mp_srcptr up, mp_size_t usize, - mp_srcptr vp, mp_size_t vsize) +mpn_mul (mp_ptr prodp, + mp_srcptr up, mp_size_t usize, + mp_srcptr vp, mp_size_t vsize) #else -__mpn_mul (prodp, up, usize, vp, vsize) +mpn_mul (prodp, up, usize, vp, vsize) mp_ptr prodp; mp_srcptr up; mp_size_t usize; @@ -58,6 +58,7 @@ __mpn_mul (prodp, up, usize, vp, vsize) mp_ptr prod_endp = prodp + usize + vsize - 1; mp_limb cy; mp_ptr tspace; + TMP_DECL (marker); if (vsize < KARATSUBA_THRESHOLD) { @@ -86,7 +87,7 @@ __mpn_mul (prodp, up, usize, vp, vsize) cy_limb = 0; } else - cy_limb = __mpn_mul_1 (prodp, up, usize, v_limb); + cy_limb = mpn_mul_1 (prodp, up, usize, v_limb); prodp[usize] = cy_limb; prodp++; @@ -100,10 +101,10 @@ __mpn_mul (prodp, up, usize, vp, vsize) { cy_limb = 0; if (v_limb == 1) - cy_limb = __mpn_add_n (prodp, prodp, up, usize); + cy_limb = mpn_add_n (prodp, prodp, up, usize); } else - cy_limb = __mpn_addmul_1 (prodp, up, usize, v_limb); + cy_limb = mpn_addmul_1 (prodp, up, usize, v_limb); prodp[usize] = cy_limb; prodp++; @@ -111,7 +112,9 @@ __mpn_mul (prodp, up, usize, vp, vsize) return cy_limb; } - tspace = (mp_ptr) alloca (2 * vsize * BYTES_PER_MP_LIMB); + TMP_MARK (marker); + + tspace = (mp_ptr) TMP_ALLOC (2 * vsize * BYTES_PER_MP_LIMB); MPN_MUL_N_RECURSE (prodp, up, vp, vsize, tspace); prodp += vsize; @@ -119,12 +122,12 @@ __mpn_mul (prodp, up, usize, vp, vsize) usize -= vsize; if (usize >= vsize) { - mp_ptr tp = (mp_ptr) alloca (2 * vsize * BYTES_PER_MP_LIMB); + mp_ptr tp = (mp_ptr) TMP_ALLOC (2 * vsize * BYTES_PER_MP_LIMB); do { MPN_MUL_N_RECURSE (tp, up, vp, vsize, tspace); - cy = __mpn_add_n (prodp, prodp, tp, vsize); - __mpn_add_1 (prodp + vsize, tp + vsize, vsize, cy); + cy = mpn_add_n (prodp, prodp, tp, vsize); + mpn_add_1 (prodp + vsize, tp + vsize, vsize, cy); prodp += vsize; up += vsize; usize -= vsize; @@ -138,10 +141,11 @@ __mpn_mul (prodp, up, usize, vp, vsize) if (usize != 0) { - __mpn_mul (tspace, vp, 
vsize, up, usize); - cy = __mpn_add_n (prodp, prodp, tspace, vsize); - __mpn_add_1 (prodp + vsize, tspace + vsize, usize, cy); + mpn_mul (tspace, vp, vsize, up, usize); + cy = mpn_add_n (prodp, prodp, tspace, vsize); + mpn_add_1 (prodp + vsize, tspace + vsize, usize, cy); } + TMP_FREE (marker); return *prod_endp; } diff --git a/sysdeps/generic/mul_1.c b/sysdeps/generic/mul_1.c index 37dbc33..01fdbbb 100644 --- a/sysdeps/generic/mul_1.c +++ b/sysdeps/generic/mul_1.c @@ -1,7 +1,7 @@ -/* __mpn_mul_1 -- Multiply a limb vector with a single limb and +/* mpn_mul_1 -- Multiply a limb vector with a single limb and store the product in a second limb vector. -Copyright (C) 1991, 1992, 1993, 1994 Free Software Foundation, Inc. +Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -24,7 +24,7 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include "longlong.h" mp_limb -__mpn_mul_1 (res_ptr, s1_ptr, s1_size, s2_limb) +mpn_mul_1 (res_ptr, s1_ptr, s1_size, s2_limb) register mp_ptr res_ptr; register mp_srcptr s1_ptr; mp_size_t s1_size; diff --git a/sysdeps/generic/mul_n.c b/sysdeps/generic/mul_n.c index e37c5d8..049f63d 100644 --- a/sysdeps/generic/mul_n.c +++ b/sysdeps/generic/mul_n.c @@ -1,6 +1,6 @@ -/* __mpn_mul_n -- Multiply two natural numbers of length n. +/* mpn_mul_n -- Multiply two natural numbers of length n. -Copyright (C) 1991, 1992, 1993, 1994 Free Software Foundation, Inc. +Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -41,13 +41,6 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #define KARATSUBA_THRESHOLD 2 #endif -void -#if __STDC__ -____mpn_mul_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr); -#else -____mpn_mul_n (); -#endif - /* Handle simple cases with traditional multiplication. This is the most critical code of multiplication. 
All multiplies rely @@ -57,9 +50,9 @@ ____mpn_mul_n (); void #if __STDC__ -____mpn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size) +impn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size) #else -____mpn_mul_n_basecase (prodp, up, vp, size) +impn_mul_n_basecase (prodp, up, vp, size) mp_ptr prodp; mp_srcptr up; mp_srcptr vp; @@ -82,7 +75,7 @@ ____mpn_mul_n_basecase (prodp, up, vp, size) cy_limb = 0; } else - cy_limb = __mpn_mul_1 (prodp, up, size, v_limb); + cy_limb = mpn_mul_1 (prodp, up, size, v_limb); prodp[size] = cy_limb; prodp++; @@ -96,10 +89,10 @@ ____mpn_mul_n_basecase (prodp, up, vp, size) { cy_limb = 0; if (v_limb == 1) - cy_limb = __mpn_add_n (prodp, prodp, up, size); + cy_limb = mpn_add_n (prodp, prodp, up, size); } else - cy_limb = __mpn_addmul_1 (prodp, up, size, v_limb); + cy_limb = mpn_addmul_1 (prodp, up, size, v_limb); prodp[size] = cy_limb; prodp++; @@ -108,10 +101,10 @@ ____mpn_mul_n_basecase (prodp, up, vp, size) void #if __STDC__ -____mpn_mul_n (mp_ptr prodp, +impn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace) #else -____mpn_mul_n (prodp, up, vp, size, tspace) +impn_mul_n (prodp, up, vp, size, tspace) mp_ptr prodp; mp_srcptr up; mp_srcptr vp; @@ -135,9 +128,9 @@ ____mpn_mul_n (prodp, up, vp, size, tspace) mp_limb cy_limb; MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace); - cy_limb = __mpn_addmul_1 (prodp + esize, up, esize, vp[esize]); + cy_limb = mpn_addmul_1 (prodp + esize, up, esize, vp[esize]); prodp[esize + esize] = cy_limb; - cy_limb = __mpn_addmul_1 (prodp + esize, vp, size, up[esize]); + cy_limb = mpn_addmul_1 (prodp + esize, vp, size, up[esize]); prodp[esize + size] = cy_limb; } @@ -170,24 +163,24 @@ ____mpn_mul_n (prodp, up, vp, size, tspace) /*** Product M. 
________________ |_(U1-U0)(V0-V1)_| */ - if (__mpn_cmp (up + hsize, up, hsize) >= 0) + if (mpn_cmp (up + hsize, up, hsize) >= 0) { - __mpn_sub_n (prodp, up + hsize, up, hsize); + mpn_sub_n (prodp, up + hsize, up, hsize); negflg = 0; } else { - __mpn_sub_n (prodp, up, up + hsize, hsize); + mpn_sub_n (prodp, up, up + hsize, hsize); negflg = 1; } - if (__mpn_cmp (vp + hsize, vp, hsize) >= 0) + if (mpn_cmp (vp + hsize, vp, hsize) >= 0) { - __mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize); + mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize); negflg ^= 1; } else { - __mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize); + mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize); /* No change of NEGFLG. */ } /* Read temporary operands from low part of PROD. @@ -197,13 +190,13 @@ ____mpn_mul_n (prodp, up, vp, size, tspace) /*** Add/copy product H. */ MPN_COPY (prodp + hsize, prodp + size, hsize); - cy = __mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize); + cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize); /*** Add product M (if NEGFLG M is a negative number). */ if (negflg) - cy -= __mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size); + cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size); else - cy += __mpn_add_n (prodp + hsize, prodp + hsize, tspace, size); + cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size); /*** Product L. ________________ ________________ |________________||____U0 x V0_____| */ @@ -214,22 +207,22 @@ ____mpn_mul_n (prodp, up, vp, size, tspace) /*** Add/copy Product L (twice). 
*/ - cy += __mpn_add_n (prodp + hsize, prodp + hsize, tspace, size); + cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size); if (cy) - __mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy); + mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy); MPN_COPY (prodp, tspace, hsize); - cy = __mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize); + cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize); if (cy) - __mpn_add_1 (prodp + size, prodp + size, size, 1); + mpn_add_1 (prodp + size, prodp + size, size, 1); } } void #if __STDC__ -____mpn_sqr_n_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t size) +impn_sqr_n_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t size) #else -____mpn_sqr_n_basecase (prodp, up, size) +impn_sqr_n_basecase (prodp, up, size) mp_ptr prodp; mp_srcptr up; mp_size_t size; @@ -251,7 +244,7 @@ ____mpn_sqr_n_basecase (prodp, up, size) cy_limb = 0; } else - cy_limb = __mpn_mul_1 (prodp, up, size, v_limb); + cy_limb = mpn_mul_1 (prodp, up, size, v_limb); prodp[size] = cy_limb; prodp++; @@ -265,10 +258,10 @@ ____mpn_sqr_n_basecase (prodp, up, size) { cy_limb = 0; if (v_limb == 1) - cy_limb = __mpn_add_n (prodp, prodp, up, size); + cy_limb = mpn_add_n (prodp, prodp, up, size); } else - cy_limb = __mpn_addmul_1 (prodp, up, size, v_limb); + cy_limb = mpn_addmul_1 (prodp, up, size, v_limb); prodp[size] = cy_limb; prodp++; @@ -277,10 +270,10 @@ ____mpn_sqr_n_basecase (prodp, up, size) void #if __STDC__ -____mpn_sqr_n (mp_ptr prodp, +impn_sqr_n (mp_ptr prodp, mp_srcptr up, mp_size_t size, mp_ptr tspace) #else -____mpn_sqr_n (prodp, up, size, tspace) +impn_sqr_n (prodp, up, size, tspace) mp_ptr prodp; mp_srcptr up; mp_size_t size; @@ -303,9 +296,9 @@ ____mpn_sqr_n (prodp, up, size, tspace) mp_limb cy_limb; MPN_SQR_N_RECURSE (prodp, up, esize, tspace); - cy_limb = __mpn_addmul_1 (prodp + esize, up, esize, up[esize]); + cy_limb = mpn_addmul_1 (prodp + esize, up, esize, up[esize]); prodp[esize + esize] = 
cy_limb; - cy_limb = __mpn_addmul_1 (prodp + esize, up, size, up[esize]); + cy_limb = mpn_addmul_1 (prodp + esize, up, size, up[esize]); prodp[esize + size] = cy_limb; } @@ -322,13 +315,13 @@ ____mpn_sqr_n (prodp, up, size, tspace) /*** Product M. ________________ |_(U1-U0)(U0-U1)_| */ - if (__mpn_cmp (up + hsize, up, hsize) >= 0) + if (mpn_cmp (up + hsize, up, hsize) >= 0) { - __mpn_sub_n (prodp, up + hsize, up, hsize); + mpn_sub_n (prodp, up + hsize, up, hsize); } else { - __mpn_sub_n (prodp, up, up + hsize, hsize); + mpn_sub_n (prodp, up, up + hsize, hsize); } /* Read temporary operands from low part of PROD. @@ -338,10 +331,10 @@ ____mpn_sqr_n (prodp, up, size, tspace) /*** Add/copy product H. */ MPN_COPY (prodp + hsize, prodp + size, hsize); - cy = __mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize); + cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize); /*** Add product M (if NEGFLG M is a negative number). */ - cy -= __mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size); + cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size); /*** Product L. ________________ ________________ |________________||____U0 x U0_____| */ @@ -352,53 +345,56 @@ ____mpn_sqr_n (prodp, up, size, tspace) /*** Add/copy Product L (twice). */ - cy += __mpn_add_n (prodp + hsize, prodp + hsize, tspace, size); + cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size); if (cy) - __mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy); + mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy); MPN_COPY (prodp, tspace, hsize); - cy = __mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize); + cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize); if (cy) - __mpn_add_1 (prodp + size, prodp + size, size, 1); + mpn_add_1 (prodp + size, prodp + size, size, 1); } } /* This should be made into an inline function in gmp.h. 
*/ inline void #if __STDC__ -__mpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size) +mpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size) #else -__mpn_mul_n (prodp, up, vp, size) +mpn_mul_n (prodp, up, vp, size) mp_ptr prodp; mp_srcptr up; mp_srcptr vp; mp_size_t size; #endif { + TMP_DECL (marker); + TMP_MARK (marker); if (up == vp) { if (size < KARATSUBA_THRESHOLD) { - ____mpn_sqr_n_basecase (prodp, up, size); + impn_sqr_n_basecase (prodp, up, size); } else { mp_ptr tspace; - tspace = (mp_ptr) alloca (2 * size * BYTES_PER_MP_LIMB); - ____mpn_sqr_n (prodp, up, size, tspace); + tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB); + impn_sqr_n (prodp, up, size, tspace); } } else { if (size < KARATSUBA_THRESHOLD) { - ____mpn_mul_n_basecase (prodp, up, vp, size); + impn_mul_n_basecase (prodp, up, vp, size); } else { mp_ptr tspace; - tspace = (mp_ptr) alloca (2 * size * BYTES_PER_MP_LIMB); - ____mpn_mul_n (prodp, up, vp, size, tspace); + tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB); + impn_mul_n (prodp, up, vp, size, tspace); } } + TMP_FREE (marker); } diff --git a/sysdeps/generic/rshift.c b/sysdeps/generic/rshift.c index 966cc7b..7ce02e0 100644 --- a/sysdeps/generic/rshift.c +++ b/sysdeps/generic/rshift.c @@ -1,6 +1,6 @@ -/* __mpn_rshift -- Shift right a low-level natural-number integer. +/* mpn_rshift -- Shift right a low-level natural-number integer. -Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc. +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -32,11 +32,11 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ mp_limb #if __STDC__ -__mpn_rshift (register mp_ptr wp, +mpn_rshift (register mp_ptr wp, register mp_srcptr up, mp_size_t usize, register unsigned int cnt) #else -__mpn_rshift (wp, up, usize, cnt) +mpn_rshift (wp, up, usize, cnt) register mp_ptr wp; register mp_srcptr up; mp_size_t usize; diff --git a/sysdeps/generic/sub_n.c b/sysdeps/generic/sub_n.c index 6b33e66..f3c83d1 100644 --- a/sysdeps/generic/sub_n.c +++ b/sysdeps/generic/sub_n.c @@ -1,6 +1,6 @@ -/* __mpn_sub_n -- Subtract two limb vectors of equal, non-zero length. +/* mpn_sub_n -- Subtract two limb vectors of equal, non-zero length. -Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc. +Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -23,9 +23,9 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ mp_limb #if __STDC__ -__mpn_sub_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size) +mpn_sub_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size) #else -__mpn_sub_n (res_ptr, s1_ptr, s2_ptr, size) +mpn_sub_n (res_ptr, s1_ptr, s2_ptr, size) register mp_ptr res_ptr; register mp_srcptr s1_ptr; register mp_srcptr s2_ptr; diff --git a/sysdeps/generic/submul_1.c b/sysdeps/generic/submul_1.c index 855dd3f..57122a5 100644 --- a/sysdeps/generic/submul_1.c +++ b/sysdeps/generic/submul_1.c @@ -1,9 +1,9 @@ -/* __mpn_submul_1 -- multiply the S1_SIZE long limb vector pointed to by S1_PTR +/* mpn_submul_1 -- multiply the S1_SIZE long limb vector pointed to by S1_PTR by S2_LIMB, subtract the S1_SIZE least significant limbs of the product from the limb vector pointed to by RES_PTR. Return the most significant limb of the product, adjusted for carry-out from the subtraction. -Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc. +Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc. This file is part of the GNU MP Library. 
@@ -26,7 +26,7 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include "longlong.h" mp_limb -__mpn_submul_1 (res_ptr, s1_ptr, s1_size, s2_limb) +mpn_submul_1 (res_ptr, s1_ptr, s1_size, s2_limb) register mp_ptr res_ptr; register mp_srcptr s1_ptr; mp_size_t s1_size; diff --git a/sysdeps/i386/gmp-mparam.h b/sysdeps/i386/gmp-mparam.h index 687f12a..ddc308a 100644 --- a/sysdeps/i386/gmp-mparam.h +++ b/sysdeps/i386/gmp-mparam.h @@ -1,6 +1,6 @@ /* gmp-mparam.h -- Compiler/machine parameter header file. -Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc. +Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc. This file is part of the GNU MP Library. diff --git a/sysdeps/i386/i586/add_n.S b/sysdeps/i386/i586/add_n.S index f52f9c6..f214c8c 100644 --- a/sysdeps/i386/i586/add_n.S +++ b/sysdeps/i386/i586/add_n.S @@ -1,7 +1,7 @@ /* Pentium __mpn_add_n -- Add two limb vectors of the same length > 0 and store sum in a third limb vector. -Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. +Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -30,13 +30,6 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ #include "sysdep.h" #include "asm-syntax.h" -#define t1 %eax -#define t2 %edx -#define src1 %esi -#define src2 %ebp -#define dst %edi -#define x %ebx - .text ALIGN (3) .globl C_SYMBOL_NAME(__mpn_add_n) @@ -46,85 +39,85 @@ C_SYMBOL_NAME(__mpn_add_n:) pushl %ebx pushl %ebp - movl 20(%esp),dst /* res_ptr */ - movl 24(%esp),src1 /* s1_ptr */ - movl 28(%esp),src2 /* s2_ptr */ + movl 20(%esp),%edi /* res_ptr */ + movl 24(%esp),%esi /* s1_ptr */ + movl 28(%esp),%ebp /* s2_ptr */ movl 32(%esp),%ecx /* size */ - movl (src2),x + movl (%ebp),%ebx decl %ecx - movl %ecx,t2 + movl %ecx,%edx shrl $3,%ecx - andl $7,t2 + andl $7,%edx testl %ecx,%ecx /* zero carry flag */ jz Lend - pushl t2 + pushl %edx ALIGN (3) -Loop: movl 28(dst),%eax /* fetch destination cache line */ - leal 32(dst),dst - -L1: movl (src1),t1 - movl 4(src1),t2 - adcl x,t1 - movl 4(src2),x - adcl x,t2 - movl 8(src2),x - movl t1,-32(dst) - movl t2,-28(dst) - -L2: movl 8(src1),t1 - movl 12(src1),t2 - adcl x,t1 - movl 12(src2),x - adcl x,t2 - movl 16(src2),x - movl t1,-24(dst) - movl t2,-20(dst) - -L3: movl 16(src1),t1 - movl 20(src1),t2 - adcl x,t1 - movl 20(src2),x - adcl x,t2 - movl 24(src2),x - movl t1,-16(dst) - movl t2,-12(dst) - -L4: movl 24(src1),t1 - movl 28(src1),t2 - adcl x,t1 - movl 28(src2),x - adcl x,t2 - movl 32(src2),x - movl t1,-8(dst) - movl t2,-4(dst) - - leal 32(src1),src1 - leal 32(src2),src2 +Loop: movl 28(%edi),%eax /* fetch destination cache line */ + leal 32(%edi),%edi + +L1: movl (%esi),%eax + movl 4(%esi),%edx + adcl %ebx,%eax + movl 4(%ebp),%ebx + adcl %ebx,%edx + movl 8(%ebp),%ebx + movl %eax,-32(%edi) + movl %edx,-28(%edi) + +L2: movl 8(%esi),%eax + movl 12(%esi),%edx + adcl %ebx,%eax + movl 12(%ebp),%ebx + adcl %ebx,%edx + movl 16(%ebp),%ebx + movl %eax,-24(%edi) + movl %edx,-20(%edi) + +L3: movl 16(%esi),%eax + movl 20(%esi),%edx + adcl %ebx,%eax + movl 20(%ebp),%ebx + adcl %ebx,%edx + movl 24(%ebp),%ebx + movl %eax,-16(%edi) + movl %edx,-12(%edi) + +L4: movl 24(%esi),%eax + movl 
28(%esi),%edx + adcl %ebx,%eax + movl 28(%ebp),%ebx + adcl %ebx,%edx + movl 32(%ebp),%ebx + movl %eax,-8(%edi) + movl %edx,-4(%edi) + + leal 32(%esi),%esi + leal 32(%ebp),%ebp decl %ecx jnz Loop - popl t2 + popl %edx Lend: - decl t2 /* test t2 w/o clobbering carry */ + decl %edx /* test %edx w/o clobbering carry */ js Lend2 - incl t2 + incl %edx Loop2: - leal 4(dst),dst - movl (src1),t1 - adcl x,t1 - movl 4(src2),x - movl t1,-4(dst) - leal 4(src1),src1 - leal 4(src2),src2 - decl t2 + leal 4(%edi),%edi + movl (%esi),%eax + adcl %ebx,%eax + movl 4(%ebp),%ebx + movl %eax,-4(%edi) + leal 4(%esi),%esi + leal 4(%ebp),%ebp + decl %edx jnz Loop2 Lend2: - movl (src1),t1 - adcl x,t1 - movl t1,(dst) + movl (%esi),%eax + adcl %ebx,%eax + movl %eax,(%edi) sbbl %eax,%eax negl %eax diff --git a/sysdeps/i386/i586/addmul_1.S b/sysdeps/i386/i586/addmul_1.S index b222840..5bf2603 100644 --- a/sysdeps/i386/i586/addmul_1.S +++ b/sysdeps/i386/i586/addmul_1.S @@ -1,7 +1,7 @@ /* Pentium __mpn_addmul_1 -- Multiply a limb vector with a limb and add the result to a second limb vector. -Copyright (C) 1992, 1994 Free Software Foundation, Inc. +Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -32,12 +32,12 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ #define res_ptr edi #define s1_ptr esi +#define size ecx #define s2_limb ebp TEXT ALIGN (3) GLOBL C_SYMBOL_NAME(__mpn_addmul_1) - .type C_SYMBOL_NAME(__mpn_addmul_1),@function C_SYMBOL_NAME(__mpn_addmul_1:) INSN1(push,l ,R(edi)) @@ -47,38 +47,36 @@ C_SYMBOL_NAME(__mpn_addmul_1:) INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20)) INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24)) - INSN2(mov,l ,R(ecx),MEM_DISP(esp,28)) + INSN2(mov,l ,R(size),MEM_DISP(esp,28)) INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32)) - INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,ecx,4)) - INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,ecx,4)) - INSN1(neg,l ,R(ecx)) - INSN2(xor,l ,R(edx),R(edx)) + INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) + INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4)) + INSN1(neg,l ,R(size)) + INSN2(xor,l ,R(ebx),R(ebx)) ALIGN (3) -Loop: - INSN2(mov,l ,R(ebx),R(edx)) - INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,ecx,4)) + +Loop: INSN2(adc,l ,R(ebx),$0) + INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4)) INSN1(mul,l ,R(s2_limb)) INSN2(add,l ,R(eax),R(ebx)) - INSN2(mov,l ,R(ebx),MEM_INDEX(res_ptr,ecx,4)) + INSN2(mov,l ,R(ebx),MEM_INDEX(res_ptr,size,4)) INSN2(adc,l ,R(edx),$0) INSN2(add,l ,R(ebx),R(eax)) - INSN2(adc,l ,R(edx),$0) - INSN2(mov,l ,MEM_INDEX(res_ptr,ecx,4),R(ebx)) + INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx)) + INSN1(inc,l ,R(size)) - INSN1(inc,l ,R(ecx)) + INSN2(mov,l ,R(ebx),R(edx)) INSN1(jnz, ,Loop) - - INSN2(mov,l ,R(eax),R(edx)) + INSN2(adc,l ,R(ebx),$0) + INSN2(mov,l ,R(eax),R(ebx)) INSN1(pop,l ,R(ebp)) INSN1(pop,l ,R(ebx)) INSN1(pop,l ,R(esi)) INSN1(pop,l ,R(edi)) ret -Lfe1: - .size C_SYMBOL_NAME(__mpn_addmul_1),Lfe1-C_SYMBOL_NAME(__mpn_addmul_1) diff --git a/sysdeps/i386/i586/mul_1.S b/sysdeps/i386/i586/mul_1.S index 2b7258e..048c060 100644 --- a/sysdeps/i386/i586/mul_1.S +++ b/sysdeps/i386/i586/mul_1.S @@ -1,7 +1,7 @@ /* Pentium __mpn_mul_1 -- Multiply a limb vector with a limb and store the result in a second limb vector. -Copyright (C) 1992, 1994 Free Software Foundation, Inc. 
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -53,24 +53,24 @@ C_SYMBOL_NAME(__mpn_mul_1:) INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4)) INSN1(neg,l ,R(size)) - INSN2(xor,l ,R(edx),R(edx)) + INSN2(xor,l ,R(ebx),R(ebx)) ALIGN (3) -Loop: - INSN2(mov,l ,R(ebx),R(edx)) + +Loop: INSN2(adc,l ,R(ebx),$0) INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4)) INSN1(mul,l ,R(s2_limb)) - INSN2(add,l ,R(eax),R(ebx)) - - INSN2(adc,l ,R(edx),$0) - INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(eax)) + INSN2(add,l ,R(ebx),R(eax)) + INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx)) INSN1(inc,l ,R(size)) - INSN1(jnz, ,Loop) + INSN2(mov,l ,R(ebx),R(edx)) + INSN1(jnz, ,Loop) - INSN2(mov,l ,R(eax),R(edx)) + INSN2(adc,l ,R(ebx),$0) + INSN2(mov,l ,R(eax),R(ebx)) INSN1(pop,l ,R(ebp)) INSN1(pop,l ,R(ebx)) INSN1(pop,l ,R(esi)) diff --git a/sysdeps/i386/i586/sub_n.S b/sysdeps/i386/i586/sub_n.S index 9c964a8..cd158a5 100644 --- a/sysdeps/i386/i586/sub_n.S +++ b/sysdeps/i386/i586/sub_n.S @@ -1,7 +1,7 @@ /* Pentium __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and store difference in a third limb vector. -Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. +Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -30,13 +30,6 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ #include "sysdep.h" #include "asm-syntax.h" -#define t1 %eax -#define t2 %edx -#define src1 %esi -#define src2 %ebp -#define dst %edi -#define x %ebx - .text ALIGN (3) .globl C_SYMBOL_NAME(__mpn_sub_n) @@ -46,85 +39,85 @@ C_SYMBOL_NAME(__mpn_sub_n:) pushl %ebx pushl %ebp - movl 20(%esp),dst /* res_ptr */ - movl 24(%esp),src1 /* s1_ptr */ - movl 28(%esp),src2 /* s2_ptr */ + movl 20(%esp),%edi /* res_ptr */ + movl 24(%esp),%esi /* s1_ptr */ + movl 28(%esp),%ebp /* s2_ptr */ movl 32(%esp),%ecx /* size */ - movl (src2),x + movl (%ebp),%ebx decl %ecx - movl %ecx,t2 + movl %ecx,%edx shrl $3,%ecx - andl $7,t2 + andl $7,%edx testl %ecx,%ecx /* zero carry flag */ jz Lend - pushl t2 + pushl %edx ALIGN (3) -Loop: movl 28(dst),%eax /* fetch destination cache line */ - leal 32(dst),dst - -L1: movl (src1),t1 - movl 4(src1),t2 - sbbl x,t1 - movl 4(src2),x - sbbl x,t2 - movl 8(src2),x - movl t1,-32(dst) - movl t2,-28(dst) - -L2: movl 8(src1),t1 - movl 12(src1),t2 - sbbl x,t1 - movl 12(src2),x - sbbl x,t2 - movl 16(src2),x - movl t1,-24(dst) - movl t2,-20(dst) - -L3: movl 16(src1),t1 - movl 20(src1),t2 - sbbl x,t1 - movl 20(src2),x - sbbl x,t2 - movl 24(src2),x - movl t1,-16(dst) - movl t2,-12(dst) - -L4: movl 24(src1),t1 - movl 28(src1),t2 - sbbl x,t1 - movl 28(src2),x - sbbl x,t2 - movl 32(src2),x - movl t1,-8(dst) - movl t2,-4(dst) - - leal 32(src1),src1 - leal 32(src2),src2 +Loop: movl 28(%edi),%eax /* fetch destination cache line */ + leal 32(%edi),%edi + +L1: movl (%esi),%eax + movl 4(%esi),%edx + sbbl %ebx,%eax + movl 4(%ebp),%ebx + sbbl %ebx,%edx + movl 8(%ebp),%ebx + movl %eax,-32(%edi) + movl %edx,-28(%edi) + +L2: movl 8(%esi),%eax + movl 12(%esi),%edx + sbbl %ebx,%eax + movl 12(%ebp),%ebx + sbbl %ebx,%edx + movl 16(%ebp),%ebx + movl %eax,-24(%edi) + movl %edx,-20(%edi) + +L3: movl 16(%esi),%eax + movl 20(%esi),%edx + sbbl %ebx,%eax + movl 20(%ebp),%ebx + sbbl %ebx,%edx + movl 24(%ebp),%ebx + movl %eax,-16(%edi) + movl %edx,-12(%edi) + +L4: movl 24(%esi),%eax + movl 
28(%esi),%edx + sbbl %ebx,%eax + movl 28(%ebp),%ebx + sbbl %ebx,%edx + movl 32(%ebp),%ebx + movl %eax,-8(%edi) + movl %edx,-4(%edi) + + leal 32(%esi),%esi + leal 32(%ebp),%ebp decl %ecx jnz Loop - popl t2 + popl %edx Lend: - decl t2 /* test t2 w/o clobbering carry */ + decl %edx /* test %edx w/o clobbering carry */ js Lend2 - incl t2 + incl %edx Loop2: - leal 4(dst),dst - movl (src1),t1 - sbbl x,t1 - movl 4(src2),x - movl t1,-4(dst) - leal 4(src1),src1 - leal 4(src2),src2 - decl t2 + leal 4(%edi),%edi + movl (%esi),%eax + sbbl %ebx,%eax + movl 4(%ebp),%ebx + movl %eax,-4(%edi) + leal 4(%esi),%esi + leal 4(%ebp),%ebp + decl %edx jnz Loop2 Lend2: - movl (src1),t1 - sbbl x,t1 - movl t1,(dst) + movl (%esi),%eax + sbbl %ebx,%eax + movl %eax,(%edi) sbbl %eax,%eax negl %eax diff --git a/sysdeps/i386/i586/submul_1.S b/sysdeps/i386/i586/submul_1.S index 14bfe54..440f64f 100644 --- a/sysdeps/i386/i586/submul_1.S +++ b/sysdeps/i386/i586/submul_1.S @@ -1,7 +1,7 @@ /* Pentium __mpn_submul_1 -- Multiply a limb vector with a limb and subtract the result from a second limb vector. -Copyright (C) 1992, 1994 Free Software Foundation, Inc. +Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. This file is part of the GNU MP Library. 
@@ -53,10 +53,10 @@ C_SYMBOL_NAME(__mpn_submul_1:) INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4)) INSN1(neg,l ,R(size)) - INSN2(xor,l ,R(edx),R(edx)) + INSN2(xor,l ,R(ebx),R(ebx)) ALIGN (3) -Loop: - INSN2(mov,l ,R(ebx),R(edx)) + +Loop: INSN2(adc,l ,R(ebx),$0) INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4)) INSN1(mul,l ,R(s2_limb)) @@ -67,14 +67,14 @@ Loop: INSN2(adc,l ,R(edx),$0) INSN2(sub,l ,R(ebx),R(eax)) - INSN2(adc,l ,R(edx),$0) INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx)) - INSN1(inc,l ,R(size)) - INSN1(jnz, ,Loop) + INSN2(mov,l ,R(ebx),R(edx)) + INSN1(jnz, ,Loop) - INSN2(mov,l ,R(eax),R(edx)) + INSN2(adc,l ,R(ebx),$0) + INSN2(mov,l ,R(eax),R(ebx)) INSN1(pop,l ,R(ebp)) INSN1(pop,l ,R(ebx)) INSN1(pop,l ,R(esi)) diff --git a/sysdeps/m68k/add_n.S b/sysdeps/m68k/add_n.S index ea7a4458..754af9f 100644 --- a/sysdeps/m68k/add_n.S +++ b/sysdeps/m68k/add_n.S @@ -1,7 +1,7 @@ /* mc68020 __mpn_add_n -- Add two limb vectors of the same length > 0 and store sum in a third limb vector. -Copyright (C) 1992, 1994 Free Software Foundation, Inc. +Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -27,50 +27,53 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ size (sp + 12) */ +#include "sysdep.h" #include "asm-syntax.h" TEXT ALIGN - GLOBL ___mpn_add_n + GLOBL C_SYMBOL_NAME(__mpn_add_n) -LAB(___mpn_add_n) +C_SYMBOL_NAME(__mpn_add_n:) +PROLOG(__mpn_add_n) /* Save used registers on the stack. */ - INSN2(move,l ,MEM_PREDEC(sp),d2) - INSN2(move,l ,MEM_PREDEC(sp),a2) + movel R(d2),MEM_PREDEC(sp) + movel R(a2),MEM_PREDEC(sp) /* Copy the arguments to registers. Better use movem? 
*/ - INSN2(move,l ,a2,MEM_DISP(sp,12)) - INSN2(move,l ,a0,MEM_DISP(sp,16)) - INSN2(move,l ,a1,MEM_DISP(sp,20)) - INSN2(move,l ,d2,MEM_DISP(sp,24)) - - INSN2(eor,w ,d2,#1) - INSN2(lsr,l ,d2,#1) - bcc L1 - INSN2(subq,l ,d2,#1) /* clears cy as side effect */ - -LAB(Loop) - INSN2(move,l ,d0,MEM_POSTINC(a0)) - INSN2(move,l ,d1,MEM_POSTINC(a1)) - INSN2(addx,l ,d0,d1) - INSN2(move,l ,MEM_POSTINC(a2),d0) -LAB(L1) INSN2(move,l ,d0,MEM_POSTINC(a0)) - INSN2(move,l ,d1,MEM_POSTINC(a1)) - INSN2(addx,l ,d0,d1) - INSN2(move,l ,MEM_POSTINC(a2),d0) - - dbf d2,Loop /* loop until 16 lsb of %4 == -1 */ - INSN2(subx,l ,d0,d0) /* d0 <= -cy; save cy as 0 or -1 in d0 */ - INSN2(sub,l ,d2,#0x10000) - bcs L2 - INSN2(add,l ,d0,d0) /* restore cy */ - bra Loop - -LAB(L2) - INSN1(neg,l ,d0) + movel MEM_DISP(sp,12),R(a2) + movel MEM_DISP(sp,16),R(a0) + movel MEM_DISP(sp,20),R(a1) + movel MEM_DISP(sp,24),R(d2) + + eorw #1,R(d2) + lsrl #1,R(d2) + bcc L(L1) + subql #1,R(d2) /* clears cy as side effect */ + +L(Loop:) + movel MEM_POSTINC(a0),R(d0) + movel MEM_POSTINC(a1),R(d1) + addxl R(d1),R(d0) + movel R(d0),MEM_POSTINC(a2) +L(L1:) movel MEM_POSTINC(a0),R(d0) + movel MEM_POSTINC(a1),R(d1) + addxl R(d1),R(d0) + movel R(d0),MEM_POSTINC(a2) + + dbf R(d2),L(Loop) /* loop until 16 lsb of %4 == -1 */ + subxl R(d0),R(d0) /* d0 <= -cy; save cy as 0 or -1 in d0 */ + subl #0x10000,R(d2) + bcs L(L2) + addl R(d0),R(d0) /* restore cy */ + bra L(Loop) + +L(L2:) + negl R(d0) /* Restore used registers from stack frame. */ - INSN2(move,l ,a2,MEM_POSTINC(sp)) - INSN2(move,l ,d2,MEM_POSTINC(sp)) + movel MEM_POSTINC(sp),R(a2) + movel MEM_POSTINC(sp),R(d2) rts +EPILOG(__mpn_add_n) diff --git a/sysdeps/m68k/lshift.S b/sysdeps/m68k/lshift.S new file mode 100644 index 0000000..c58594a --- /dev/null +++ b/sysdeps/m68k/lshift.S @@ -0,0 +1,150 @@ +/* mc68020 __mpn_lshift -- Shift left a low-level natural-number integer. + +Copyright (C) 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s_ptr (sp + 8) + s_size (sp + 12) + cnt (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +#define res_ptr a1 +#define s_ptr a0 +#define s_size d6 +#define cnt d4 + + TEXT + ALIGN + GLOBL C_SYMBOL_NAME(__mpn_lshift) + +C_SYMBOL_NAME(__mpn_lshift:) +PROLOG(__mpn_lshift) + +/* Save used registers on the stack. */ + moveml R(d2)-R(d6)/R(a2),MEM_PREDEC(sp) + +/* Copy the arguments to registers. 
*/ + movel MEM_DISP(sp,28),R(res_ptr) + movel MEM_DISP(sp,32),R(s_ptr) + movel MEM_DISP(sp,36),R(s_size) + movel MEM_DISP(sp,40),R(cnt) + + moveql #1,R(d5) + cmpl R(d5),R(cnt) + bne L(Lnormal) + cmpl R(s_ptr),R(res_ptr) + bls L(Lspecial) /* jump if s_ptr >= res_ptr */ +#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020)) + lea MEM_INDX1(s_ptr,s_size,l,4),R(a2) +#else /* not mc68020 */ + movel R(s_size),R(d0) + asll #2,R(d0) + lea MEM_INDX(s_ptr,d0,l),R(a2) +#endif + cmpl R(res_ptr),R(a2) + bls L(Lspecial) /* jump if res_ptr >= s_ptr + s_size */ + +L(Lnormal:) + moveql #32,R(d5) + subl R(cnt),R(d5) + +#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020)) + lea MEM_INDX1(s_ptr,s_size,l,4),R(s_ptr) + lea MEM_INDX1(res_ptr,s_size,l,4),R(res_ptr) +#else /* not mc68020 */ + movel R(s_size),R(d0) + asll #2,R(d0) + addl R(s_size),R(s_ptr) + addl R(s_size),R(res_ptr) +#endif + movel MEM_PREDEC(s_ptr),R(d2) + movel R(d2),R(d0) + lsrl R(d5),R(d0) /* compute carry limb */ + + lsll R(cnt),R(d2) + movel R(d2),R(d1) + subql #1,R(s_size) + beq L(Lend) + lsrl #1,R(s_size) + bcs L(L1) + subql #1,R(s_size) + +L(Loop:) + movel MEM_PREDEC(s_ptr),R(d2) + movel R(d2),R(d3) + lsrl R(d5),R(d3) + orl R(d3),R(d1) + movel R(d1),MEM_PREDEC(res_ptr) + lsll R(cnt),R(d2) +L(L1:) + movel MEM_PREDEC(s_ptr),R(d1) + movel R(d1),R(d3) + lsrl R(d5),R(d3) + orl R(d3),R(d2) + movel R(d2),MEM_PREDEC(res_ptr) + lsll R(cnt),R(d1) + + dbf R(s_size),L(Loop) + subl #0x10000,R(s_size) + bcc L(Loop) + +L(Lend:) + movel R(d1),MEM_PREDEC(res_ptr) /* store least significant limb */ + +/* Restore used registers from stack frame. */ + moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2) + rts + +/* We loop from least significant end of the arrays, which is only + permissible if the source and destination don't overlap, since the + function is documented to work for overlapping source and destination. 
*/ + +L(Lspecial:) + clrl R(d0) /* initialize carry */ + eorw #1,R(s_size) + lsrl #1,R(s_size) + bcc L(LL1) + subql #1,R(s_size) + +L(LLoop:) + movel MEM_POSTINC(s_ptr),R(d2) + addxl R(d2),R(d2) + movel R(d2),MEM_POSTINC(res_ptr) +L(LL1:) + movel MEM_POSTINC(s_ptr),R(d2) + addxl R(d2),R(d2) + movel R(d2),MEM_POSTINC(res_ptr) + + dbf R(s_size),L(LLoop) + addxl R(d0),R(d0) /* save cy in lsb */ + subl #0x10000,R(s_size) + bcs L(LLend) + lsrl #1,R(d0) /* restore cy */ + bra L(LLoop) + +L(LLend:) +/* Restore used registers from stack frame. */ + moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2) + rts +EPILOG(__mpn_lshift) diff --git a/sysdeps/m68k/m68020/addmul_1.S b/sysdeps/m68k/m68020/addmul_1.S index 3f244c4..169f113 100644 --- a/sysdeps/m68k/m68020/addmul_1.S +++ b/sysdeps/m68k/m68020/addmul_1.S @@ -1,7 +1,7 @@ /* mc68020 __mpn_addmul_1 -- Multiply a limb vector with a limb and add the result to a second limb vector. -Copyright (C) 1992, 1994 Free Software Foundation, Inc. +Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -23,58 +23,61 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ INPUT PARAMETERS res_ptr (sp + 4) s1_ptr (sp + 8) - size (sp + 12) + s1_size (sp + 12) s2_limb (sp + 16) */ +#include "sysdep.h" #include "asm-syntax.h" TEXT ALIGN - GLOBL ___mpn_addmul_1 + GLOBL C_SYMBOL_NAME(__mpn_addmul_1) -LAB(___mpn_addmul_1) +C_SYMBOL_NAME(__mpn_addmul_1:) +PROLOG(__mpn_addmul_1) #define res_ptr a0 #define s1_ptr a1 -#define size d2 +#define s1_size d2 #define s2_limb d4 /* Save used registers on the stack. */ - INSN2(movem,l ,MEM_PREDEC(sp),d2-d5) + moveml R(d2)-R(d5),MEM_PREDEC(sp) /* Copy the arguments to registers. Better use movem? 
*/ - INSN2(move,l ,res_ptr,MEM_DISP(sp,20)) - INSN2(move,l ,s1_ptr,MEM_DISP(sp,24)) - INSN2(move,l ,size,MEM_DISP(sp,28)) - INSN2(move,l ,s2_limb,MEM_DISP(sp,32)) - - INSN2(eor,w ,size,#1) - INSN1(clr,l ,d1) - INSN1(clr,l ,d5) - INSN2(lsr,l ,size,#1) - bcc L1 - INSN2(subq,l ,size,#1) - INSN2(sub,l ,d0,d0) /* (d0,cy) <= (0,0) */ - -LAB(Loop) - INSN2(move,l ,d3,MEM_POSTINC(s1_ptr)) - INSN2(mulu,l ,d1:d3,s2_limb) - INSN2(addx,l ,d3,d0) - INSN2(addx,l ,d1,d5) - INSN2(add,l ,MEM_POSTINC(res_ptr),d3) -LAB(L1) INSN2(move,l ,d3,MEM_POSTINC(s1_ptr)) - INSN2(mulu,l ,d0:d3,s2_limb) - INSN2(addx,l ,d3,d1) - INSN2(addx,l ,d0,d5) - INSN2(add,l ,MEM_POSTINC(res_ptr),d3) - - dbf size,Loop - INSN2(addx,l ,d0,d5) - INSN2(sub,l ,size,#0x10000) - bcc Loop + movel MEM_DISP(sp,20),R(res_ptr) + movel MEM_DISP(sp,24),R(s1_ptr) + movel MEM_DISP(sp,28),R(s1_size) + movel MEM_DISP(sp,32),R(s2_limb) + + eorw #1,R(s1_size) + clrl R(d1) + clrl R(d5) + lsrl #1,R(s1_size) + bcc L(L1) + subql #1,R(s1_size) + subl R(d0),R(d0) /* (d0,cy) <= (0,0) */ + +L(Loop:) + movel MEM_POSTINC(s1_ptr),R(d3) + mulul R(s2_limb),R(d1):R(d3) + addxl R(d0),R(d3) + addxl R(d5),R(d1) + addl R(d3),MEM_POSTINC(res_ptr) +L(L1:) movel MEM_POSTINC(s1_ptr),R(d3) + mulul R(s2_limb),R(d0):R(d3) + addxl R(d1),R(d3) + addxl R(d5),R(d0) + addl R(d3),MEM_POSTINC(res_ptr) + + dbf R(s1_size),L(Loop) + addxl R(d5),R(d0) + subl #0x10000,R(s1_size) + bcc L(Loop) /* Restore used registers from stack frame. */ - INSN2(movem,l ,d2-d5,MEM_POSTINC(sp)) + moveml MEM_POSTINC(sp),R(d2)-R(d5) rts +EPILOG(__mpn_addmul_1) diff --git a/sysdeps/m68k/m68020/mul_1.S b/sysdeps/m68k/m68020/mul_1.S index 548ca00..4db1cca 100644 --- a/sysdeps/m68k/m68020/mul_1.S +++ b/sysdeps/m68k/m68020/mul_1.S @@ -1,7 +1,7 @@ /* mc68020 __mpn_mul_1 -- Multiply a limb vector with a limb and store the result in a second limb vector. -Copyright (C) 1992, 1994 Free Software Foundation, Inc. +Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. 
This file is part of the GNU MP Library. @@ -23,65 +23,68 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ INPUT PARAMETERS res_ptr (sp + 4) s1_ptr (sp + 8) - size (sp + 12) + s1_size (sp + 12) s2_limb (sp + 16) */ +#include "sysdep.h" #include "asm-syntax.h" TEXT ALIGN - GLOBL ___mpn_mul_1 + GLOBL C_SYMBOL_NAME(__mpn_mul_1) -LAB(___mpn_mul_1) +C_SYMBOL_NAME(__mpn_mul_1:) +PROLOG(__mpn_mul_1) #define res_ptr a0 #define s1_ptr a1 -#define size d2 +#define s1_size d2 #define s2_limb d4 /* Save used registers on the stack. */ - INSN2(movem,l ,MEM_PREDEC(sp),d2-d4) + moveml R(d2)-R(d4),MEM_PREDEC(sp) #if 0 - INSN2(move,l ,MEM_PREDEC(sp),d2) - INSN2(move,l ,MEM_PREDEC(sp),d3) - INSN2(move,l ,MEM_PREDEC(sp),d4) + movel R(d2),MEM_PREDEC(sp) + movel R(d3),MEM_PREDEC(sp) + movel R(d4),MEM_PREDEC(sp) #endif /* Copy the arguments to registers. Better use movem? */ - INSN2(move,l ,res_ptr,MEM_DISP(sp,16)) - INSN2(move,l ,s1_ptr,MEM_DISP(sp,20)) - INSN2(move,l ,size,MEM_DISP(sp,24)) - INSN2(move,l ,s2_limb,MEM_DISP(sp,28)) - - INSN2(eor,w ,size,#1) - INSN1(clr,l ,d1) - INSN2(lsr,l ,size,#1) - bcc L1 - INSN2(subq,l ,size,#1) - INSN2(sub,l ,d0,d0) /* (d0,cy) <= (0,0) */ - -LAB(Loop) - INSN2(move,l ,d3,MEM_POSTINC(s1_ptr)) - INSN2(mulu,l ,d1:d3,s2_limb) - INSN2(addx,l ,d3,d0) - INSN2(move,l ,MEM_POSTINC(res_ptr),d3) -LAB(L1) INSN2(move,l ,d3,MEM_POSTINC(s1_ptr)) - INSN2(mulu,l ,d0:d3,s2_limb) - INSN2(addx,l ,d3,d1) - INSN2(move,l ,MEM_POSTINC(res_ptr),d3) - - dbf size,Loop - INSN1(clr,l ,d3) - INSN2(addx,l ,d0,d3) - INSN2(sub,l ,size,#0x10000) - bcc Loop + movel MEM_DISP(sp,16),R(res_ptr) + movel MEM_DISP(sp,20),R(s1_ptr) + movel MEM_DISP(sp,24),R(s1_size) + movel MEM_DISP(sp,28),R(s2_limb) + + eorw #1,R(s1_size) + clrl R(d1) + lsrl #1,R(s1_size) + bcc L(L1) + subql #1,R(s1_size) + subl R(d0),R(d0) /* (d0,cy) <= (0,0) */ + +L(Loop:) + movel MEM_POSTINC(s1_ptr),R(d3) + mulul R(s2_limb),R(d1):R(d3) + addxl R(d0),R(d3) + movel 
R(d3),MEM_POSTINC(res_ptr) +L(L1:) movel MEM_POSTINC(s1_ptr),R(d3) + mulul R(s2_limb),R(d0):R(d3) + addxl R(d1),R(d3) + movel R(d3),MEM_POSTINC(res_ptr) + + dbf R(s1_size),L(Loop) + clrl R(d3) + addxl R(d3),R(d0) + subl #0x10000,R(s1_size) + bcc L(Loop) /* Restore used registers from stack frame. */ - INSN2(movem,l ,d2-d4,MEM_POSTINC(sp)) + moveml MEM_POSTINC(sp),R(d2)-R(d4) #if 0 - INSN2(move,l ,d4,MEM_POSTINC(sp)) - INSN2(move,l ,d3,MEM_POSTINC(sp)) - INSN2(move,l ,d2,MEM_POSTINC(sp)) + movel MEM_POSTINC(sp),R(d4) + movel MEM_POSTINC(sp),R(d3) + movel MEM_POSTINC(sp),R(d2) #endif rts +EPILOG(__mpn_mul_1) diff --git a/sysdeps/m68k/m68020/submul_1.S b/sysdeps/m68k/m68020/submul_1.S index ef7f39d..cf30029 100644 --- a/sysdeps/m68k/m68020/submul_1.S +++ b/sysdeps/m68k/m68020/submul_1.S @@ -1,7 +1,7 @@ /* mc68020 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract the result from a second limb vector. -Copyright (C) 1992, 1994 Free Software Foundation, Inc. +Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -23,58 +23,61 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ INPUT PARAMETERS res_ptr (sp + 4) s1_ptr (sp + 8) - size (sp + 12) + s1_size (sp + 12) s2_limb (sp + 16) */ +#include "sysdep.h" #include "asm-syntax.h" TEXT ALIGN - GLOBL ___mpn_submul_1 + GLOBL C_SYMBOL_NAME(__mpn_submul_1) -LAB(___mpn_submul_1) +C_SYMBOL_NAME(__mpn_submul_1:) +PROLOG(__mpn_submul_1) #define res_ptr a0 #define s1_ptr a1 -#define size d2 +#define s1_size d2 #define s2_limb d4 /* Save used registers on the stack. */ - INSN2(movem,l ,MEM_PREDEC(sp),d2-d5) + moveml R(d2)-R(d5),MEM_PREDEC(sp) /* Copy the arguments to registers. Better use movem? 
*/ - INSN2(move,l ,res_ptr,MEM_DISP(sp,20)) - INSN2(move,l ,s1_ptr,MEM_DISP(sp,24)) - INSN2(move,l ,size,MEM_DISP(sp,28)) - INSN2(move,l ,s2_limb,MEM_DISP(sp,32)) - - INSN2(eor,w ,size,#1) - INSN1(clr,l ,d1) - INSN1(clr,l ,d5) - INSN2(lsr,l ,size,#1) - bcc L1 - INSN2(subq,l ,size,#1) - INSN2(sub,l ,d0,d0) /* (d0,cy) <= (0,0) */ - -LAB(Loop) - INSN2(move,l ,d3,MEM_POSTINC(s1_ptr)) - INSN2(mulu,l ,d1:d3,s2_limb) - INSN2(addx,l ,d3,d0) - INSN2(addx,l ,d1,d5) - INSN2(sub,l ,MEM_POSTINC(res_ptr),d3) -LAB(L1) INSN2(move,l ,d3,MEM_POSTINC(s1_ptr)) - INSN2(mulu,l ,d0:d3,s2_limb) - INSN2(addx,l ,d3,d1) - INSN2(addx,l ,d0,d5) - INSN2(sub,l ,MEM_POSTINC(res_ptr),d3) - - dbf size,Loop - INSN2(addx,l ,d0,d5) - INSN2(sub,l ,size,#0x10000) - bcc Loop + movel MEM_DISP(sp,20),R(res_ptr) + movel MEM_DISP(sp,24),R(s1_ptr) + movel MEM_DISP(sp,28),R(s1_size) + movel MEM_DISP(sp,32),R(s2_limb) + + eorw #1,R(s1_size) + clrl R(d1) + clrl R(d5) + lsrl #1,R(s1_size) + bcc L(L1) + subql #1,R(s1_size) + subl R(d0),R(d0) /* (d0,cy) <= (0,0) */ + +L(Loop:) + movel MEM_POSTINC(s1_ptr),R(d3) + mulul R(s2_limb),R(d1):R(d3) + addxl R(d0),R(d3) + addxl R(d5),R(d1) + subl R(d3),MEM_POSTINC(res_ptr) +L(L1:) movel MEM_POSTINC(s1_ptr),R(d3) + mulul R(s2_limb),R(d0):R(d3) + addxl R(d1),R(d3) + addxl R(d5),R(d0) + subl R(d3),MEM_POSTINC(res_ptr) + + dbf R(s1_size),L(Loop) + addxl R(d5),R(d0) + subl #0x10000,R(s1_size) + bcc L(Loop) /* Restore used registers from stack frame. */ - INSN2(movem,l ,d2-d5,MEM_POSTINC(sp)) + moveml MEM_POSTINC(sp),R(d2)-R(d5) rts +EPILOG(__mpn_submul_1) diff --git a/sysdeps/m68k/rshift.S b/sysdeps/m68k/rshift.S new file mode 100644 index 0000000..494dfcb --- /dev/null +++ b/sysdeps/m68k/rshift.S @@ -0,0 +1,149 @@ +/* mc68020 __mpn_rshift -- Shift right a low-level natural-number integer. + +Copyright (C) 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s_ptr (sp + 8) + s_size (sp + 12) + cnt (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +#define res_ptr a1 +#define s_ptr a0 +#define s_size d6 +#define cnt d4 + + TEXT + ALIGN + GLOBL C_SYMBOL_NAME(__mpn_rshift) + +C_SYMBOL_NAME(__mpn_rshift:) +PROLOG(__mpn_rshift) +/* Save used registers on the stack. */ + moveml R(d2)-R(d6)/R(a2),MEM_PREDEC(sp) + +/* Copy the arguments to registers. 
*/ + movel MEM_DISP(sp,28),R(res_ptr) + movel MEM_DISP(sp,32),R(s_ptr) + movel MEM_DISP(sp,36),R(s_size) + movel MEM_DISP(sp,40),R(cnt) + + moveql #1,R(d5) + cmpl R(d5),R(cnt) + bne L(Lnormal) + cmpl R(res_ptr),R(s_ptr) + bls L(Lspecial) /* jump if res_ptr >= s_ptr */ +#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020)) + lea MEM_INDX1(res_ptr,s_size,l,4),R(a2) +#else /* not mc68020 */ + movel R(s_size),R(d0) + asll #2,R(d0) + lea MEM_INDX(res_ptr,d0,l),R(a2) +#endif + cmpl R(s_ptr),R(a2) + bls L(Lspecial) /* jump if s_ptr >= res_ptr + s_size */ + +L(Lnormal:) + moveql #32,R(d5) + subl R(cnt),R(d5) /* d5 = 32 - cnt */ + movel MEM_POSTINC(s_ptr),R(d2) + movel R(d2),R(d0) + lsll R(d5),R(d0) /* compute carry limb */ + + lsrl R(cnt),R(d2) + movel R(d2),R(d1) + subql #1,R(s_size) + beq L(Lend) + lsrl #1,R(s_size) + bcs L(L1) + subql #1,R(s_size) + +L(Loop:) + movel MEM_POSTINC(s_ptr),R(d2) + movel R(d2),R(d3) + lsll R(d5),R(d3) + orl R(d3),R(d1) + movel R(d1),MEM_POSTINC(res_ptr) + lsrl R(cnt),R(d2) +L(L1:) + movel MEM_POSTINC(s_ptr),R(d1) + movel R(d1),R(d3) + lsll R(d5),R(d3) + orl R(d3),R(d2) + movel R(d2),MEM_POSTINC(res_ptr) + lsrl R(cnt),R(d1) + + dbf R(s_size),L(Loop) /* dbf only counts the low 16 bits of s_size */ + subl #0x10000,R(s_size) /* step the high 16 bits of the count */ + bcc L(Loop) + +L(Lend:) + movel R(d1),MEM(res_ptr) /* store most significant limb */ + +/* Restore used registers from stack frame. */ + moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2) + rts + +/* We loop from most significant end of the arrays, which is only + permissible if the source and destination don't overlap, since the + function is documented to work for overlapping source and destination. 
*/ + +L(Lspecial:) +#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020)) + lea MEM_INDX1(s_ptr,s_size,l,4),R(s_ptr) + lea MEM_INDX1(res_ptr,s_size,l,4),R(res_ptr) +#else /* not mc68020 */ + movel R(s_size),R(d0) + asll #2,R(d0) /* d0 = s_size * 4, the operand length in bytes */ + addl R(d0),R(s_ptr) /* point just past the most significant limb */ + addl R(d0),R(res_ptr) +#endif + + clrl R(d0) /* initialize carry */ + eorw #1,R(s_size) + lsrl #1,R(s_size) + bcc L(LL1) + subql #1,R(s_size) + +L(LLoop:) + movel MEM_PREDEC(s_ptr),R(d2) + roxrl #1,R(d2) + movel R(d2),MEM_PREDEC(res_ptr) +L(LL1:) + movel MEM_PREDEC(s_ptr),R(d2) + roxrl #1,R(d2) + movel R(d2),MEM_PREDEC(res_ptr) + + dbf R(s_size),L(LLoop) + roxrl #1,R(d0) /* save cy in msb */ + subl #0x10000,R(s_size) + bcs L(LLend) + addl R(d0),R(d0) /* restore cy */ + bra L(LLoop) + +L(LLend:) +/* Restore used registers from stack frame. */ + moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2) + rts +EPILOG(__mpn_rshift) diff --git a/sysdeps/m68k/sub_n.S b/sysdeps/m68k/sub_n.S index 19f0ec1..39f5161 100644 --- a/sysdeps/m68k/sub_n.S +++ b/sysdeps/m68k/sub_n.S @@ -1,7 +1,7 @@ /* mc68020 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and store difference in a third limb vector. -Copyright (C) 1992, 1994 Free Software Foundation, Inc. +Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -27,50 +27,53 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ size (sp + 12) */ +#include "sysdep.h" #include "asm-syntax.h" TEXT ALIGN - GLOBL ___mpn_sub_n + GLOBL C_SYMBOL_NAME(__mpn_sub_n) -LAB(___mpn_sub_n) +C_SYMBOL_NAME(__mpn_sub_n:) +PROLOG(__mpn_sub_n) /* Save used registers on the stack. */ - INSN2(move,l ,MEM_PREDEC(sp),d2) - INSN2(move,l ,MEM_PREDEC(sp),a2) + movel R(d2),MEM_PREDEC(sp) + movel R(a2),MEM_PREDEC(sp) /* Copy the arguments to registers. Better use movem? 
*/ - INSN2(move,l ,a2,MEM_DISP(sp,12)) - INSN2(move,l ,a0,MEM_DISP(sp,16)) - INSN2(move,l ,a1,MEM_DISP(sp,20)) - INSN2(move,l ,d2,MEM_DISP(sp,24)) - - INSN2(eor,w ,d2,#1) - INSN2(lsr,l ,d2,#1) - bcc L1 - INSN2(subq,l ,d2,#1) /* clears cy as side effect */ - -LAB(Loop) - INSN2(move,l ,d0,MEM_POSTINC(a0)) - INSN2(move,l ,d1,MEM_POSTINC(a1)) - INSN2(subx,l ,d0,d1) - INSN2(move,l ,MEM_POSTINC(a2),d0) -LAB(L1) INSN2(move,l ,d0,MEM_POSTINC(a0)) - INSN2(move,l ,d1,MEM_POSTINC(a1)) - INSN2(subx,l ,d0,d1) - INSN2(move,l ,MEM_POSTINC(a2),d0) - - dbf d2,Loop /* loop until 16 lsb of %4 == -1 */ - INSN2(subx,l ,d0,d0) /* d0 <= -cy; save cy as 0 or -1 in d0 */ - INSN2(sub,l ,d2,#0x10000) - bcs L2 - INSN2(add,l ,d0,d0) /* restore cy */ - bra Loop - -LAB(L2) - INSN1(neg,l ,d0) + movel MEM_DISP(sp,12),R(a2) + movel MEM_DISP(sp,16),R(a0) + movel MEM_DISP(sp,20),R(a1) + movel MEM_DISP(sp,24),R(d2) + + eorw #1,R(d2) + lsrl #1,R(d2) + bcc L(L1) + subql #1,R(d2) /* clears cy as side effect */ + +L(Loop:) + movel MEM_POSTINC(a0),R(d0) + movel MEM_POSTINC(a1),R(d1) + subxl R(d1),R(d0) + movel R(d0),MEM_POSTINC(a2) +L(L1:) movel MEM_POSTINC(a0),R(d0) + movel MEM_POSTINC(a1),R(d1) + subxl R(d1),R(d0) + movel R(d0),MEM_POSTINC(a2) + + dbf R(d2),L(Loop) /* loop until 16 lsb of %4 == -1 */ + subxl R(d0),R(d0) /* d0 <= -cy; save cy as 0 or -1 in d0 */ + subl #0x10000,R(d2) + bcs L(L2) + addl R(d0),R(d0) /* restore cy */ + bra L(Loop) + +L(L2:) + negl R(d0) /* Restore used registers from stack frame. */ - INSN2(move,l ,a2,MEM_POSTINC(sp)) - INSN2(move,l ,d2,MEM_POSTINC(sp)) + movel MEM_POSTINC(sp),R(a2) + movel MEM_POSTINC(sp),R(d2) rts +EPILOG(__mpn_sub_n) diff --git a/sysdeps/m88k/add_n.s b/sysdeps/m88k/add_n.s index 7e4cccc..d564479 100644 --- a/sysdeps/m88k/add_n.s +++ b/sysdeps/m88k/add_n.s @@ -1,7 +1,7 @@ ; mc88100 __mpn_add -- Add two limb vectors of the same length > 0 and store ; sum in a third limb vector. -; Copyright (C) 1992, 1994 Free Software Foundation, Inc. 
+; Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. ; This file is part of the GNU MP Library. diff --git a/sysdeps/m88k/m88110/add_n.S b/sysdeps/m88k/m88110/add_n.S new file mode 100644 index 0000000..ab20630 --- /dev/null +++ b/sysdeps/m88k/m88110/add_n.S @@ -0,0 +1,199 @@ +; mc88110 __mpn_add_n -- Add two limb vectors of the same length > 0 and store +; sum in a third limb vector. + +; Copyright (C) 1995, 1996 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + +; INPUT PARAMETERS +#define res_ptr r2 +#define s1_ptr r3 +#define s2_ptr r4 +#define size r5 + +#include "sysdep.h" + + text + align 16 + global C_SYMBOL_NAME(__mpn_add_n) +C_SYMBOL_NAME(__mpn_add_n): + addu.co r0,r0,r0 ; clear cy flag + xor r12,s2_ptr,res_ptr + bb1 2,r12,L1 +; ** V1a ** +L0: bb0 2,res_ptr,L_v1 ; branch if res_ptr is aligned? 
+/* Add least significant limb separately to align res_ptr and s2_ptr */ + ld r10,s1_ptr,0 + addu s1_ptr,s1_ptr,4 + ld r8,s2_ptr,0 + addu s2_ptr,s2_ptr,4 + subu size,size,1 + addu.co r6,r10,r8 + st r6,res_ptr,0 + addu res_ptr,res_ptr,4 +L_v1: cmp r12,size,2 + bb1 lt,r12,Lend2 + + ld r10,s1_ptr,0 + ld r12,s1_ptr,4 + ld.d r8,s2_ptr,0 + subu size,size,10 + bcnd lt0,size,Lfin1 +/* Add blocks of 8 limbs until less than 8 limbs remain */ + align 8 +Loop1: subu size,size,8 + addu.cio r6,r10,r8 + ld r10,s1_ptr,8 + addu.cio r7,r12,r9 + ld r12,s1_ptr,12 + ld.d r8,s2_ptr,8 + st.d r6,res_ptr,0 + addu.cio r6,r10,r8 + ld r10,s1_ptr,16 + addu.cio r7,r12,r9 + ld r12,s1_ptr,20 + ld.d r8,s2_ptr,16 + st.d r6,res_ptr,8 + addu.cio r6,r10,r8 + ld r10,s1_ptr,24 + addu.cio r7,r12,r9 + ld r12,s1_ptr,28 + ld.d r8,s2_ptr,24 + st.d r6,res_ptr,16 + addu.cio r6,r10,r8 + ld r10,s1_ptr,32 + addu.cio r7,r12,r9 + ld r12,s1_ptr,36 + addu s1_ptr,s1_ptr,32 + ld.d r8,s2_ptr,32 + addu s2_ptr,s2_ptr,32 + st.d r6,res_ptr,24 + addu res_ptr,res_ptr,32 + bcnd ge0,size,Loop1 + +Lfin1: addu size,size,8-2 + bcnd lt0,size,Lend1 +/* Add blocks of 2 limbs until less than 2 limbs remain */ +Loope1: addu.cio r6,r10,r8 + ld r10,s1_ptr,8 + addu.cio r7,r12,r9 + ld r12,s1_ptr,12 + ld.d r8,s2_ptr,8 + st.d r6,res_ptr,0 + subu size,size,2 + addu s1_ptr,s1_ptr,8 + addu s2_ptr,s2_ptr,8 + addu res_ptr,res_ptr,8 + bcnd ge0,size,Loope1 +Lend1: addu.cio r6,r10,r8 + addu.cio r7,r12,r9 + st.d r6,res_ptr,0 + + bb0 0,size,Lret1 +/* Add last limb */ + ld r10,s1_ptr,8 + ld r8,s2_ptr,8 + addu.cio r6,r10,r8 + st r6,res_ptr,8 + +Lret1: jmp.n r1 + addu.ci r2,r0,r0 ; return carry-out from most sign. limb + +L1: xor r12,s1_ptr,res_ptr + bb1 2,r12,L2 +; ** V1b ** + or r12,r0,s2_ptr + or s2_ptr,r0,s1_ptr + or s1_ptr,r0,r12 + br L0 + +; ** V2 ** +/* If we come here, the alignment of s1_ptr and res_ptr as well as the + alignment of s2_ptr and res_ptr differ. 
Since there are only two ways + things can be aligned (that we care about) we now know that the alignment + of s1_ptr and s2_ptr are the same. */ + +L2: cmp r12,size,1 + bb1 eq,r12,Ljone + bb0 2,s1_ptr,L_v2 ; branch if s1_ptr is aligned +/* Add least significant limb separately to align s1_ptr and s2_ptr */ + ld r10,s1_ptr,0 + addu s1_ptr,s1_ptr,4 + ld r8,s2_ptr,0 + addu s2_ptr,s2_ptr,4 + subu size,size,1 + addu.co r6,r10,r8 + st r6,res_ptr,0 + addu res_ptr,res_ptr,4 + +L_v2: subu size,size,8 + bcnd lt0,size,Lfin2 +/* Add blocks of 8 limbs until less than 8 limbs remain */ + align 8 +Loop2: subu size,size,8 + ld.d r8,s1_ptr,0 + ld.d r6,s2_ptr,0 + addu.cio r8,r8,r6 + st r8,res_ptr,0 + addu.cio r9,r9,r7 + st r9,res_ptr,4 + ld.d r8,s1_ptr,8 + ld.d r6,s2_ptr,8 + addu.cio r8,r8,r6 + st r8,res_ptr,8 + addu.cio r9,r9,r7 + st r9,res_ptr,12 + ld.d r8,s1_ptr,16 + ld.d r6,s2_ptr,16 + addu.cio r8,r8,r6 + st r8,res_ptr,16 + addu.cio r9,r9,r7 + st r9,res_ptr,20 + ld.d r8,s1_ptr,24 + ld.d r6,s2_ptr,24 + addu.cio r8,r8,r6 + st r8,res_ptr,24 + addu.cio r9,r9,r7 + st r9,res_ptr,28 + addu s1_ptr,s1_ptr,32 + addu s2_ptr,s2_ptr,32 + addu res_ptr,res_ptr,32 + bcnd ge0,size,Loop2 + +Lfin2: addu size,size,8-2 + bcnd lt0,size,Lend2 +Loope2: ld.d r8,s1_ptr,0 + ld.d r6,s2_ptr,0 + addu.cio r8,r8,r6 + st r8,res_ptr,0 + addu.cio r9,r9,r7 + st r9,res_ptr,4 + subu size,size,2 + addu s1_ptr,s1_ptr,8 + addu s2_ptr,s2_ptr,8 + addu res_ptr,res_ptr,8 + bcnd ge0,size,Loope2 +Lend2: bb0 0,size,Lret2 +/* Add last limb */ +Ljone: ld r10,s1_ptr,0 + ld r8,s2_ptr,0 + addu.cio r6,r10,r8 + st r6,res_ptr,0 + +Lret2: jmp.n r1 + addu.ci r2,r0,r0 ; return carry-out from most sign. limb diff --git a/sysdeps/m88k/m88110/addmul_1.s b/sysdeps/m88k/m88110/addmul_1.s new file mode 100644 index 0000000..1a4dfa1 --- /dev/null +++ b/sysdeps/m88k/m88110/addmul_1.s @@ -0,0 +1,60 @@ +; mc88110 __mpn_addmul_1 -- Multiply a limb vector with a single limb and +; add the product to a second limb vector. 
+ +; Copyright (C) 1996 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + +; INPUT PARAMETERS +; res_ptr r2 +; s1_ptr r3 +; size r4 +; s2_limb r5 + + text + align 16 + global ___mpn_addmul_1 +___mpn_addmul_1: + lda r3,r3[r4] + lda r8,r2[r4] ; RES_PTR in r8 since r2 is retval + subu r4,r0,r4 + addu.co r2,r0,r0 ; r2 = cy = 0 + + ld r6,r3[r4] + addu r4,r4,1 + subu r8,r8,4 + bcnd.n eq0,r4,Lend + mulu.d r10,r6,r5 + +Loop: ld r7,r8[r4] + ld r6,r3[r4] + addu.cio r9,r11,r2 + addu.ci r2,r10,r0 + addu.co r9,r9,r7 + st r9,r8[r4] + addu r4,r4,1 + mulu.d r10,r6,r5 + bcnd ne0,r4,Loop + +Lend: ld r7,r8,0 + addu.cio r9,r11,r2 + addu.ci r2,r10,r0 + addu.co r9,r9,r7 + st r9,r8,0 + jmp.n r1 + addu.ci r2,r2,r0 diff --git a/sysdeps/m88k/m88110/mul_1.s b/sysdeps/m88k/m88110/mul_1.s index 08c3ca0..b1352ce 100644 --- a/sysdeps/m88k/m88110/mul_1.s +++ b/sysdeps/m88k/m88110/mul_1.s @@ -1,7 +1,7 @@ ; mc88110 __mpn_mul_1 -- Multiply a limb vector with a single limb and ; store the product in a second limb vector. -; Copyright (C) 1992, 1994 Free Software Foundation, Inc. +; Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. ; This file is part of the GNU MP Library. 
@@ -56,29 +56,3 @@ Lend: addu.cio r9,r11,r2 st r9,r8,4 jmp.n r1 addu.ci r2,r10,r0 - -; This is the Right Way to do this on '110. 4 cycles / 64-bit limb. -; ld.d r10, -; mulu.d -; addu.cio -; addu.cio -; st.d -; mulu.d ,r11,r5 -; ld.d r12, -; mulu.d ,r10,r5 -; addu.cio -; addu.cio -; st.d -; mulu.d -; ld.d r10, -; mulu.d -; addu.cio -; addu.cio -; st.d -; mulu.d -; ld.d r10, -; mulu.d -; addu.cio -; addu.cio -; st.d -; mulu.d diff --git a/sysdeps/m88k/m88110/sub_n.S b/sysdeps/m88k/m88110/sub_n.S new file mode 100644 index 0000000..74ee0ae --- /dev/null +++ b/sysdeps/m88k/m88110/sub_n.S @@ -0,0 +1,275 @@ +; mc88110 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and +; store difference in a third limb vector. + +; Copyright (C) 1995, 1996 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ + +; INPUT PARAMETERS +#define res_ptr r2 +#define s1_ptr r3 +#define s2_ptr r4 +#define size r5 + +#include "sysdep.h" + + text + align 16 + global C_SYMBOL_NAME(__mpn_sub_n) +C_SYMBOL_NAME(__mpn_sub_n): + subu.co r0,r0,r0 ; set cy flag + xor r12,s2_ptr,res_ptr + bb1 2,r12,L1 +; ** V1a ** +L0: bb0 2,res_ptr,L_v1 ; branch if res_ptr is aligned +/* Add least significant limb separately to align res_ptr and s2_ptr */ + ld r10,s1_ptr,0 + addu s1_ptr,s1_ptr,4 + ld r8,s2_ptr,0 + addu s2_ptr,s2_ptr,4 + subu size,size,1 + subu.co r6,r10,r8 + st r6,res_ptr,0 + addu res_ptr,res_ptr,4 +L_v1: cmp r12,size,2 + bb1 lt,r12,Lend2 + + ld r10,s1_ptr,0 + ld r12,s1_ptr,4 + ld.d r8,s2_ptr,0 + subu size,size,10 + bcnd lt0,size,Lfin1 +/* Add blocks of 8 limbs until less than 8 limbs remain */ + align 8 +Loop1: subu size,size,8 + subu.cio r6,r10,r8 + ld r10,s1_ptr,8 + subu.cio r7,r12,r9 + ld r12,s1_ptr,12 + ld.d r8,s2_ptr,8 + st.d r6,res_ptr,0 + subu.cio r6,r10,r8 + ld r10,s1_ptr,16 + subu.cio r7,r12,r9 + ld r12,s1_ptr,20 + ld.d r8,s2_ptr,16 + st.d r6,res_ptr,8 + subu.cio r6,r10,r8 + ld r10,s1_ptr,24 + subu.cio r7,r12,r9 + ld r12,s1_ptr,28 + ld.d r8,s2_ptr,24 + st.d r6,res_ptr,16 + subu.cio r6,r10,r8 + ld r10,s1_ptr,32 + subu.cio r7,r12,r9 + ld r12,s1_ptr,36 + addu s1_ptr,s1_ptr,32 + ld.d r8,s2_ptr,32 + addu s2_ptr,s2_ptr,32 + st.d r6,res_ptr,24 + addu res_ptr,res_ptr,32 + bcnd ge0,size,Loop1 + +Lfin1: addu size,size,8-2 + bcnd lt0,size,Lend1 +/* Add blocks of 2 limbs until less than 2 limbs remain */ +Loope1: subu.cio r6,r10,r8 + ld r10,s1_ptr,8 + subu.cio r7,r12,r9 + ld r12,s1_ptr,12 + ld.d r8,s2_ptr,8 + st.d r6,res_ptr,0 + subu size,size,2 + addu s1_ptr,s1_ptr,8 + addu s2_ptr,s2_ptr,8 + addu res_ptr,res_ptr,8 + bcnd ge0,size,Loope1 +Lend1: subu.cio r6,r10,r8 + subu.cio r7,r12,r9 + st.d r6,res_ptr,0 + + bb0 0,size,Lret1 +/* Add last limb */ + ld r10,s1_ptr,8 + ld r8,s2_ptr,8 + subu.cio r6,r10,r8 + st r6,res_ptr,8 + +Lret1: addu.ci r2,r0,r0 ; return carry-out from most sign. 
limb + jmp.n r1 + xor r2,r2,1 + +L1: xor r12,s1_ptr,res_ptr + bb1 2,r12,L2 +; ** V1b ** + bb0 2,res_ptr,L_v1b ; branch if res_ptr is aligned +/* Add least significant limb separately to align res_ptr and s1_ptr */ + ld r10,s2_ptr,0 + addu s2_ptr,s2_ptr,4 + ld r8,s1_ptr,0 + addu s1_ptr,s1_ptr,4 + subu size,size,1 + subu.co r6,r8,r10 + st r6,res_ptr,0 + addu res_ptr,res_ptr,4 +L_v1b: cmp r12,size,2 + bb1 lt,r12,Lend2 + + ld r10,s2_ptr,0 + ld r12,s2_ptr,4 + ld.d r8,s1_ptr,0 + subu size,size,10 + bcnd lt0,size,Lfin1b +/* Add blocks of 8 limbs until less than 8 limbs remain */ + align 8 +Loop1b: subu size,size,8 + subu.cio r6,r8,r10 + ld r10,s2_ptr,8 + subu.cio r7,r9,r12 + ld r12,s2_ptr,12 + ld.d r8,s1_ptr,8 + st.d r6,res_ptr,0 + subu.cio r6,r8,r10 + ld r10,s2_ptr,16 + subu.cio r7,r9,r12 + ld r12,s2_ptr,20 + ld.d r8,s1_ptr,16 + st.d r6,res_ptr,8 + subu.cio r6,r8,r10 + ld r10,s2_ptr,24 + subu.cio r7,r9,r12 + ld r12,s2_ptr,28 + ld.d r8,s1_ptr,24 + st.d r6,res_ptr,16 + subu.cio r6,r8,r10 + ld r10,s2_ptr,32 + subu.cio r7,r9,r12 + ld r12,s2_ptr,36 + addu s2_ptr,s2_ptr,32 + ld.d r8,s1_ptr,32 + addu s1_ptr,s1_ptr,32 + st.d r6,res_ptr,24 + addu res_ptr,res_ptr,32 + bcnd ge0,size,Loop1b + +Lfin1b: addu size,size,8-2 + bcnd lt0,size,Lend1b +/* Add blocks of 2 limbs until less than 2 limbs remain */ +Loope1b:subu.cio r6,r8,r10 + ld r10,s2_ptr,8 + subu.cio r7,r9,r12 + ld r12,s2_ptr,12 + ld.d r8,s1_ptr,8 + st.d r6,res_ptr,0 + subu size,size,2 + addu s1_ptr,s1_ptr,8 + addu s2_ptr,s2_ptr,8 + addu res_ptr,res_ptr,8 + bcnd ge0,size,Loope1b +Lend1b: subu.cio r6,r8,r10 + subu.cio r7,r9,r12 + st.d r6,res_ptr,0 + + bb0 0,size,Lret1b +/* Add last limb */ + ld r10,s2_ptr,8 + ld r8,s1_ptr,8 + subu.cio r6,r8,r10 + st r6,res_ptr,8 + +Lret1b: addu.ci r2,r0,r0 ; return carry-out from most sign. limb + jmp.n r1 + xor r2,r2,1 + +; ** V2 ** +/* If we come here, the alignment of s1_ptr and res_ptr as well as the + alignment of s2_ptr and res_ptr differ. 
Since there are only two ways + things can be aligned (that we care about) we now know that the alignment + of s1_ptr and s2_ptr are the same. */ + +L2: cmp r12,size,1 + bb1 eq,r12,Ljone + bb0 2,s1_ptr,L_v2 ; branch if s1_ptr is aligned +/* Add least significant limb separately to align res_ptr and s2_ptr */ + ld r10,s1_ptr,0 + addu s1_ptr,s1_ptr,4 + ld r8,s2_ptr,0 + addu s2_ptr,s2_ptr,4 + subu size,size,1 + subu.co r6,r10,r8 + st r6,res_ptr,0 + addu res_ptr,res_ptr,4 + +L_v2: subu size,size,8 + bcnd lt0,size,Lfin2 +/* Add blocks of 8 limbs until less than 8 limbs remain */ + align 8 +Loop2: subu size,size,8 + ld.d r8,s1_ptr,0 + ld.d r6,s2_ptr,0 + subu.cio r8,r8,r6 + st r8,res_ptr,0 + subu.cio r9,r9,r7 + st r9,res_ptr,4 + ld.d r8,s1_ptr,8 + ld.d r6,s2_ptr,8 + subu.cio r8,r8,r6 + st r8,res_ptr,8 + subu.cio r9,r9,r7 + st r9,res_ptr,12 + ld.d r8,s1_ptr,16 + ld.d r6,s2_ptr,16 + subu.cio r8,r8,r6 + st r8,res_ptr,16 + subu.cio r9,r9,r7 + st r9,res_ptr,20 + ld.d r8,s1_ptr,24 + ld.d r6,s2_ptr,24 + subu.cio r8,r8,r6 + st r8,res_ptr,24 + subu.cio r9,r9,r7 + st r9,res_ptr,28 + addu s1_ptr,s1_ptr,32 + addu s2_ptr,s2_ptr,32 + addu res_ptr,res_ptr,32 + bcnd ge0,size,Loop2 + +Lfin2: addu size,size,8-2 + bcnd lt0,size,Lend2 +Loope2: ld.d r8,s1_ptr,0 + ld.d r6,s2_ptr,0 + subu.cio r8,r8,r6 + st r8,res_ptr,0 + subu.cio r9,r9,r7 + st r9,res_ptr,4 + subu size,size,2 + addu s1_ptr,s1_ptr,8 + addu s2_ptr,s2_ptr,8 + addu res_ptr,res_ptr,8 + bcnd ge0,size,Loope2 +Lend2: bb0 0,size,Lret2 +/* Add last limb */ +Ljone: ld r10,s1_ptr,0 + ld r8,s2_ptr,0 + subu.cio r6,r10,r8 + st r6,res_ptr,0 + +Lret2: addu.ci r2,r0,r0 ; return carry-out from most sign. limb + jmp.n r1 + xor r2,r2,1 diff --git a/sysdeps/m88k/mul_1.s b/sysdeps/m88k/mul_1.s index 35c238d..6b8492c 100644 --- a/sysdeps/m88k/mul_1.s +++ b/sysdeps/m88k/mul_1.s @@ -1,7 +1,7 @@ ; mc88100 __mpn_mul_1 -- Multiply a limb vector with a single limb and ; store the product in a second limb vector. 
-; Copyright (C) 1992, 1994 Free Software Foundation, Inc. +; Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. ; This file is part of the GNU MP Library. @@ -55,14 +55,14 @@ ___mpn_mul_1: ; Make S1_PTR and RES_PTR point at the end of their blocks ; and negate SIZE. lda r3,r3[r4] - lda r6,r2[r4] ; RES_PTR in r6 since r2 is retval + lda r6,r2[r4] ; RES_PTR in r6 since r2 is retval subu r4,r0,r4 - addu.co r2,r0,r0 ; r2 = cy = 0 + addu.co r2,r0,r0 ; r2 = cy = 0 ld r9,r3[r4] - mask r7,r5,0xffff ; r7 = lo(S2_LIMB) - extu r8,r5,16 ; r8 = hi(S2_LIMB) - bcnd.n eq0,r8,Lsmall ; jump if (hi(S2_LIMB) == 0) + mask r7,r5,0xffff ; r7 = lo(S2_LIMB) + extu r8,r5,16 ; r8 = hi(S2_LIMB) + bcnd.n eq0,r8,Lsmall ; jump if (hi(S2_LIMB) == 0) subu r6,r6,4 ; General code for any value of S2_LIMB. @@ -75,28 +75,27 @@ ___mpn_mul_1: br.n L1 addu r4,r4,1 -Loop: - ld r9,r3[r4] +Loop: ld r9,r3[r4] st r26,r6[r4] -; bcnd ne0,r0,0 ; bubble +; bcnd ne0,r0,0 ; bubble addu r4,r4,1 -L1: mul r26,r9,r5 ; low word of product mul_1 WB ld - mask r12,r9,0xffff ; r12 = lo(s1_limb) mask_1 - mul r11,r12,r7 ; r11 = prod_0 mul_2 WB mask_1 - mul r10,r12,r8 ; r10 = prod_1a mul_3 - extu r13,r9,16 ; r13 = hi(s1_limb) extu_1 WB mul_1 - mul r12,r13,r7 ; r12 = prod_1b mul_4 WB extu_1 - mul r25,r13,r8 ; r25 = prod_2 mul_5 WB mul_2 - extu r11,r11,16 ; r11 = hi(prod_0) extu_2 WB mul_3 - addu r10,r10,r11 ; addu_1 WB extu_2 -; bcnd ne0,r0,0 ; bubble WB addu_1 - addu.co r10,r10,r12 ; WB mul_4 - mask.u r10,r10,0xffff ; move the 16 most significant bits... - addu.ci r10,r10,r0 ; ...to the low half of the word... - rot r10,r10,16 ; ...and put carry in pos 16. 
- addu.co r26,r26,r2 ; add old carry limb +L1: mul r26,r9,r5 ; low word of product mul_1 WB ld + mask r12,r9,0xffff ; r12 = lo(s1_limb) mask_1 + mul r11,r12,r7 ; r11 = prod_0 mul_2 WB mask_1 + mul r10,r12,r8 ; r10 = prod_1a mul_3 + extu r13,r9,16 ; r13 = hi(s1_limb) extu_1 WB mul_1 + mul r12,r13,r7 ; r12 = prod_1b mul_4 WB extu_1 + mul r25,r13,r8 ; r25 = prod_2 mul_5 WB mul_2 + extu r11,r11,16 ; r11 = hi(prod_0) extu_2 WB mul_3 + addu r10,r10,r11 ; addu_1 WB extu_2 +; bcnd ne0,r0,0 ; bubble WB addu_1 + addu.co r10,r10,r12 ; WB mul_4 + mask.u r10,r10,0xffff ; move the 16 most significant bits... + addu.ci r10,r10,r0 ; ...to the low half of the word... + rot r10,r10,16 ; ...and put carry in pos 16. + addu.co r26,r26,r2 ; add old carry limb bcnd.n ne0,r4,Loop - addu.ci r2,r25,r10 ; compute new carry limb + addu.ci r2,r25,r10 ; compute new carry limb st r26,r6[r4] ld.d r25,r31,8 @@ -109,20 +108,19 @@ Lsmall: br.n SL1 addu r4,r4,1 -SLoop: - ld r9,r3[r4] ; - st r8,r6[r4] ; - addu r4,r4,1 ; -SL1: mul r8,r9,r5 ; low word of product - mask r12,r9,0xffff ; r12 = lo(s1_limb) - extu r13,r9,16 ; r13 = hi(s1_limb) - mul r11,r12,r7 ; r11 = prod_0 - mul r12,r13,r7 ; r12 = prod_1b - addu.cio r8,r8,r2 ; add old carry limb - extu r10,r11,16 ; r11 = hi(prod_0) - addu r10,r10,r12 ; +SLoop: ld r9,r3[r4] ; + st r8,r6[r4] ; + addu r4,r4,1 ; +SL1: mul r8,r9,r5 ; low word of product + mask r12,r9,0xffff ; r12 = lo(s1_limb) + extu r13,r9,16 ; r13 = hi(s1_limb) + mul r11,r12,r7 ; r11 = prod_0 + mul r12,r13,r7 ; r12 = prod_1b + addu.cio r8,r8,r2 ; add old carry limb + extu r10,r11,16 ; r11 = hi(prod_0) + addu r10,r10,r12 ; bcnd.n ne0,r4,SLoop - extu r2,r10,16 ; r2 = new carry limb + extu r2,r10,16 ; r2 = new carry limb jmp.n r1 st r8,r6[r4] diff --git a/sysdeps/m88k/sub_n.s b/sysdeps/m88k/sub_n.s index 3963cd5..cd0b791 100644 --- a/sysdeps/m88k/sub_n.s +++ b/sysdeps/m88k/sub_n.s @@ -1,7 +1,7 @@ ; mc88100 __mpn_sub -- Subtract two limb vectors of the same length > 0 and ; store difference in a 
third limb vector. -; Copyright (C) 1992, 1994 Free Software Foundation, Inc. +; Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. ; This file is part of the GNU MP Library. @@ -41,9 +41,10 @@ ___mpn_sub_n: extu r10,r5,3 ld r7,r4,0 ; read first limb from s2_ptr - subu.co r5,r0,r5 ; (clear carry as side effect) + subu r5,r0,r5 mak r5,r5,3<4> - bcnd eq0,r5,Lzero + bcnd.n eq0,r5,Lzero + subu.co r0,r0,r0 ; initialize carry or r12,r0,lo16(Lbase) or.u r12,r12,hi16(Lbase) diff --git a/sysdeps/mips/addmul_1.s b/sysdeps/mips/addmul_1.s index abc2fb8..917af1b 100644 --- a/sysdeps/mips/addmul_1.s +++ b/sysdeps/mips/addmul_1.s @@ -1,7 +1,7 @@ # MIPS __mpn_addmul_1 -- Multiply a limb vector with a single limb and # add the product to a second limb vector. - # Copyright (C) 1992, 1994 Free Software Foundation, Inc. + # Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. # This file is part of the GNU MP Library. @@ -63,7 +63,7 @@ Loop: lw $10,0($4) addu $2,$2,$10 sw $3,0($4) addiu $4,$4,4 - bne $6,$0,Loop # should be "bnel" + bne $6,$0,Loop addu $2,$9,$2 # add high product limb and carry from addition # cool down phase 1 diff --git a/sysdeps/mips/mips3/addmul_1.s b/sysdeps/mips/mips3/addmul_1.s index 7af0172..7dbc9ad 100644 --- a/sysdeps/mips/mips3/addmul_1.s +++ b/sysdeps/mips/mips3/addmul_1.s @@ -63,7 +63,7 @@ Loop: ld $10,0($4) daddu $2,$2,$10 sd $3,0($4) daddiu $4,$4,8 - bne $6,$0,Loop # should be "bnel" + bne $6,$0,Loop daddu $2,$9,$2 # add high product limb and carry from addition # cool down phase 1 diff --git a/sysdeps/mips/mips3/mul_1.s b/sysdeps/mips/mips3/mul_1.s index 87954e5..8376a02 100644 --- a/sysdeps/mips/mips3/mul_1.s +++ b/sysdeps/mips/mips3/mul_1.s @@ -59,7 +59,7 @@ Loop: mflo $10 sltu $2,$10,$2 # carry from previous addition -> $2 sd $10,0($4) daddiu $4,$4,8 - bne $6,$0,Loop # should be "bnel" + bne $6,$0,Loop daddu $2,$9,$2 # add high product limb and carry from addition # cool down phase 1 diff --git a/sysdeps/mips/mips3/submul_1.s 
b/sysdeps/mips/mips3/submul_1.s index f28c6a5..f041f6c 100644 --- a/sysdeps/mips/mips3/submul_1.s +++ b/sysdeps/mips/mips3/submul_1.s @@ -63,7 +63,7 @@ Loop: ld $10,0($4) daddu $2,$2,$10 sd $3,0($4) daddiu $4,$4,8 - bne $6,$0,Loop # should be "bnel" + bne $6,$0,Loop daddu $2,$9,$2 # add high product limb and carry from addition # cool down phase 1 diff --git a/sysdeps/mips/mul_1.s b/sysdeps/mips/mul_1.s index 01327e2..6f5324c 100644 --- a/sysdeps/mips/mul_1.s +++ b/sysdeps/mips/mul_1.s @@ -1,7 +1,7 @@ # MIPS __mpn_mul_1 -- Multiply a limb vector with a single limb and # store the product in a second limb vector. - # Copyright (C) 1992, 1994 Free Software Foundation, Inc. + # Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. # This file is part of the GNU MP Library. @@ -59,7 +59,7 @@ Loop: mflo $10 sltu $2,$10,$2 # carry from previous addition -> $2 sw $10,0($4) addiu $4,$4,4 - bne $6,$0,Loop # should be "bnel" + bne $6,$0,Loop addu $2,$9,$2 # add high product limb and carry from addition # cool down phase 1 diff --git a/sysdeps/mips/submul_1.s b/sysdeps/mips/submul_1.s index 616dd1b..a78072a 100644 --- a/sysdeps/mips/submul_1.s +++ b/sysdeps/mips/submul_1.s @@ -1,7 +1,7 @@ # MIPS __mpn_submul_1 -- Multiply a limb vector with a single limb and # subtract the product from a second limb vector. - # Copyright (C) 1992, 1994 Free Software Foundation, Inc. + # Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. # This file is part of the GNU MP Library. @@ -63,7 +63,7 @@ Loop: lw $10,0($4) addu $2,$2,$10 sw $3,0($4) addiu $4,$4,4 - bne $6,$0,Loop # should be "bnel" + bne $6,$0,Loop addu $2,$9,$2 # add high product limb and carry from addition # cool down phase 1 diff --git a/sysdeps/rs6000/add_n.s b/sysdeps/rs6000/add_n.s index 7090cf1..e2536d5 100644 --- a/sysdeps/rs6000/add_n.s +++ b/sysdeps/rs6000/add_n.s @@ -1,6 +1,6 @@ # IBM POWER __mpn_add_n -- Add two limb vectors of equal, non-zero length. 
-# Copyright (C) 1992, 1994 Free Software Foundation, Inc. +# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. # This file is part of the GNU MP Library. diff --git a/sysdeps/rs6000/sub_n.s b/sysdeps/rs6000/sub_n.s index 40fe7d6..c57675b 100644 --- a/sysdeps/rs6000/sub_n.s +++ b/sysdeps/rs6000/sub_n.s @@ -1,7 +1,7 @@ # IBM POWER __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and # store difference in a third limb vector. -# Copyright (C) 1992, 1994 Free Software Foundation, Inc. +# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. # This file is part of the GNU MP Library. diff --git a/sysdeps/sparc/add_n.S b/sysdeps/sparc/add_n.S index 80c3b99..49b31fc 100644 --- a/sysdeps/sparc/add_n.S +++ b/sysdeps/sparc/add_n.S @@ -1,7 +1,7 @@ -! sparc __mpn_add_n -- Add two limb vectors of the same length > 0 and store +! SPARC __mpn_add_n -- Add two limb vectors of the same length > 0 and store ! sum in a third limb vector. -! Copyright (C) 1995 Free Software Foundation, Inc. +! Copyright (C) 1995, 1996 Free Software Foundation, Inc. ! This file is part of the GNU MP Library. @@ -32,18 +32,14 @@ .align 4 .global C_SYMBOL_NAME(__mpn_add_n) C_SYMBOL_NAME(__mpn_add_n): - cmp size,8 - mov 0,%o4 ! clear cy-save register - blt,a Ltriv - addcc size,-2,size xor s2_ptr,res_ptr,%g1 andcc %g1,4,%g0 bne L1 ! branch if alignment differs nop +! ** V1a ** L0: andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0 - beq L_v1 ! if no, branch + be L_v1 ! if no, branch nop -! ** V1a ** /* Add least significant limb separately to align res_ptr and s2_ptr */ ld [s1_ptr],%g4 add s1_ptr,4,s1_ptr @@ -53,12 +49,15 @@ L0: andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0 addcc %g4,%g2,%o4 st %o4,[res_ptr] add res_ptr,4,res_ptr +L_v1: addx %g0,%g0,%o4 ! save cy in register + cmp size,2 ! if size < 2 ... + bl Lend2 ! ... branch to tail code + subcc %g0,%o4,%g0 ! 
restore cy -L_v1: ld [s1_ptr+0],%g4 + ld [s1_ptr+0],%g4 + addcc size,-10,size ld [s1_ptr+4],%g1 ldd [s2_ptr+0],%g2 - addx %g0,%g0,%o4 ! save cy in register - addcc size,-10,size blt Lfin1 subcc %g0,%o4,%g0 ! restore cy /* Add blocks of 8 limbs until less than 8 limbs remain */ @@ -98,7 +97,7 @@ Lfin1: addcc size,8-2,size blt Lend1 subcc %g0,%o4,%g0 ! restore cy /* Add blocks of 2 limbs until less than 2 limbs remain */ -Loop1b: addxcc %g4,%g2,%o4 +Loope1: addxcc %g4,%g2,%o4 ld [s1_ptr+8],%g4 addxcc %g1,%g3,%o5 ld [s1_ptr+12],%g1 @@ -109,7 +108,7 @@ Loop1b: addxcc %g4,%g2,%o4 add s1_ptr,8,s1_ptr add s2_ptr,8,s2_ptr add res_ptr,8,res_ptr - bge Loop1b + bge Loope1 subcc %g0,%o4,%g0 ! restore cy Lend1: addxcc %g4,%g2,%o4 addxcc %g1,%g3,%o5 @@ -144,10 +143,13 @@ L1: xor s1_ptr,res_ptr,%g1 things can be aligned (that we care about) we now know that the alignment of s1_ptr and s2_ptr are the same. */ -L2: andcc s1_ptr,4,%g0 ! s1_ptr unaligned? Side effect: cy=0 - beq L_v2 ! if no, branch +L2: cmp size,1 + be Ljone nop -/* Add least significant limb separately to align res_ptr and s2_ptr */ + andcc s1_ptr,4,%g0 ! s1_ptr unaligned? Side effect: cy=0 + be L_v2 ! if no, branch + nop +/* Add least significant limb separately to align s1_ptr and s2_ptr */ ld [s1_ptr],%g4 add s1_ptr,4,s1_ptr ld [s2_ptr],%g2 @@ -195,9 +197,9 @@ Loop2: ldd [s1_ptr+0],%g2 subcc %g0,%o4,%g0 ! restore cy Lfin2: addcc size,8-2,size -Ltriv: blt Lend2 + blt Lend2 subcc %g0,%o4,%g0 ! restore cy -Loop2b: ldd [s1_ptr+0],%g2 +Loope2: ldd [s1_ptr+0],%g2 ldd [s2_ptr+0],%o4 addxcc %g2,%o4,%g2 st %g2,[res_ptr+0] @@ -208,13 +210,13 @@ Loop2b: ldd [s1_ptr+0],%g2 add s1_ptr,8,s1_ptr add s2_ptr,8,s2_ptr add res_ptr,8,res_ptr - bge Loop2b + bge Loope2 subcc %g0,%o4,%g0 ! restore cy Lend2: andcc size,1,%g0 be Lret2 subcc %g0,%o4,%g0 ! 
restore cy /* Add last limb */ - ld [s1_ptr],%g4 +Ljone: ld [s1_ptr],%g4 ld [s2_ptr],%g2 addxcc %g4,%g2,%o4 st %o4,[res_ptr] diff --git a/sysdeps/sparc/lshift.S b/sysdeps/sparc/lshift.S index 497272a..6844fa2 100644 --- a/sysdeps/sparc/lshift.S +++ b/sysdeps/sparc/lshift.S @@ -1,6 +1,6 @@ ! sparc __mpn_lshift -- -! Copyright (C) 1995 Free Software Foundation, Inc. +! Copyright (C) 1995, 1996 Free Software Foundation, Inc. ! This file is part of the GNU MP Library. @@ -39,7 +39,7 @@ C_SYMBOL_NAME(__mpn_lshift): add %o2,-1,%o2 andcc %o2,4-1,%g4 ! number of limbs in first loop srl %g2,%o5,%g1 ! compute function result - beq L0 ! if multiple of 4 limbs, skip first loop + be L0 ! if multiple of 4 limbs, skip first loop st %g1,[%sp+80] sub %o2,%g4,%o2 ! adjust count for main loop @@ -56,7 +56,7 @@ Loop0: ld [%o1-8],%g3 st %o4,[%o0+0] L0: tst %o2 - beq Lend + be Lend nop Loop: ld [%o1-8],%g3 diff --git a/sysdeps/sparc/rshift.S b/sysdeps/sparc/rshift.S index 3428cfe..5a47926 100644 --- a/sysdeps/sparc/rshift.S +++ b/sysdeps/sparc/rshift.S @@ -1,6 +1,6 @@ ! sparc __mpn_rshift -- -! Copyright (C) 1995 Free Software Foundation, Inc. +! Copyright (C) 1995, 1996 Free Software Foundation, Inc. ! This file is part of the GNU MP Library. @@ -36,7 +36,7 @@ C_SYMBOL_NAME(__mpn_rshift): add %o2,-1,%o2 andcc %o2,4-1,%g4 ! number of limbs in first loop sll %g2,%o5,%g1 ! compute function result - beq L0 ! if multiple of 4 limbs, skip first loop + be L0 ! if multiple of 4 limbs, skip first loop st %g1,[%sp+80] sub %o2,%g4,%o2 ! adjust count for main loop @@ -53,7 +53,7 @@ Loop0: ld [%o1+4],%g3 st %o4,[%o0-4] L0: tst %o2 - beq Lend + be Lend nop Loop: ld [%o1+4],%g3 diff --git a/sysdeps/sparc/sparc64/add_n.s b/sysdeps/sparc/sparc64/add_n.s new file mode 100644 index 0000000..104a89e --- /dev/null +++ b/sysdeps/sparc/sparc64/add_n.s @@ -0,0 +1,57 @@ +! SPARC v9 __mpn_add_n -- Add two limb vectors of the same length > 0 and store +! sum in a third limb vector. + +! 
Copyright (C) 1995, 1996 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + +! INPUT PARAMETERS +! res_ptr %o0 +! s1_ptr %o1 +! s2_ptr %o2 +! size %o3 + +.section ".text" + .align 4 + .global __mpn_add_n + .type __mpn_add_n,#function + .proc 04 +__mpn_add_n: + sub %g0,%o3,%g3 ! g3 = -size, loop count that runs up to zero + sllx %o3,3,%g1 ! g1 = size in bytes + add %o1,%g1,%o1 ! make s1_ptr point at end + add %o2,%g1,%o2 ! make s2_ptr point at end + add %o0,%g1,%o0 ! make res_ptr point at end + mov 0,%o4 ! clear carry variable + sllx %g3,3,%o5 ! compute initial address index + +.Loop: ldx [%o2+%o5],%g1 ! load s2 limb + add %g3,1,%g3 ! increment loop count + ldx [%o1+%o5],%g2 ! load s1 limb + addcc %g1,%o4,%g1 ! add s2 limb and carry variable + movcc %xcc,0,%o4 ! no carry-out: clear o4 (on carry-out o4 was 1 and stays 1) + addcc %g1,%g2,%g1 ! add s1 limb to sum + stx %g1,[%o0+%o5] ! store result + add %o5,8,%o5 ! increment address index + brnz,pt %g3,.Loop ! loop until the count reaches zero + movcs %xcc,1,%o4 ! if s1 add gave carry, record it + + retl + mov %o4,%o0 ! return the final carry +.LLfe1: + .size __mpn_add_n,.LLfe1-__mpn_add_n diff --git a/sysdeps/sparc/sparc64/addmul_1.s b/sysdeps/sparc/sparc64/addmul_1.s new file mode 100644 index 0000000..ef013ee --- /dev/null +++ b/sysdeps/sparc/sparc64/addmul_1.s @@ -0,0 +1,88 @@ +!
SPARC v9 __mpn_addmul_1 -- Multiply a limb vector with a single limb and +! add the product to a second limb vector. + +! Copyright (C) 1996 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + +! INPUT PARAMETERS +! res_ptr o0 +! s1_ptr o1 +! size o2 +! s2_limb o3 + +.section ".text" + .align 4 + .global __mpn_addmul_1 + .type __mpn_addmul_1,#function + .proc 016 +__mpn_addmul_1: + !#PROLOGUE# 0 + save %sp,-160,%sp + !#PROLOGUE# 1 + sub %g0,%i2,%o7 ! o7 = -size, loop index that runs up to zero + sllx %o7,3,%g5 ! g5 = -size in bytes + sub %i1,%g5,%o3 ! o3 = end of s1 + sub %i0,%g5,%o4 ! o4 = end of res + mov 0,%o0 ! zero cy_limb + + srl %i3,0,%o1 ! extract low 32 bits of s2_limb + srlx %i3,32,%i3 ! extract high 32 bits of s2_limb + mov 1,%o2 + sllx %o2,32,%o2 ! o2 = 0x100000000 + + ! hi ! + ! mid-1 ! + ! mid-2 ! + ! lo ! +.Loop: + sllx %o7,3,%g1 ! byte offset of the current limb + ldx [%o3+%g1],%g5 ! load s1 limb + srl %g5,0,%i0 ! zero hi bits + srlx %g5,32,%g5 ! high 32 bits of s1 limb + mulx %o1,%i0,%i4 ! lo product + mulx %i3,%i0,%i1 ! mid-1 product + mulx %o1,%g5,%l2 ! mid-2 product + mulx %i3,%g5,%i5 ! hi product + srlx %i4,32,%i0 ! extract high 32 bits of lo product... + add %i1,%i0,%i1 ! ...and add it to the mid-1 product + addcc %i1,%l2,%i1 ! add mid products + mov 0,%l0 ! we need the carry from that add... + movcs %xcc,%o2,%l0 ! ...compute it and...
+ add %i5,%l0,%i5 ! ...add to bit 32 of the hi product + sllx %i1,32,%i0 ! align low bits of mid product + srl %i4,0,%g5 ! zero high 32 bits of lo product + add %i0,%g5,%i0 ! combine into low 64 bits of result + srlx %i1,32,%i1 ! extract high bits of mid product... + add %i5,%i1,%i1 ! ...and add them to the high result + addcc %i0,%o0,%i0 ! add cy_limb to low 64 bits of result + add %i1,1,%g5 ! high result + 1, used if that add carried + movcs %xcc,%g5,%i1 ! fold the first carry into the high result + add %o7,1,%o7 + ldx [%o4+%g1],%l1 ! load res limb + addcc %l1,%i0,%i0 ! add res limb; this may carry a second time + add %i1,1,%o0 ! new cy_limb, assuming the second add carried + stx %i0,[%o4+%g1] + brnz %o7,.Loop + movcc %xcc,%i1,%o0 ! no second carry: new cy_limb is the high result + + mov %o0,%i0 ! return cy_limb + ret + restore +.LLfe1: + .size __mpn_addmul_1,.LLfe1-__mpn_addmul_1 diff --git a/sysdeps/sparc/sparc64/gmp-mparam.h b/sysdeps/sparc/sparc64/gmp-mparam.h new file mode 100644 index 0000000..05c893f --- /dev/null +++ b/sysdeps/sparc/sparc64/gmp-mparam.h @@ -0,0 +1,26 @@ +/* gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/ + +#define BITS_PER_MP_LIMB 64 +#define BYTES_PER_MP_LIMB 8 +#define BITS_PER_LONGINT 64 +#define BITS_PER_INT 32 +#define BITS_PER_SHORTINT 16 +#define BITS_PER_CHAR 8 diff --git a/sysdeps/sparc/sparc64/lshift.s b/sysdeps/sparc/sparc64/lshift.s new file mode 100644 index 0000000..bd7fa01 --- /dev/null +++ b/sysdeps/sparc/sparc64/lshift.s @@ -0,0 +1,95 @@ +! SPARC v9 __mpn_lshift -- + +! Copyright (C) 1996 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + +! INPUT PARAMETERS +! res_ptr %o0 +! src_ptr %o1 +! size %o2 +! cnt %o3 + +.section ".text" + .align 4 + .global __mpn_lshift + .type __mpn_lshift,#function + .proc 04 +__mpn_lshift: + sllx %o2,3,%g1 ! g1 = size in bytes + add %o1,%g1,%o1 ! make %o1 point at end of src + ldx [%o1-8],%g2 ! load first limb + sub %g0,%o3,%o5 ! negate shift count + add %o0,%g1,%o0 ! make %o0 point at end of res + add %o2,-1,%o2 + and %o2,4-1,%g4 ! number of limbs in first loop + srlx %g2,%o5,%g1 ! compute function result + brz,pn %g4,.L0 ! if multiple of 4 limbs, skip first loop + mov %g1,%g5 ! keep function result in a register, not on the stack + + sub %o2,%g4,%o2 !
adjust count for main loop + +.Loop0: ldx [%o1-16],%g3 + add %o0,-8,%o0 + add %o1,-8,%o1 + add %g4,-1,%g4 + sllx %g2,%o3,%o4 + srlx %g3,%o5,%g1 + mov %g3,%g2 + or %o4,%g1,%o4 + brnz,pt %g4,.Loop0 + stx %o4,[%o0+0] + +.L0: brz,pn %o2,.Lend + nop + +.Loop: ldx [%o1-16],%g3 + add %o0,-32,%o0 + add %o2,-4,%o2 + sllx %g2,%o3,%o4 + srlx %g3,%o5,%g1 + + ldx [%o1-24],%g2 + sllx %g3,%o3,%g4 + or %o4,%g1,%o4 + stx %o4,[%o0+24] + srlx %g2,%o5,%g1 + + ldx [%o1-32],%g3 + sllx %g2,%o3,%o4 + or %g4,%g1,%g4 + stx %g4,[%o0+16] + srlx %g3,%o5,%g1 + + ldx [%o1-40],%g2 + sllx %g3,%o3,%g4 + or %o4,%g1,%o4 + stx %o4,[%o0+8] + srlx %g2,%o5,%g1 + + add %o1,-32,%o1 + or %g4,%g1,%g4 + brnz,pt %o2,.Loop + stx %g4,[%o0+0] + +.Lend: sllx %g2,%o3,%g2 + stx %g2,[%o0-8] + retl + mov %g5,%o0 ! return the saved function result +.LLfe1: + .size __mpn_lshift,.LLfe1-__mpn_lshift diff --git a/sysdeps/sparc/sparc64/mul_1.s b/sysdeps/sparc/sparc64/mul_1.s new file mode 100644 index 0000000..41be370 --- /dev/null +++ b/sysdeps/sparc/sparc64/mul_1.s @@ -0,0 +1,85 @@ +! SPARC v9 __mpn_mul_1 -- Multiply a limb vector with a single limb and +! store the product in a second limb vector. + +! Copyright (C) 1995, 1996 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +!
the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + +! INPUT PARAMETERS +! res_ptr o0 +! s1_ptr o1 +! size o2 +! s2_limb o3 + +.section ".text" + .align 4 + .global __mpn_mul_1 + .type __mpn_mul_1,#function + .proc 016 +__mpn_mul_1: + !#PROLOGUE# 0 + save %sp,-160,%sp + !#PROLOGUE# 1 + sub %g0,%i2,%o7 ! o7 = -size, loop index that runs up to zero + sllx %o7,3,%g5 ! g5 = -size in bytes + sub %i1,%g5,%o3 ! o3 = end of s1 + sub %i0,%g5,%o4 ! o4 = end of res + mov 0,%o0 ! zero cy_limb + + srl %i3,0,%o1 ! extract low 32 bits of s2_limb + srlx %i3,32,%i3 ! extract high 32 bits of s2_limb + mov 1,%o2 + sllx %o2,32,%o2 ! o2 = 0x100000000 + + ! hi ! + ! mid-1 ! + ! mid-2 ! + ! lo ! +.Loop: + sllx %o7,3,%g1 ! byte offset of the current limb + ldx [%o3+%g1],%g5 ! load s1 limb + srl %g5,0,%i0 ! zero hi bits + srlx %g5,32,%g5 ! high 32 bits of s1 limb + mulx %o1,%i0,%i4 ! lo product + mulx %i3,%i0,%i1 ! mid-1 product + mulx %o1,%g5,%l2 ! mid-2 product + mulx %i3,%g5,%i5 ! hi product + srlx %i4,32,%i0 ! extract high 32 bits of lo product... + add %i1,%i0,%i1 ! ...and add it to the mid-1 product + addcc %i1,%l2,%i1 ! add mid products + mov 0,%l0 ! we need the carry from that add... + movcs %xcc,%o2,%l0 ! ...compute it and... + add %i5,%l0,%i5 ! ...add to bit 32 of the hi product + sllx %i1,32,%i0 ! align low bits of mid product + srl %i4,0,%g5 ! zero high 32 bits of lo product + add %i0,%g5,%i0 ! combine into low 64 bits of result + srlx %i1,32,%i1 ! extract high bits of mid product... + add %i5,%i1,%i1 ! ...and add them to the high result + addcc %i0,%o0,%i0 ! add cy_limb to low 64 bits of result + mov 0,%g5 ! g5 = carry out of that add... + movcs %xcc,1,%g5 ! ...1 if it carried, else 0 + add %o7,1,%o7 + stx %i0,[%o4+%g1] ! store result limb + brnz %o7,.Loop + add %i1,%g5,%o0 ! compute new cy_limb + + mov %o0,%i0 ! return cy_limb + ret + restore +.LLfe1: + .size __mpn_mul_1,.LLfe1-__mpn_mul_1 diff --git a/sysdeps/sparc/sparc64/rshift.s b/sysdeps/sparc/sparc64/rshift.s new file mode 100644 index 0000000..971deec --- /dev/null +++ b/sysdeps/sparc/sparc64/rshift.s @@ -0,0 +1,92 @@ +! SPARC v9 __mpn_rshift -- + +! Copyright (C) 1996 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +!
The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + +! INPUT PARAMETERS +! res_ptr %o0 +! src_ptr %o1 +! size %o2 +! cnt %o3 + +.section ".text" + .align 4 + .global __mpn_rshift + .type __mpn_rshift,#function + .proc 04 +__mpn_rshift: + ldx [%o1],%g2 ! load first limb + sub %g0,%o3,%o5 ! negate shift count + add %o2,-1,%o2 + and %o2,4-1,%g4 ! number of limbs in first loop + sllx %g2,%o5,%g1 ! compute function result + brz,pn %g4,.L0 ! if multiple of 4 limbs, skip first loop + mov %g1,%g5 ! keep function result in a register, not on the stack + + sub %o2,%g4,%o2 !
adjust count for main loop + +.Loop0: ldx [%o1+8],%g3 + add %o0,8,%o0 + add %o1,8,%o1 + add %g4,-1,%g4 + srlx %g2,%o3,%o4 + sllx %g3,%o5,%g1 + mov %g3,%g2 + or %o4,%g1,%o4 + brnz,pt %g4,.Loop0 + stx %o4,[%o0-8] + +.L0: brz,pn %o2,.Lend + nop + +.Loop: ldx [%o1+8],%g3 + add %o0,32,%o0 + add %o2,-4,%o2 + srlx %g2,%o3,%o4 + sllx %g3,%o5,%g1 + + ldx [%o1+16],%g2 + srlx %g3,%o3,%g4 + or %o4,%g1,%o4 + stx %o4,[%o0-32] + sllx %g2,%o5,%g1 + + ldx [%o1+24],%g3 + srlx %g2,%o3,%o4 + or %g4,%g1,%g4 + stx %g4,[%o0-24] + sllx %g3,%o5,%g1 + + ldx [%o1+32],%g2 + srlx %g3,%o3,%g4 + or %o4,%g1,%o4 + stx %o4,[%o0-16] + sllx %g2,%o5,%g1 + + add %o1,32,%o1 + or %g4,%g1,%g4 + brnz %o2,.Loop + stx %g4,[%o0-8] + +.Lend: srlx %g2,%o3,%g2 + stx %g2,[%o0-0] + retl + mov %g5,%o0 ! return the saved function result +.LLfe1: + .size __mpn_rshift,.LLfe1-__mpn_rshift diff --git a/sysdeps/sparc/sparc64/sub_n.s b/sysdeps/sparc/sparc64/sub_n.s new file mode 100644 index 0000000..7099bf4 --- /dev/null +++ b/sysdeps/sparc/sparc64/sub_n.s @@ -0,0 +1,57 @@ +! SPARC v9 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and +! store difference in a third limb vector. + +! Copyright (C) 1995, 1996 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ + +! INPUT PARAMETERS +! res_ptr %o0 +! s1_ptr %o1 +! s2_ptr %o2 +! size %o3 + +.section ".text" + .align 4 + .global __mpn_sub_n + .type __mpn_sub_n,#function + .proc 04 +__mpn_sub_n: + sub %g0,%o3,%g3 ! g3 = -size, loop count that runs up to zero + sllx %o3,3,%g1 ! g1 = size in bytes + add %o1,%g1,%o1 ! make s1_ptr point at end + add %o2,%g1,%o2 ! make s2_ptr point at end + add %o0,%g1,%o0 ! make res_ptr point at end + mov 0,%o4 ! clear carry variable + sllx %g3,3,%o5 ! compute initial address index + +.Loop: ldx [%o2+%o5],%g1 ! load s2 limb + add %g3,1,%g3 ! increment loop count + ldx [%o1+%o5],%g2 ! load s1 limb + addcc %g1,%o4,%g1 ! add s2 limb and carry variable + movcc %xcc,0,%o4 ! no carry-out: clear o4 (on carry-out o4 was 1 and stays 1) + subcc %g2,%g1,%g1 ! subtract that sum from the s1 limb (s1 - s2 - carry) + stx %g1,[%o0+%o5] ! store result + add %o5,8,%o5 ! increment address index + brnz,pt %g3,.Loop ! loop until the count reaches zero + movcs %xcc,1,%o4 ! if the subtract borrowed, record it + + retl + mov %o4,%o0 ! return the final borrow +.LLfe1: + .size __mpn_sub_n,.LLfe1-__mpn_sub_n diff --git a/sysdeps/sparc/sparc64/submul_1.s b/sysdeps/sparc/sparc64/submul_1.s new file mode 100644 index 0000000..f0df38c --- /dev/null +++ b/sysdeps/sparc/sparc64/submul_1.s @@ -0,0 +1,88 @@ +! SPARC v9 __mpn_submul_1 -- Multiply a limb vector with a single limb and +! subtract the product from a second limb vector. + +! Copyright (C) 1996 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Library General Public License as published by +! the Free Software Foundation; either version 2 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +! License for more details. + +! You should have received a copy of the GNU Library General Public License +!
along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + +! INPUT PARAMETERS +! res_ptr o0 +! s1_ptr o1 +! size o2 +! s2_limb o3 + +.section ".text" + .align 4 + .global __mpn_submul_1 + .type __mpn_submul_1,#function + .proc 016 +__mpn_submul_1: + !#PROLOGUE# 0 + save %sp,-160,%sp + !#PROLOGUE# 1 + sub %g0,%i2,%o7 ! o7 = -size, loop index that runs up to zero + sllx %o7,3,%g5 ! g5 = -size in bytes + sub %i1,%g5,%o3 ! o3 = end of s1 + sub %i0,%g5,%o4 ! o4 = end of res + mov 0,%o0 ! zero cy_limb + + srl %i3,0,%o1 ! extract low 32 bits of s2_limb + srlx %i3,32,%i3 ! extract high 32 bits of s2_limb + mov 1,%o2 + sllx %o2,32,%o2 ! o2 = 0x100000000 + + ! hi ! + ! mid-1 ! + ! mid-2 ! + ! lo ! +.Loop: + sllx %o7,3,%g1 ! byte offset of the current limb + ldx [%o3+%g1],%g5 ! load s1 limb + srl %g5,0,%i0 ! zero hi bits + srlx %g5,32,%g5 ! high 32 bits of s1 limb + mulx %o1,%i0,%i4 ! lo product + mulx %i3,%i0,%i1 ! mid-1 product + mulx %o1,%g5,%l2 ! mid-2 product + mulx %i3,%g5,%i5 ! hi product + srlx %i4,32,%i0 ! extract high 32 bits of lo product... + add %i1,%i0,%i1 ! ...and add it to the mid-1 product + addcc %i1,%l2,%i1 ! add mid products + mov 0,%l0 ! we need the carry from that add... + movcs %xcc,%o2,%l0 ! ...compute it and... + add %i5,%l0,%i5 ! ...add to bit 32 of the hi product + sllx %i1,32,%i0 ! align low bits of mid product + srl %i4,0,%g5 ! zero high 32 bits of lo product + add %i0,%g5,%i0 ! combine into low 64 bits of result + srlx %i1,32,%i1 ! extract high bits of mid product... + add %i5,%i1,%i1 ! ...and add them to the high result + addcc %i0,%o0,%i0 ! add cy_limb to low 64 bits of result + add %i1,1,%g5 ! high result + 1, used if that add carried + movcs %xcc,%g5,%i1 ! fold the carry into the high result + add %o7,1,%o7 + ldx [%o4+%g1],%l1 ! load res limb + subcc %l1,%i0,%i0 ! subtract from res limb; this may borrow as well + add %i1,1,%o0 ! new cy_limb, assuming the subtract borrowed + stx %i0,[%o4+%g1] + brnz %o7,.Loop + movcc %xcc,%i1,%o0 !
no borrow: new cy_limb is the high result + + mov %o0,%i0 ! return cy_limb + ret + restore +.LLfe1: + .size __mpn_submul_1,.LLfe1-__mpn_submul_1 diff --git a/sysdeps/sparc/sparc8/addmul_1.S b/sysdeps/sparc/sparc8/addmul_1.S index d1de0c3..1cf5128 100644 --- a/sysdeps/sparc/sparc8/addmul_1.S +++ b/sysdeps/sparc/sparc8/addmul_1.S @@ -1,7 +1,7 @@ ! SPARC v8 __mpn_addmul_1 -- Multiply a limb vector with a limb and ! add the result to a second limb vector. -! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc. +! Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc. ! This file is part of the GNU MP Library. diff --git a/sysdeps/sparc/sparc8/mul_1.S b/sysdeps/sparc/sparc8/mul_1.S index 42717be..d56394e 100644 --- a/sysdeps/sparc/sparc8/mul_1.S +++ b/sysdeps/sparc/sparc8/mul_1.S @@ -1,7 +1,7 @@ ! SPARC v8 __mpn_mul_1 -- Multiply a limb vector with a single limb and ! store the product in a second limb vector. -! Copyright (C) 1992, 1994 Free Software Foundation, Inc. +! Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. ! This file is part of the GNU MP Library. diff --git a/sysdeps/sparc/sub_n.S b/sysdeps/sparc/sub_n.S index 2e217ed..9ff5b7b 100644 --- a/sysdeps/sparc/sub_n.S +++ b/sysdeps/sparc/sub_n.S @@ -1,7 +1,7 @@ -! sparc __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and +! SPARC __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and ! store difference in a third limb vector. -! Copyright (C) 1995 Free Software Foundation, Inc. +! Copyright (C) 1995, 1996 Free Software Foundation, Inc. ! This file is part of the GNU MP Library. @@ -38,7 +38,7 @@ C_SYMBOL_NAME(__mpn_sub_n): nop ! ** V1a ** andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0 - beq L_v1 ! if no, branch + be L_v1 ! if no, branch nop /* Add least significant limb separately to align res_ptr and s2_ptr */ ld [s1_ptr],%g4 @@ -133,9 +133,9 @@ L1: xor s1_ptr,res_ptr,%g1 nop ! ** V1b ** andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0 - beq L_v1b !
if no, branch + be L_v1b ! if no, branch nop -/* Add least significant limb separately to align res_ptr and s2_ptr */ +/* Add least significant limb separately to align res_ptr and s1_ptr */ ld [s2_ptr],%g4 add s2_ptr,4,s2_ptr ld [s1_ptr],%g2 @@ -232,7 +232,7 @@ L2: cmp size,1 be Ljone nop andcc s1_ptr,4,%g0 ! s1_ptr unaligned? Side effect: cy=0 - beq L_v2 ! if no, branch + be L_v2 ! if no, branch nop /* Add least significant limb separately to align s1_ptr and s2_ptr */ ld [s1_ptr],%g4 diff --git a/sysdeps/unix/sysv/linux/m68k/profil-counter.h b/sysdeps/unix/sysv/linux/m68k/profil-counter.h new file mode 100644 index 0000000..4e7b132 --- /dev/null +++ b/sysdeps/unix/sysv/linux/m68k/profil-counter.h @@ -0,0 +1,24 @@ +/* Machine-dependent SIGPROF signal handler. Linux/m68k version. +Copyright (C) 1996 Free Software Foundation, Inc. +This file is part of the GNU C Library. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. + +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 675 Mass Ave, +Cambridge, MA 02139, USA. */ + +static void +profil_counter (int signr, int code, struct sigcontext *scp) +{ + profil_count ((void *) scp->sc_pc); /* Pass the sc_pc field of the signal context to profil_count; signr and code are unused. */ +} diff --git a/sysdeps/vax/gmp-mparam.h b/sysdeps/vax/gmp-mparam.h index 687f12a..ddc308a 100644 --- a/sysdeps/vax/gmp-mparam.h +++ b/sysdeps/vax/gmp-mparam.h @@ -1,6 +1,6 @@ /* gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc. +Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc. This file is part of the GNU MP Library. diff --git a/sysdeps/z8000/mul_1.s b/sysdeps/z8000/mul_1.s index 2075225..0150e85 100644 --- a/sysdeps/z8000/mul_1.s +++ b/sysdeps/z8000/mul_1.s @@ -1,7 +1,7 @@ ! Z8000 __mpn_mul_1 -- Multiply a limb vector with a limb and store ! the result in a second limb vector. -! Copyright (C) 1993, 1994 Free Software Foundation, Inc. +! Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc. ! This file is part of the GNU MP Library. -- cgit v1.1