aboutsummaryrefslogtreecommitdiff
path: root/sysdeps
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps')
-rw-r--r--sysdeps/alpha/addmul_1.s23
-rw-r--r--sysdeps/alpha/alphaev5/add_n.s175
-rw-r--r--sysdeps/alpha/alphaev5/lshift.s30
-rw-r--r--sysdeps/alpha/alphaev5/rshift.s28
-rw-r--r--sysdeps/alpha/alphaev5/sub_n.s148
-rw-r--r--sysdeps/alpha/lshift.s14
-rw-r--r--sysdeps/alpha/mul_1.s2
-rw-r--r--sysdeps/alpha/rshift.s16
-rw-r--r--sysdeps/alpha/submul_1.s23
-rw-r--r--sysdeps/alpha/udiv_qrnnd.S34
-rw-r--r--sysdeps/generic/add_n.c8
-rw-r--r--sysdeps/generic/addmul_1.c6
-rw-r--r--sysdeps/generic/cmp.c8
-rw-r--r--sysdeps/generic/divmod_1.c8
-rw-r--r--sysdeps/generic/lshift.c8
-rw-r--r--sysdeps/generic/mod_1.c8
-rw-r--r--sysdeps/generic/mul.c36
-rw-r--r--sysdeps/generic/mul_1.c6
-rw-r--r--sysdeps/generic/mul_n.c110
-rw-r--r--sysdeps/generic/rshift.c8
-rw-r--r--sysdeps/generic/sub_n.c8
-rw-r--r--sysdeps/generic/submul_1.c6
-rw-r--r--sysdeps/i386/gmp-mparam.h2
-rw-r--r--sysdeps/i386/i586/add_n.S133
-rw-r--r--sysdeps/i386/i586/addmul_1.S34
-rw-r--r--sysdeps/i386/i586/mul_1.S20
-rw-r--r--sysdeps/i386/i586/sub_n.S133
-rw-r--r--sysdeps/i386/i586/submul_1.S16
-rw-r--r--sysdeps/m68k/add_n.S75
-rw-r--r--sysdeps/m68k/lshift.S150
-rw-r--r--sysdeps/m68k/m68020/addmul_1.S75
-rw-r--r--sysdeps/m68k/m68020/mul_1.S83
-rw-r--r--sysdeps/m68k/m68020/submul_1.S75
-rw-r--r--sysdeps/m68k/rshift.S149
-rw-r--r--sysdeps/m68k/sub_n.S75
-rw-r--r--sysdeps/m88k/add_n.s2
-rw-r--r--sysdeps/m88k/m88110/add_n.S199
-rw-r--r--sysdeps/m88k/m88110/addmul_1.s60
-rw-r--r--sysdeps/m88k/m88110/mul_1.s28
-rw-r--r--sysdeps/m88k/m88110/sub_n.S275
-rw-r--r--sysdeps/m88k/mul_1.s74
-rw-r--r--sysdeps/m88k/sub_n.s7
-rw-r--r--sysdeps/mips/addmul_1.s4
-rw-r--r--sysdeps/mips/mips3/addmul_1.s2
-rw-r--r--sysdeps/mips/mips3/mul_1.s2
-rw-r--r--sysdeps/mips/mips3/submul_1.s2
-rw-r--r--sysdeps/mips/mul_1.s4
-rw-r--r--sysdeps/mips/submul_1.s4
-rw-r--r--sysdeps/rs6000/add_n.s2
-rw-r--r--sysdeps/rs6000/sub_n.s2
-rw-r--r--sysdeps/sparc/add_n.S42
-rw-r--r--sysdeps/sparc/lshift.S6
-rw-r--r--sysdeps/sparc/rshift.S6
-rw-r--r--sysdeps/sparc/sparc64/add_n.s57
-rw-r--r--sysdeps/sparc/sparc64/addmul_1.s88
-rw-r--r--sysdeps/sparc/sparc64/gmp-mparam.h26
-rw-r--r--sysdeps/sparc/sparc64/lshift.s95
-rw-r--r--sysdeps/sparc/sparc64/mul_1.s85
-rw-r--r--sysdeps/sparc/sparc64/rshift.s92
-rw-r--r--sysdeps/sparc/sparc64/sub_n.s57
-rw-r--r--sysdeps/sparc/sparc64/submul_1.s88
-rw-r--r--sysdeps/sparc/sparc8/addmul_1.S2
-rw-r--r--sysdeps/sparc/sparc8/mul_1.S2
-rw-r--r--sysdeps/sparc/sub_n.S12
-rw-r--r--sysdeps/unix/sysv/linux/m68k/profil-counter.h24
-rw-r--r--sysdeps/vax/gmp-mparam.h2
-rw-r--r--sysdeps/z8000/mul_1.s2
67 files changed, 2329 insertions, 757 deletions
diff --git a/sysdeps/alpha/addmul_1.s b/sysdeps/alpha/addmul_1.s
index 46d277d..8b168cb 100644
--- a/sysdeps/alpha/addmul_1.s
+++ b/sysdeps/alpha/addmul_1.s
@@ -26,16 +26,7 @@
# size r18
# s2_limb r19
- # This code runs at 42 cycles/limb on the 21064.
-
- # To improve performance for long multiplications, we would use
- # 'fetch' for S1 and 'fetch_m' for RES. It's not obvious how to use
- # these instructions without slowing down the general code: 1. We can
- # only have two prefetches in operation at any time in the Alpha
- # architecture. 2. There will seldom be any special alignment
- # between RES_PTR and S1_PTR. Maybe we can simply divide the current
- # loop into an inner and outer loop, having the inner loop handle
- # exactly one prefetch block?
+ # This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
.set noreorder
.set noat
@@ -52,7 +43,7 @@ __mpn_addmul_1:
mulq $2,$19,$3 # $3 = prod_low
ldq $5,0($16) # $5 = *res_ptr
umulh $2,$19,$0 # $0 = prod_high
- beq $18,Lend1 # jump if size was == 1
+ beq $18,.Lend1 # jump if size was == 1
ldq $2,0($17) # $2 = s1_limb
addq $17,8,$17 # s1_ptr++
subq $18,1,$18 # size--
@@ -60,10 +51,10 @@ __mpn_addmul_1:
cmpult $3,$5,$4
stq $3,0($16)
addq $16,8,$16 # res_ptr++
- beq $18,Lend2 # jump if size was == 2
+ beq $18,.Lend2 # jump if size was == 2
.align 3
-Loop: mulq $2,$19,$3 # $3 = prod_low
+.Loop: mulq $2,$19,$3 # $3 = prod_low
ldq $5,0($16) # $5 = *res_ptr
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
subq $18,1,$18 # size--
@@ -77,9 +68,9 @@ Loop: mulq $2,$19,$3 # $3 = prod_low
stq $3,0($16)
addq $16,8,$16 # res_ptr++
addq $5,$0,$0 # combine carries
- bne $18,Loop
+ bne $18,.Loop
-Lend2: mulq $2,$19,$3 # $3 = prod_low
+.Lend2: mulq $2,$19,$3 # $3 = prod_low
ldq $5,0($16) # $5 = *res_ptr
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
umulh $2,$19,$4 # $4 = cy_limb
@@ -91,7 +82,7 @@ Lend2: mulq $2,$19,$3 # $3 = prod_low
addq $5,$0,$0 # combine carries
addq $4,$0,$0 # cy_limb = prod_high + cy
ret $31,($26),1
-Lend1: addq $5,$3,$3
+.Lend1: addq $5,$3,$3
cmpult $3,$5,$5
stq $3,0($16)
addq $0,$5,$0
diff --git a/sysdeps/alpha/alphaev5/add_n.s b/sysdeps/alpha/alphaev5/add_n.s
index 2aaf041..66cf82b 100644
--- a/sysdeps/alpha/alphaev5/add_n.s
+++ b/sysdeps/alpha/alphaev5/add_n.s
@@ -35,84 +35,113 @@
__mpn_add_n:
.frame $30,0,$26,0
- ldq $3,0($17)
- ldq $4,0($18)
-
- subq $19,1,$19
- and $19,4-1,$2 # number of limbs in first loop
- bis $31,$31,$0
- beq $2,.L0 # if multiple of 4 limbs, skip first loop
-
- subq $19,$2,$19
-
-.Loop0: subq $2,1,$2
+ or $31,$31,$25 # clear cy
+ subq $19,4,$19 # decr loop cnt
+ blt $19,.Lend2 # if less than 4 limbs, goto 2nd loop
+ # Start software pipeline for 1st loop
+ ldq $0,0($18)
+ ldq $1,8($18)
+ ldq $4,0($17)
ldq $5,8($17)
- addq $4,$0,$4
- ldq $6,8($18)
- cmpult $4,$0,$1
- addq $3,$4,$4
- cmpult $4,$3,$0
- stq $4,0($16)
- or $0,$1,$0
-
- addq $17,8,$17
- addq $18,8,$18
- bis $5,$5,$3
- bis $6,$6,$4
- addq $16,8,$16
- bne $2,.Loop0
-
-.L0: beq $19,.Lend
-
+ addq $17,32,$17 # update s1_ptr
+ ldq $2,16($18)
+ addq $0,$4,$20 # 1st main add
+ ldq $3,24($18)
+ subq $19,4,$19 # decr loop cnt
+ ldq $6,-16($17)
+ cmpult $20,$0,$25 # compute cy from last add
+ ldq $7,-8($17)
+ addq $1,$25,$28 # cy add
+ addq $18,32,$18 # update s2_ptr
+ addq $5,$28,$21 # 2nd main add
+ cmpult $28,$25,$8 # compute cy from last add
+ blt $19,.Lend1 # if less than 4 limbs remain, jump
+ # 1st loop handles groups of 4 limbs in a software pipeline
.align 4
-.Loop: subq $19,4,$19
- unop
-
- ldq $6,8($18)
- addq $4,$0,$0
+.Loop: cmpult $21,$28,$25 # compute cy from last add
+ ldq $0,0($18)
+ or $8,$25,$25 # combine cy from the two adds
+ ldq $1,8($18)
+ addq $2,$25,$28 # cy add
+ ldq $4,0($17)
+ addq $28,$6,$22 # 3rd main add
ldq $5,8($17)
- cmpult $0,$4,$1
- ldq $4,16($18)
- addq $3,$0,$20
- cmpult $20,$3,$0
- ldq $3,16($17)
- or $0,$1,$0
- addq $6,$0,$0
- cmpult $0,$6,$1
- ldq $6,24($18)
- addq $5,$0,$21
- cmpult $21,$5,$0
- ldq $5,24($17)
- or $0,$1,$0
- addq $4,$0,$0
- cmpult $0,$4,$1
- ldq $4,32($18)
- addq $3,$0,$22
- cmpult $22,$3,$0
- ldq $3,32($17)
- or $0,$1,$0
- addq $6,$0,$0
- cmpult $0,$6,$1
- addq $5,$0,$23
- cmpult $23,$5,$0
- or $0,$1,$0
-
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $22,$28,$25 # compute cy from last add
stq $20,0($16)
+ or $8,$25,$25 # combine cy from the two adds
stq $21,8($16)
- stq $22,16($16)
- stq $23,24($16)
-
- addq $17,32,$17
- addq $18,32,$18
- addq $16,32,$16
- bne $19,.Loop
+ addq $3,$25,$28 # cy add
+ addq $28,$7,$23 # 4th main add
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $23,$28,$25 # compute cy from last add
+ addq $17,32,$17 # update s1_ptr
+ or $8,$25,$25 # combine cy from the two adds
+ addq $16,32,$16 # update res_ptr
+ addq $0,$25,$28 # cy add
+ ldq $2,16($18)
+ addq $4,$28,$20 # 1st main add
+ ldq $3,24($18)
+ cmpult $28,$25,$8 # compute cy from last add
+ ldq $6,-16($17)
+ cmpult $20,$28,$25 # compute cy from last add
+ ldq $7,-8($17)
+ or $8,$25,$25 # combine cy from the two adds
+ subq $19,4,$19 # decr loop cnt
+ stq $22,-16($16)
+ addq $1,$25,$28 # cy add
+ stq $23,-8($16)
+ addq $5,$28,$21 # 2nd main add
+ addq $18,32,$18 # update s2_ptr
+ cmpult $28,$25,$8 # compute cy from last add
+ bge $19,.Loop
+ # Finish software pipeline for 1st loop
+.Lend1: cmpult $21,$28,$25 # compute cy from last add
+ or $8,$25,$25 # combine cy from the two adds
+ addq $2,$25,$28 # cy add
+ addq $28,$6,$22 # 3rd main add
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $22,$28,$25 # compute cy from last add
+ stq $20,0($16)
+ or $8,$25,$25 # combine cy from the two adds
+ stq $21,8($16)
+ addq $3,$25,$28 # cy add
+ addq $28,$7,$23 # 4th main add
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $23,$28,$25 # compute cy from last add
+ or $8,$25,$25 # combine cy from the two adds
+ addq $16,32,$16 # update res_ptr
+ stq $22,-16($16)
+ stq $23,-8($16)
+.Lend2: addq $19,4,$19 # restore loop cnt
+ beq $19,.Lret
+ # Start software pipeline for 2nd loop
+ ldq $0,0($18)
+ ldq $4,0($17)
+ subq $19,1,$19
+ beq $19,.Lend0
+ # 2nd loop handles remaining 1-3 limbs
+ .align 4
+.Loop0: addq $0,$25,$28 # cy add
+ ldq $0,8($18)
+ addq $4,$28,$20 # main add
+ ldq $4,8($17)
+ addq $18,8,$18
+ cmpult $28,$25,$8 # compute cy from last add
+ addq $17,8,$17
+ stq $20,0($16)
+ cmpult $20,$28,$25 # compute cy from last add
+ subq $19,1,$19 # decr loop cnt
+ or $8,$25,$25 # combine cy from the two adds
+ addq $16,8,$16
+ bne $19,.Loop0
+.Lend0: addq $0,$25,$28 # cy add
+ addq $4,$28,$20 # main add
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $20,$28,$25 # compute cy from last add
+ stq $20,0($16)
+ or $8,$25,$25 # combine cy from the two adds
-.Lend: addq $4,$0,$4
- cmpult $4,$0,$1
- addq $3,$4,$4
- cmpult $4,$3,$0
- stq $4,0($16)
- or $0,$1,$0
+.Lret: or $25,$31,$0 # return cy
ret $31,($26),1
-
.end __mpn_add_n
diff --git a/sysdeps/alpha/alphaev5/lshift.s b/sysdeps/alpha/alphaev5/lshift.s
index fdb0895..392b424 100644
--- a/sysdeps/alpha/alphaev5/lshift.s
+++ b/sysdeps/alpha/alphaev5/lshift.s
@@ -25,7 +25,7 @@
# size r18
# cnt r19
- # This code runs at 4.25 cycles/limb on the EV5.
+ # This code runs at 3.25 cycles/limb on the EV5.
.set noreorder
.set noat
@@ -44,11 +44,11 @@ __mpn_lshift:
and $18,4-1,$28 # number of limbs in first loop
srl $4,$20,$0 # compute function result
- beq $28,L0
+ beq $28,.L0
subq $18,$28,$18
.align 3
-Loop0: ldq $3,-16($17)
+.Loop0: ldq $3,-16($17)
subq $16,8,$16
sll $4,$19,$5
subq $17,8,$17
@@ -57,17 +57,17 @@ Loop0: ldq $3,-16($17)
or $3,$3,$4
or $5,$6,$8
stq $8,0($16)
- bne $28,Loop0
+ bne $28,.Loop0
-L0: sll $4,$19,$24
- beq $18,Lend
+.L0: sll $4,$19,$24
+ beq $18,.Lend
# warm up phase 1
ldq $1,-16($17)
subq $18,4,$18
ldq $2,-24($17)
ldq $3,-32($17)
ldq $4,-40($17)
- beq $18,Lcool1
+ beq $18,.Lend1
# warm up phase 2
srl $1,$20,$7
sll $1,$19,$21
@@ -84,10 +84,10 @@ L0: sll $4,$19,$24
sll $4,$19,$24
ldq $4,-72($17)
subq $18,4,$18
- beq $18,Lcool1
+ beq $18,.Lend2
.align 4
# main loop
-Loop: stq $7,-8($16)
+.Loop: stq $7,-8($16)
or $5,$22,$5
stq $8,-16($16)
or $6,$23,$6
@@ -113,16 +113,14 @@ Loop: stq $7,-8($16)
subq $16,32,$16
srl $4,$20,$6
- ldq $3,-96($17
+ ldq $3,-96($17)
sll $4,$19,$24
ldq $4,-104($17)
subq $17,32,$17
- bne $18,Loop
- unop
- unop
+ bne $18,.Loop
# cool down phase 2/1
-Lcool1: stq $7,-8($16)
+.Lend2: stq $7,-8($16)
or $5,$22,$5
stq $8,-16($16)
or $6,$23,$6
@@ -150,7 +148,7 @@ Lcool1: stq $7,-8($16)
ret $31,($26),1
# cool down phase 1/1
-Lcool1: srl $1,$20,$7
+.Lend1: srl $1,$20,$7
sll $1,$19,$21
srl $2,$20,$8
sll $2,$19,$22
@@ -170,6 +168,6 @@ Lcool1: srl $1,$20,$7
stq $24,-40($16)
ret $31,($26),1
-Lend stq $24,-8($16)
+.Lend: stq $24,-8($16)
ret $31,($26),1
.end __mpn_lshift
diff --git a/sysdeps/alpha/alphaev5/rshift.s b/sysdeps/alpha/alphaev5/rshift.s
index 1da9960..d20dde3 100644
--- a/sysdeps/alpha/alphaev5/rshift.s
+++ b/sysdeps/alpha/alphaev5/rshift.s
@@ -25,7 +25,7 @@
# size r18
# cnt r19
- # This code runs at 4.25 cycles/limb on the EV5.
+ # This code runs at 3.25 cycles/limb on the EV5.
.set noreorder
.set noat
@@ -42,11 +42,11 @@ __mpn_rshift:
and $18,4-1,$28 # number of limbs in first loop
sll $4,$20,$0 # compute function result
- beq $28,L0
+ beq $28,.L0
subq $18,$28,$18
.align 3
-Loop0: ldq $3,8($17)
+.Loop0: ldq $3,8($17)
addq $16,8,$16
srl $4,$19,$5
addq $17,8,$17
@@ -55,17 +55,17 @@ Loop0: ldq $3,8($17)
or $3,$3,$4
or $5,$6,$8
stq $8,-8($16)
- bne $28,Loop0
+ bne $28,.Loop0
-L0: srl $4,$19,$24
- beq $18,Lend
+.L0: srl $4,$19,$24
+ beq $18,.Lend
# warm up phase 1
ldq $1,8($17)
subq $18,4,$18
ldq $2,16($17)
ldq $3,24($17)
ldq $4,32($17)
- beq $18,Lcool1
+ beq $18,.Lend1
# warm up phase 2
sll $1,$20,$7
srl $1,$19,$21
@@ -82,10 +82,10 @@ L0: srl $4,$19,$24
srl $4,$19,$24
ldq $4,64($17)
subq $18,4,$18
- beq $18,Lcool2
+ beq $18,.Lend2
.align 4
# main loop
-Loop: stq $7,0($16)
+.Loop: stq $7,0($16)
or $5,$22,$5
stq $8,8($16)
or $6,$23,$6
@@ -116,11 +116,9 @@ Loop: stq $7,0($16)
ldq $4,96($17)
addq $17,32,$17
- bne $18,Loop
- unop
- unop
+ bne $18,.Loop
# cool down phase 2/1
-Lcool2: stq $7,0($16)
+.Lend2: stq $7,0($16)
or $5,$22,$5
stq $8,8($16)
or $6,$23,$6
@@ -148,7 +146,7 @@ Lcool2: stq $7,0($16)
ret $31,($26),1
# cool down phase 1/1
-Lcool1: sll $1,$20,$7
+.Lend1: sll $1,$20,$7
srl $1,$19,$21
sll $2,$20,$8
srl $2,$19,$22
@@ -168,6 +166,6 @@ Lcool1: sll $1,$20,$7
stq $24,32($16)
ret $31,($26),1
-Lend: stq $24,0($16)
+.Lend: stq $24,0($16)
ret $31,($26),1
.end __mpn_rshift
diff --git a/sysdeps/alpha/alphaev5/sub_n.s b/sysdeps/alpha/alphaev5/sub_n.s
new file mode 100644
index 0000000..c9f3a4e
--- /dev/null
+++ b/sysdeps/alpha/alphaev5/sub_n.s
@@ -0,0 +1,148 @@
+ # Alpha __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+ # store difference in a third limb vector.
+
+ # Copyright (C) 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $16
+ # s1_ptr $17
+ # s2_ptr $18
+ # size $19
+
+ .set noreorder
+ .set noat
+.text
+ .align 3
+ .globl __mpn_sub_n
+ .ent __mpn_sub_n
+__mpn_sub_n:
+ .frame $30,0,$26,0
+
+ or $31,$31,$25 # clear cy
+ subq $19,4,$19 # decr loop cnt
+ blt $19,.Lend2 # if less than 4 limbs, goto 2nd loop
+ # Start software pipeline for 1st loop
+ ldq $0,0($18)
+ ldq $1,8($18)
+ ldq $4,0($17)
+ ldq $5,8($17)
+ addq $17,32,$17 # update s1_ptr
+ ldq $2,16($18)
+ subq $4,$0,$20 # 1st main sub
+ ldq $3,24($18)
+ subq $19,4,$19 # decr loop cnt
+ ldq $6,-16($17)
+ cmpult $4,$20,$25 # compute cy from last sub
+ ldq $7,-8($17)
+ addq $1,$25,$28 # cy add
+ addq $18,32,$18 # update s2_ptr
+ subq $5,$28,$21 # 2nd main sub
+ cmpult $28,$25,$8 # compute cy from last add
+ blt $19,.Lend1 # if less than 4 limbs remain, jump
+ # 1st loop handles groups of 4 limbs in a software pipeline
+ .align 4
+.Loop: cmpult $5,$21,$25 # compute cy from last add
+ ldq $0,0($18)
+ or $8,$25,$25 # combine cy from the two adds
+ ldq $1,8($18)
+ addq $2,$25,$28 # cy add
+ ldq $4,0($17)
+ subq $6,$28,$22 # 3rd main sub
+ ldq $5,8($17)
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $6,$22,$25 # compute cy from last add
+ stq $20,0($16)
+ or $8,$25,$25 # combine cy from the two adds
+ stq $21,8($16)
+ addq $3,$25,$28 # cy add
+ subq $7,$28,$23 # 4th main sub
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $7,$23,$25 # compute cy from last add
+ addq $17,32,$17 # update s1_ptr
+ or $8,$25,$25 # combine cy from the two adds
+ addq $16,32,$16 # update res_ptr
+ addq $0,$25,$28 # cy add
+ ldq $2,16($18)
+ subq $4,$28,$20 # 1st main sub
+ ldq $3,24($18)
+ cmpult $28,$25,$8 # compute cy from last add
+ ldq $6,-16($17)
+ cmpult $4,$20,$25 # compute cy from last add
+ ldq $7,-8($17)
+ or $8,$25,$25 # combine cy from the two adds
+ subq $19,4,$19 # decr loop cnt
+ stq $22,-16($16)
+ addq $1,$25,$28 # cy add
+ stq $23,-8($16)
+ subq $5,$28,$21 # 2nd main sub
+ addq $18,32,$18 # update s2_ptr
+ cmpult $28,$25,$8 # compute cy from last add
+ bge $19,.Loop
+ # Finish software pipeline for 1st loop
+.Lend1: cmpult $5,$21,$25 # compute cy from last add
+ or $8,$25,$25 # combine cy from the two adds
+ addq $2,$25,$28 # cy add
+ subq $6,$28,$22 # 3rd main sub
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $6,$22,$25 # compute cy from last add
+ stq $20,0($16)
+ or $8,$25,$25 # combine cy from the two adds
+ stq $21,8($16)
+ addq $3,$25,$28 # cy add
+ subq $7,$28,$23 # 4th main sub
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $7,$23,$25 # compute cy from last add
+ or $8,$25,$25 # combine cy from the two adds
+ addq $16,32,$16 # update res_ptr
+ stq $22,-16($16)
+ stq $23,-8($16)
+.Lend2: addq $19,4,$19 # restore loop cnt
+ beq $19,.Lret
+ # Start software pipeline for 2nd loop
+ ldq $0,0($18)
+ ldq $4,0($17)
+ subq $19,1,$19
+ beq $19,.Lend0
+ # 2nd loop handles remaining 1-3 limbs
+ .align 4
+.Loop0: addq $0,$25,$28 # cy add
+ ldq $0,8($18)
+ subq $4,$28,$20 # main sub
+ ldq $1,8($17)
+ addq $18,8,$18
+ cmpult $28,$25,$8 # compute cy from last add
+ addq $17,8,$17
+ stq $20,0($16)
+ cmpult $4,$20,$25 # compute cy from last add
+ subq $19,1,$19 # decr loop cnt
+ or $8,$25,$25 # combine cy from the two adds
+ addq $16,8,$16
+ or $1,$31,$4
+ bne $19,.Loop0
+.Lend0: addq $0,$25,$28 # cy add
+ subq $4,$28,$20 # main sub
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $4,$20,$25 # compute cy from last add
+ stq $20,0($16)
+ or $8,$25,$25 # combine cy from the two adds
+
+.Lret: or $25,$31,$0 # return cy
+ ret $31,($26),1
+ .end __mpn_sub_n
diff --git a/sysdeps/alpha/lshift.s b/sysdeps/alpha/lshift.s
index c284349..aa8417b 100644
--- a/sysdeps/alpha/lshift.s
+++ b/sysdeps/alpha/lshift.s
@@ -53,11 +53,11 @@ __mpn_lshift:
and $18,4-1,$20 # number of limbs in first loop
srl $4,$7,$0 # compute function result
- beq $20,L0
+ beq $20,.L0
subq $18,$20,$18
.align 3
-Loop0:
+.Loop0:
ldq $3,-8($17)
subq $16,8,$16
subq $17,8,$17
@@ -67,12 +67,12 @@ Loop0:
bis $3,$3,$4
bis $5,$6,$8
stq $8,0($16)
- bne $20,Loop0
+ bne $20,.Loop0
-L0: beq $18,Lend
+.L0: beq $18,.Lend
.align 3
-Loop: ldq $3,-8($17)
+.Loop: ldq $3,-8($17)
subq $16,32,$16
subq $18,4,$18
sll $4,$19,$5
@@ -100,9 +100,9 @@ Loop: ldq $3,-8($17)
bis $1,$2,$8
stq $8,0($16)
- bgt $18,Loop
+ bgt $18,.Loop
-Lend: sll $4,$19,$8
+.Lend: sll $4,$19,$8
stq $8,-8($16)
ret $31,($26),1
.end __mpn_lshift
diff --git a/sysdeps/alpha/mul_1.s b/sysdeps/alpha/mul_1.s
index 3ef194d..58a63df 100644
--- a/sysdeps/alpha/mul_1.s
+++ b/sysdeps/alpha/mul_1.s
@@ -1,7 +1,7 @@
# Alpha 21064 __mpn_mul_1 -- Multiply a limb vector with a limb and store
# the result in a second limb vector.
- # Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+ # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
diff --git a/sysdeps/alpha/rshift.s b/sysdeps/alpha/rshift.s
index 74eab04..037b776 100644
--- a/sysdeps/alpha/rshift.s
+++ b/sysdeps/alpha/rshift.s
@@ -34,7 +34,7 @@
# 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
# 2. Only aligned instruction pairs can be paired.
# 3. The store buffer or silo might not be able to deal with the bandwidth.
-
+
.set noreorder
.set noat
.text
@@ -51,11 +51,11 @@ __mpn_rshift:
and $18,4-1,$20 # number of limbs in first loop
sll $4,$7,$0 # compute function result
- beq $20,L0
+ beq $20,.L0
subq $18,$20,$18
.align 3
-Loop0:
+.Loop0:
ldq $3,0($17)
addq $16,8,$16
addq $17,8,$17
@@ -65,12 +65,12 @@ Loop0:
bis $3,$3,$4
bis $5,$6,$8
stq $8,-8($16)
- bne $20,Loop0
+ bne $20,.Loop0
-L0: beq $18,Lend
+.L0: beq $18,.Lend
.align 3
-Loop: ldq $3,0($17)
+.Loop: ldq $3,0($17)
addq $16,32,$16
subq $18,4,$18
srl $4,$19,$5
@@ -98,9 +98,9 @@ Loop: ldq $3,0($17)
bis $1,$2,$8
stq $8,-8($16)
- bgt $18,Loop
+ bgt $18,.Loop
-Lend: srl $4,$19,$8
+.Lend: srl $4,$19,$8
stq $8,0($16)
ret $31,($26),1
.end __mpn_rshift
diff --git a/sysdeps/alpha/submul_1.s b/sysdeps/alpha/submul_1.s
index acaa11c..292b2c1 100644
--- a/sysdeps/alpha/submul_1.s
+++ b/sysdeps/alpha/submul_1.s
@@ -26,16 +26,7 @@
# size r18
# s2_limb r19
- # This code runs at 42 cycles/limb on the 21064.
-
- # To improve performance for long multiplications, we would use
- # 'fetch' for S1 and 'fetch_m' for RES. It's not obvious how to use
- # these instructions without slowing down the general code: 1. We can
- # only have two prefetches in operation at any time in the Alpha
- # architecture. 2. There will seldom be any special alignment
- # between RES_PTR and S1_PTR. Maybe we can simply divide the current
- # loop into an inner and outer loop, having the inner loop handle
- # exactly one prefetch block?
+ # This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
.set noreorder
.set noat
@@ -52,7 +43,7 @@ __mpn_submul_1:
mulq $2,$19,$3 # $3 = prod_low
ldq $5,0($16) # $5 = *res_ptr
umulh $2,$19,$0 # $0 = prod_high
- beq $18,Lend1 # jump if size was == 1
+ beq $18,.Lend1 # jump if size was == 1
ldq $2,0($17) # $2 = s1_limb
addq $17,8,$17 # s1_ptr++
subq $18,1,$18 # size--
@@ -60,10 +51,10 @@ __mpn_submul_1:
cmpult $5,$3,$4
stq $3,0($16)
addq $16,8,$16 # res_ptr++
- beq $18,Lend2 # jump if size was == 2
+ beq $18,.Lend2 # jump if size was == 2
.align 3
-Loop: mulq $2,$19,$3 # $3 = prod_low
+.Loop: mulq $2,$19,$3 # $3 = prod_low
ldq $5,0($16) # $5 = *res_ptr
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
subq $18,1,$18 # size--
@@ -77,9 +68,9 @@ Loop: mulq $2,$19,$3 # $3 = prod_low
stq $3,0($16)
addq $16,8,$16 # res_ptr++
addq $5,$0,$0 # combine carries
- bne $18,Loop
+ bne $18,.Loop
-Lend2: mulq $2,$19,$3 # $3 = prod_low
+.Lend2: mulq $2,$19,$3 # $3 = prod_low
ldq $5,0($16) # $5 = *res_ptr
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
umulh $2,$19,$4 # $4 = cy_limb
@@ -91,7 +82,7 @@ Lend2: mulq $2,$19,$3 # $3 = prod_low
addq $5,$0,$0 # combine carries
addq $4,$0,$0 # cy_limb = prod_high + cy
ret $31,($26),1
-Lend1: subq $5,$3,$3
+.Lend1: subq $5,$3,$3
cmpult $5,$3,$5
stq $3,0($16)
addq $0,$5,$0
diff --git a/sysdeps/alpha/udiv_qrnnd.S b/sysdeps/alpha/udiv_qrnnd.S
index bafafd6..ce590ed 100644
--- a/sysdeps/alpha/udiv_qrnnd.S
+++ b/sysdeps/alpha/udiv_qrnnd.S
@@ -1,6 +1,6 @@
# Alpha 21064 __udiv_qrnnd
- # Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+ # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
@@ -21,13 +21,11 @@
.set noreorder
.set noat
-
.text
- .align 3
- .globl __udiv_qrnnd
- .ent __udiv_qrnnd 0
+ .align 3
+ .globl __udiv_qrnnd
+ .ent __udiv_qrnnd
__udiv_qrnnd:
-__udiv_qrnnd..ng:
.frame $30,0,$26,0
.prologue 0
#define cnt $2
@@ -39,9 +37,9 @@ __udiv_qrnnd..ng:
#define qb $20
ldiq cnt,16
- blt d,Largedivisor
+ blt d,.Largedivisor
-Loop1: cmplt n0,0,tmp
+.Loop1: cmplt n0,0,tmp
addq n1,n1,n1
bis n1,tmp,n1
addq n0,n0,n0
@@ -74,12 +72,12 @@ Loop1: cmplt n0,0,tmp
cmovne qb,tmp,n1
bis n0,qb,n0
subq cnt,1,cnt
- bgt cnt,Loop1
+ bgt cnt,.Loop1
stq n1,0(rem_ptr)
bis $31,n0,$0
ret $31,($26),1
-Largedivisor:
+.Largedivisor:
and n0,1,$4
srl n0,1,n0
@@ -91,7 +89,7 @@ Largedivisor:
srl d,1,$5
addq $5,$6,$5
-Loop2: cmplt n0,0,tmp
+.Loop2: cmplt n0,0,tmp
addq n1,n1,n1
bis n1,tmp,n1
addq n0,n0,n0
@@ -124,27 +122,27 @@ Loop2: cmplt n0,0,tmp
cmovne qb,tmp,n1
bis n0,qb,n0
subq cnt,1,cnt
- bgt cnt,Loop2
+ bgt cnt,.Loop2
addq n1,n1,n1
addq $4,n1,n1
- bne $6,Odd
+ bne $6,.LOdd
stq n1,0(rem_ptr)
bis $31,n0,$0
ret $31,($26),1
-Odd:
+.LOdd:
/* q' in n0. r' in n1 */
addq n1,n0,n1
cmpult n1,n0,tmp # tmp := carry from addq
- beq tmp,LLp6
+ beq tmp,.LLp6
addq n0,1,n0
subq n1,d,n1
-LLp6: cmpult n1,d,tmp
- bne tmp,LLp7
+.LLp6: cmpult n1,d,tmp
+ bne tmp,.LLp7
addq n0,1,n0
subq n1,d,n1
-LLp7:
+.LLp7:
stq n1,0(rem_ptr)
bis $31,n0,$0
ret $31,($26),1
diff --git a/sysdeps/generic/add_n.c b/sysdeps/generic/add_n.c
index 6989ab0..647548d 100644
--- a/sysdeps/generic/add_n.c
+++ b/sysdeps/generic/add_n.c
@@ -1,6 +1,6 @@
-/* __mpn_add_n -- Add two limb vectors of equal, non-zero length.
+/* mpn_add_n -- Add two limb vectors of equal, non-zero length.
-Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -23,9 +23,9 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
mp_limb
#if __STDC__
-__mpn_add_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size)
+mpn_add_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size)
#else
-__mpn_add_n (res_ptr, s1_ptr, s2_ptr, size)
+mpn_add_n (res_ptr, s1_ptr, s2_ptr, size)
register mp_ptr res_ptr;
register mp_srcptr s1_ptr;
register mp_srcptr s2_ptr;
diff --git a/sysdeps/generic/addmul_1.c b/sysdeps/generic/addmul_1.c
index fdf3541..6156cab 100644
--- a/sysdeps/generic/addmul_1.c
+++ b/sysdeps/generic/addmul_1.c
@@ -1,9 +1,9 @@
-/* __mpn_addmul_1 -- multiply the S1_SIZE long limb vector pointed to by S1_PTR
+/* mpn_addmul_1 -- multiply the S1_SIZE long limb vector pointed to by S1_PTR
by S2_LIMB, add the S1_SIZE least significant limbs of the product to the
limb vector pointed to by RES_PTR. Return the most significant limb of
the product, adjusted for carry-out from the addition.
-Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -26,7 +26,7 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#include "longlong.h"
mp_limb
-__mpn_addmul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
+mpn_addmul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
register mp_ptr res_ptr;
register mp_srcptr s1_ptr;
mp_size_t s1_size;
diff --git a/sysdeps/generic/cmp.c b/sysdeps/generic/cmp.c
index 144c885..e499b1e 100644
--- a/sysdeps/generic/cmp.c
+++ b/sysdeps/generic/cmp.c
@@ -1,6 +1,6 @@
-/* __mpn_cmp -- Compare two low-level natural-number integers.
+/* mpn_cmp -- Compare two low-level natural-number integers.
-Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -28,9 +28,9 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
int
#if __STDC__
-__mpn_cmp (mp_srcptr op1_ptr, mp_srcptr op2_ptr, mp_size_t size)
+mpn_cmp (mp_srcptr op1_ptr, mp_srcptr op2_ptr, mp_size_t size)
#else
-__mpn_cmp (op1_ptr, op2_ptr, size)
+mpn_cmp (op1_ptr, op2_ptr, size)
mp_srcptr op1_ptr;
mp_srcptr op2_ptr;
mp_size_t size;
diff --git a/sysdeps/generic/divmod_1.c b/sysdeps/generic/divmod_1.c
index 2989d36..c040327 100644
--- a/sysdeps/generic/divmod_1.c
+++ b/sysdeps/generic/divmod_1.c
@@ -1,4 +1,4 @@
-/* __mpn_divmod_1(quot_ptr, dividend_ptr, dividend_size, divisor_limb) --
+/* mpn_divmod_1(quot_ptr, dividend_ptr, dividend_size, divisor_limb) --
Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR.
Return the single-limb remainder.
@@ -6,7 +6,7 @@
QUOT_PTR and DIVIDEND_PTR might point to the same limb.
-Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -41,11 +41,11 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
mp_limb
#if __STDC__
-__mpn_divmod_1 (mp_ptr quot_ptr,
+mpn_divmod_1 (mp_ptr quot_ptr,
mp_srcptr dividend_ptr, mp_size_t dividend_size,
mp_limb divisor_limb)
#else
-__mpn_divmod_1 (quot_ptr, dividend_ptr, dividend_size, divisor_limb)
+mpn_divmod_1 (quot_ptr, dividend_ptr, dividend_size, divisor_limb)
mp_ptr quot_ptr;
mp_srcptr dividend_ptr;
mp_size_t dividend_size;
diff --git a/sysdeps/generic/lshift.c b/sysdeps/generic/lshift.c
index 1ba0903..35794e4 100644
--- a/sysdeps/generic/lshift.c
+++ b/sysdeps/generic/lshift.c
@@ -1,6 +1,6 @@
-/* __mpn_lshift -- Shift left low level.
+/* mpn_lshift -- Shift left low level.
-Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -32,11 +32,11 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
mp_limb
#if __STDC__
-__mpn_lshift (register mp_ptr wp,
+mpn_lshift (register mp_ptr wp,
register mp_srcptr up, mp_size_t usize,
register unsigned int cnt)
#else
-__mpn_lshift (wp, up, usize, cnt)
+mpn_lshift (wp, up, usize, cnt)
register mp_ptr wp;
register mp_srcptr up;
mp_size_t usize;
diff --git a/sysdeps/generic/mod_1.c b/sysdeps/generic/mod_1.c
index 8a49fb4..0842f6b 100644
--- a/sysdeps/generic/mod_1.c
+++ b/sysdeps/generic/mod_1.c
@@ -1,4 +1,4 @@
-/* __mpn_mod_1(dividend_ptr, dividend_size, divisor_limb) --
+/* mpn_mod_1(dividend_ptr, dividend_size, divisor_limb) --
Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
Return the single-limb remainder.
There are no constraints on the value of the divisor.
@@ -38,10 +38,10 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
mp_limb
#if __STDC__
-__mpn_mod_1 (mp_srcptr dividend_ptr, mp_size_t dividend_size,
- mp_limb divisor_limb)
+mpn_mod_1 (mp_srcptr dividend_ptr, mp_size_t dividend_size,
+ mp_limb divisor_limb)
#else
-__mpn_mod_1 (dividend_ptr, dividend_size, divisor_limb)
+mpn_mod_1 (dividend_ptr, dividend_size, divisor_limb)
mp_srcptr dividend_ptr;
mp_size_t dividend_size;
mp_limb divisor_limb;
diff --git a/sysdeps/generic/mul.c b/sysdeps/generic/mul.c
index cd2acb5..3f3f41e 100644
--- a/sysdeps/generic/mul.c
+++ b/sysdeps/generic/mul.c
@@ -1,6 +1,6 @@
-/* __mpn_mul -- Multiply two natural numbers.
+/* mpn_mul -- Multiply two natural numbers.
-Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -43,11 +43,11 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
mp_limb
#if __STDC__
-__mpn_mul (mp_ptr prodp,
- mp_srcptr up, mp_size_t usize,
- mp_srcptr vp, mp_size_t vsize)
+mpn_mul (mp_ptr prodp,
+ mp_srcptr up, mp_size_t usize,
+ mp_srcptr vp, mp_size_t vsize)
#else
-__mpn_mul (prodp, up, usize, vp, vsize)
+mpn_mul (prodp, up, usize, vp, vsize)
mp_ptr prodp;
mp_srcptr up;
mp_size_t usize;
@@ -58,6 +58,7 @@ __mpn_mul (prodp, up, usize, vp, vsize)
mp_ptr prod_endp = prodp + usize + vsize - 1;
mp_limb cy;
mp_ptr tspace;
+ TMP_DECL (marker);
if (vsize < KARATSUBA_THRESHOLD)
{
@@ -86,7 +87,7 @@ __mpn_mul (prodp, up, usize, vp, vsize)
cy_limb = 0;
}
else
- cy_limb = __mpn_mul_1 (prodp, up, usize, v_limb);
+ cy_limb = mpn_mul_1 (prodp, up, usize, v_limb);
prodp[usize] = cy_limb;
prodp++;
@@ -100,10 +101,10 @@ __mpn_mul (prodp, up, usize, vp, vsize)
{
cy_limb = 0;
if (v_limb == 1)
- cy_limb = __mpn_add_n (prodp, prodp, up, usize);
+ cy_limb = mpn_add_n (prodp, prodp, up, usize);
}
else
- cy_limb = __mpn_addmul_1 (prodp, up, usize, v_limb);
+ cy_limb = mpn_addmul_1 (prodp, up, usize, v_limb);
prodp[usize] = cy_limb;
prodp++;
@@ -111,7 +112,9 @@ __mpn_mul (prodp, up, usize, vp, vsize)
return cy_limb;
}
- tspace = (mp_ptr) alloca (2 * vsize * BYTES_PER_MP_LIMB);
+ TMP_MARK (marker);
+
+ tspace = (mp_ptr) TMP_ALLOC (2 * vsize * BYTES_PER_MP_LIMB);
MPN_MUL_N_RECURSE (prodp, up, vp, vsize, tspace);
prodp += vsize;
@@ -119,12 +122,12 @@ __mpn_mul (prodp, up, usize, vp, vsize)
usize -= vsize;
if (usize >= vsize)
{
- mp_ptr tp = (mp_ptr) alloca (2 * vsize * BYTES_PER_MP_LIMB);
+ mp_ptr tp = (mp_ptr) TMP_ALLOC (2 * vsize * BYTES_PER_MP_LIMB);
do
{
MPN_MUL_N_RECURSE (tp, up, vp, vsize, tspace);
- cy = __mpn_add_n (prodp, prodp, tp, vsize);
- __mpn_add_1 (prodp + vsize, tp + vsize, vsize, cy);
+ cy = mpn_add_n (prodp, prodp, tp, vsize);
+ mpn_add_1 (prodp + vsize, tp + vsize, vsize, cy);
prodp += vsize;
up += vsize;
usize -= vsize;
@@ -138,10 +141,11 @@ __mpn_mul (prodp, up, usize, vp, vsize)
if (usize != 0)
{
- __mpn_mul (tspace, vp, vsize, up, usize);
- cy = __mpn_add_n (prodp, prodp, tspace, vsize);
- __mpn_add_1 (prodp + vsize, tspace + vsize, usize, cy);
+ mpn_mul (tspace, vp, vsize, up, usize);
+ cy = mpn_add_n (prodp, prodp, tspace, vsize);
+ mpn_add_1 (prodp + vsize, tspace + vsize, usize, cy);
}
+ TMP_FREE (marker);
return *prod_endp;
}
diff --git a/sysdeps/generic/mul_1.c b/sysdeps/generic/mul_1.c
index 37dbc33..01fdbbb 100644
--- a/sysdeps/generic/mul_1.c
+++ b/sysdeps/generic/mul_1.c
@@ -1,7 +1,7 @@
-/* __mpn_mul_1 -- Multiply a limb vector with a single limb and
+/* mpn_mul_1 -- Multiply a limb vector with a single limb and
store the product in a second limb vector.
-Copyright (C) 1991, 1992, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -24,7 +24,7 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#include "longlong.h"
mp_limb
-__mpn_mul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
+mpn_mul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
register mp_ptr res_ptr;
register mp_srcptr s1_ptr;
mp_size_t s1_size;
diff --git a/sysdeps/generic/mul_n.c b/sysdeps/generic/mul_n.c
index e37c5d8..049f63d 100644
--- a/sysdeps/generic/mul_n.c
+++ b/sysdeps/generic/mul_n.c
@@ -1,6 +1,6 @@
-/* __mpn_mul_n -- Multiply two natural numbers of length n.
+/* mpn_mul_n -- Multiply two natural numbers of length n.
-Copyright (C) 1991, 1992, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -41,13 +41,6 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#define KARATSUBA_THRESHOLD 2
#endif
-void
-#if __STDC__
-____mpn_mul_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
-#else
-____mpn_mul_n ();
-#endif
-
/* Handle simple cases with traditional multiplication.
This is the most critical code of multiplication. All multiplies rely
@@ -57,9 +50,9 @@ ____mpn_mul_n ();
void
#if __STDC__
-____mpn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
+impn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
#else
-____mpn_mul_n_basecase (prodp, up, vp, size)
+impn_mul_n_basecase (prodp, up, vp, size)
mp_ptr prodp;
mp_srcptr up;
mp_srcptr vp;
@@ -82,7 +75,7 @@ ____mpn_mul_n_basecase (prodp, up, vp, size)
cy_limb = 0;
}
else
- cy_limb = __mpn_mul_1 (prodp, up, size, v_limb);
+ cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
prodp[size] = cy_limb;
prodp++;
@@ -96,10 +89,10 @@ ____mpn_mul_n_basecase (prodp, up, vp, size)
{
cy_limb = 0;
if (v_limb == 1)
- cy_limb = __mpn_add_n (prodp, prodp, up, size);
+ cy_limb = mpn_add_n (prodp, prodp, up, size);
}
else
- cy_limb = __mpn_addmul_1 (prodp, up, size, v_limb);
+ cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
prodp[size] = cy_limb;
prodp++;
@@ -108,10 +101,10 @@ ____mpn_mul_n_basecase (prodp, up, vp, size)
void
#if __STDC__
-____mpn_mul_n (mp_ptr prodp,
+impn_mul_n (mp_ptr prodp,
mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace)
#else
-____mpn_mul_n (prodp, up, vp, size, tspace)
+impn_mul_n (prodp, up, vp, size, tspace)
mp_ptr prodp;
mp_srcptr up;
mp_srcptr vp;
@@ -135,9 +128,9 @@ ____mpn_mul_n (prodp, up, vp, size, tspace)
mp_limb cy_limb;
MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace);
- cy_limb = __mpn_addmul_1 (prodp + esize, up, esize, vp[esize]);
+ cy_limb = mpn_addmul_1 (prodp + esize, up, esize, vp[esize]);
prodp[esize + esize] = cy_limb;
- cy_limb = __mpn_addmul_1 (prodp + esize, vp, size, up[esize]);
+ cy_limb = mpn_addmul_1 (prodp + esize, vp, size, up[esize]);
prodp[esize + size] = cy_limb;
}
@@ -170,24 +163,24 @@ ____mpn_mul_n (prodp, up, vp, size, tspace)
/*** Product M. ________________
|_(U1-U0)(V0-V1)_| */
- if (__mpn_cmp (up + hsize, up, hsize) >= 0)
+ if (mpn_cmp (up + hsize, up, hsize) >= 0)
{
- __mpn_sub_n (prodp, up + hsize, up, hsize);
+ mpn_sub_n (prodp, up + hsize, up, hsize);
negflg = 0;
}
else
{
- __mpn_sub_n (prodp, up, up + hsize, hsize);
+ mpn_sub_n (prodp, up, up + hsize, hsize);
negflg = 1;
}
- if (__mpn_cmp (vp + hsize, vp, hsize) >= 0)
+ if (mpn_cmp (vp + hsize, vp, hsize) >= 0)
{
- __mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize);
+ mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize);
negflg ^= 1;
}
else
{
- __mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize);
+ mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize);
/* No change of NEGFLG. */
}
/* Read temporary operands from low part of PROD.
@@ -197,13 +190,13 @@ ____mpn_mul_n (prodp, up, vp, size, tspace)
/*** Add/copy product H. */
MPN_COPY (prodp + hsize, prodp + size, hsize);
- cy = __mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
+ cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
/*** Add product M (if NEGFLG M is a negative number). */
if (negflg)
- cy -= __mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
+ cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
else
- cy += __mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
+ cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
/*** Product L. ________________ ________________
|________________||____U0 x V0_____| */
@@ -214,22 +207,22 @@ ____mpn_mul_n (prodp, up, vp, size, tspace)
/*** Add/copy Product L (twice). */
- cy += __mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
+ cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
if (cy)
- __mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
+ mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
MPN_COPY (prodp, tspace, hsize);
- cy = __mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
+ cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
if (cy)
- __mpn_add_1 (prodp + size, prodp + size, size, 1);
+ mpn_add_1 (prodp + size, prodp + size, size, 1);
}
}
void
#if __STDC__
-____mpn_sqr_n_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t size)
+impn_sqr_n_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t size)
#else
-____mpn_sqr_n_basecase (prodp, up, size)
+impn_sqr_n_basecase (prodp, up, size)
mp_ptr prodp;
mp_srcptr up;
mp_size_t size;
@@ -251,7 +244,7 @@ ____mpn_sqr_n_basecase (prodp, up, size)
cy_limb = 0;
}
else
- cy_limb = __mpn_mul_1 (prodp, up, size, v_limb);
+ cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
prodp[size] = cy_limb;
prodp++;
@@ -265,10 +258,10 @@ ____mpn_sqr_n_basecase (prodp, up, size)
{
cy_limb = 0;
if (v_limb == 1)
- cy_limb = __mpn_add_n (prodp, prodp, up, size);
+ cy_limb = mpn_add_n (prodp, prodp, up, size);
}
else
- cy_limb = __mpn_addmul_1 (prodp, up, size, v_limb);
+ cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
prodp[size] = cy_limb;
prodp++;
@@ -277,10 +270,10 @@ ____mpn_sqr_n_basecase (prodp, up, size)
void
#if __STDC__
-____mpn_sqr_n (mp_ptr prodp,
+impn_sqr_n (mp_ptr prodp,
mp_srcptr up, mp_size_t size, mp_ptr tspace)
#else
-____mpn_sqr_n (prodp, up, size, tspace)
+impn_sqr_n (prodp, up, size, tspace)
mp_ptr prodp;
mp_srcptr up;
mp_size_t size;
@@ -303,9 +296,9 @@ ____mpn_sqr_n (prodp, up, size, tspace)
mp_limb cy_limb;
MPN_SQR_N_RECURSE (prodp, up, esize, tspace);
- cy_limb = __mpn_addmul_1 (prodp + esize, up, esize, up[esize]);
+ cy_limb = mpn_addmul_1 (prodp + esize, up, esize, up[esize]);
prodp[esize + esize] = cy_limb;
- cy_limb = __mpn_addmul_1 (prodp + esize, up, size, up[esize]);
+ cy_limb = mpn_addmul_1 (prodp + esize, up, size, up[esize]);
prodp[esize + size] = cy_limb;
}
@@ -322,13 +315,13 @@ ____mpn_sqr_n (prodp, up, size, tspace)
/*** Product M. ________________
|_(U1-U0)(U0-U1)_| */
- if (__mpn_cmp (up + hsize, up, hsize) >= 0)
+ if (mpn_cmp (up + hsize, up, hsize) >= 0)
{
- __mpn_sub_n (prodp, up + hsize, up, hsize);
+ mpn_sub_n (prodp, up + hsize, up, hsize);
}
else
{
- __mpn_sub_n (prodp, up, up + hsize, hsize);
+ mpn_sub_n (prodp, up, up + hsize, hsize);
}
/* Read temporary operands from low part of PROD.
@@ -338,10 +331,10 @@ ____mpn_sqr_n (prodp, up, size, tspace)
/*** Add/copy product H. */
MPN_COPY (prodp + hsize, prodp + size, hsize);
- cy = __mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
+ cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
/*** Add product M (if NEGFLG M is a negative number). */
- cy -= __mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
+ cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
/*** Product L. ________________ ________________
|________________||____U0 x U0_____| */
@@ -352,53 +345,56 @@ ____mpn_sqr_n (prodp, up, size, tspace)
/*** Add/copy Product L (twice). */
- cy += __mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
+ cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
if (cy)
- __mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
+ mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
MPN_COPY (prodp, tspace, hsize);
- cy = __mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
+ cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
if (cy)
- __mpn_add_1 (prodp + size, prodp + size, size, 1);
+ mpn_add_1 (prodp + size, prodp + size, size, 1);
}
}
/* This should be made into an inline function in gmp.h. */
inline void
#if __STDC__
-__mpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
+mpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
#else
-__mpn_mul_n (prodp, up, vp, size)
+mpn_mul_n (prodp, up, vp, size)
mp_ptr prodp;
mp_srcptr up;
mp_srcptr vp;
mp_size_t size;
#endif
{
+ TMP_DECL (marker);
+ TMP_MARK (marker);
if (up == vp)
{
if (size < KARATSUBA_THRESHOLD)
{
- ____mpn_sqr_n_basecase (prodp, up, size);
+ impn_sqr_n_basecase (prodp, up, size);
}
else
{
mp_ptr tspace;
- tspace = (mp_ptr) alloca (2 * size * BYTES_PER_MP_LIMB);
- ____mpn_sqr_n (prodp, up, size, tspace);
+ tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
+ impn_sqr_n (prodp, up, size, tspace);
}
}
else
{
if (size < KARATSUBA_THRESHOLD)
{
- ____mpn_mul_n_basecase (prodp, up, vp, size);
+ impn_mul_n_basecase (prodp, up, vp, size);
}
else
{
mp_ptr tspace;
- tspace = (mp_ptr) alloca (2 * size * BYTES_PER_MP_LIMB);
- ____mpn_mul_n (prodp, up, vp, size, tspace);
+ tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
+ impn_mul_n (prodp, up, vp, size, tspace);
}
}
+ TMP_FREE (marker);
}
diff --git a/sysdeps/generic/rshift.c b/sysdeps/generic/rshift.c
index 966cc7b..7ce02e0 100644
--- a/sysdeps/generic/rshift.c
+++ b/sysdeps/generic/rshift.c
@@ -1,6 +1,6 @@
-/* __mpn_rshift -- Shift right a low-level natural-number integer.
+/* mpn_rshift -- Shift right a low-level natural-number integer.
-Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -32,11 +32,11 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
mp_limb
#if __STDC__
-__mpn_rshift (register mp_ptr wp,
+mpn_rshift (register mp_ptr wp,
register mp_srcptr up, mp_size_t usize,
register unsigned int cnt)
#else
-__mpn_rshift (wp, up, usize, cnt)
+mpn_rshift (wp, up, usize, cnt)
register mp_ptr wp;
register mp_srcptr up;
mp_size_t usize;
diff --git a/sysdeps/generic/sub_n.c b/sysdeps/generic/sub_n.c
index 6b33e66..f3c83d1 100644
--- a/sysdeps/generic/sub_n.c
+++ b/sysdeps/generic/sub_n.c
@@ -1,6 +1,6 @@
-/* __mpn_sub_n -- Subtract two limb vectors of equal, non-zero length.
+/* mpn_sub_n -- Subtract two limb vectors of equal, non-zero length.
-Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -23,9 +23,9 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
mp_limb
#if __STDC__
-__mpn_sub_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size)
+mpn_sub_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size)
#else
-__mpn_sub_n (res_ptr, s1_ptr, s2_ptr, size)
+mpn_sub_n (res_ptr, s1_ptr, s2_ptr, size)
register mp_ptr res_ptr;
register mp_srcptr s1_ptr;
register mp_srcptr s2_ptr;
diff --git a/sysdeps/generic/submul_1.c b/sysdeps/generic/submul_1.c
index 855dd3f..57122a5 100644
--- a/sysdeps/generic/submul_1.c
+++ b/sysdeps/generic/submul_1.c
@@ -1,9 +1,9 @@
-/* __mpn_submul_1 -- multiply the S1_SIZE long limb vector pointed to by S1_PTR
+/* mpn_submul_1 -- multiply the S1_SIZE long limb vector pointed to by S1_PTR
by S2_LIMB, subtract the S1_SIZE least significant limbs of the product
from the limb vector pointed to by RES_PTR. Return the most significant
limb of the product, adjusted for carry-out from the subtraction.
-Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -26,7 +26,7 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#include "longlong.h"
mp_limb
-__mpn_submul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
+mpn_submul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
register mp_ptr res_ptr;
register mp_srcptr s1_ptr;
mp_size_t s1_size;
diff --git a/sysdeps/i386/gmp-mparam.h b/sysdeps/i386/gmp-mparam.h
index 687f12a..ddc308a 100644
--- a/sysdeps/i386/gmp-mparam.h
+++ b/sysdeps/i386/gmp-mparam.h
@@ -1,6 +1,6 @@
/* gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
diff --git a/sysdeps/i386/i586/add_n.S b/sysdeps/i386/i586/add_n.S
index f52f9c6..f214c8c 100644
--- a/sysdeps/i386/i586/add_n.S
+++ b/sysdeps/i386/i586/add_n.S
@@ -1,7 +1,7 @@
/* Pentium __mpn_add_n -- Add two limb vectors of the same length > 0 and store
sum in a third limb vector.
-Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -30,13 +30,6 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#include "sysdep.h"
#include "asm-syntax.h"
-#define t1 %eax
-#define t2 %edx
-#define src1 %esi
-#define src2 %ebp
-#define dst %edi
-#define x %ebx
-
.text
ALIGN (3)
.globl C_SYMBOL_NAME(__mpn_add_n)
@@ -46,85 +39,85 @@ C_SYMBOL_NAME(__mpn_add_n:)
pushl %ebx
pushl %ebp
- movl 20(%esp),dst /* res_ptr */
- movl 24(%esp),src1 /* s1_ptr */
- movl 28(%esp),src2 /* s2_ptr */
+ movl 20(%esp),%edi /* res_ptr */
+ movl 24(%esp),%esi /* s1_ptr */
+ movl 28(%esp),%ebp /* s2_ptr */
movl 32(%esp),%ecx /* size */
- movl (src2),x
+ movl (%ebp),%ebx
decl %ecx
- movl %ecx,t2
+ movl %ecx,%edx
shrl $3,%ecx
- andl $7,t2
+ andl $7,%edx
testl %ecx,%ecx /* zero carry flag */
jz Lend
- pushl t2
+ pushl %edx
ALIGN (3)
-Loop: movl 28(dst),%eax /* fetch destination cache line */
- leal 32(dst),dst
-
-L1: movl (src1),t1
- movl 4(src1),t2
- adcl x,t1
- movl 4(src2),x
- adcl x,t2
- movl 8(src2),x
- movl t1,-32(dst)
- movl t2,-28(dst)
-
-L2: movl 8(src1),t1
- movl 12(src1),t2
- adcl x,t1
- movl 12(src2),x
- adcl x,t2
- movl 16(src2),x
- movl t1,-24(dst)
- movl t2,-20(dst)
-
-L3: movl 16(src1),t1
- movl 20(src1),t2
- adcl x,t1
- movl 20(src2),x
- adcl x,t2
- movl 24(src2),x
- movl t1,-16(dst)
- movl t2,-12(dst)
-
-L4: movl 24(src1),t1
- movl 28(src1),t2
- adcl x,t1
- movl 28(src2),x
- adcl x,t2
- movl 32(src2),x
- movl t1,-8(dst)
- movl t2,-4(dst)
-
- leal 32(src1),src1
- leal 32(src2),src2
+Loop: movl 28(%edi),%eax /* fetch destination cache line */
+ leal 32(%edi),%edi
+
+L1: movl (%esi),%eax
+ movl 4(%esi),%edx
+ adcl %ebx,%eax
+ movl 4(%ebp),%ebx
+ adcl %ebx,%edx
+ movl 8(%ebp),%ebx
+ movl %eax,-32(%edi)
+ movl %edx,-28(%edi)
+
+L2: movl 8(%esi),%eax
+ movl 12(%esi),%edx
+ adcl %ebx,%eax
+ movl 12(%ebp),%ebx
+ adcl %ebx,%edx
+ movl 16(%ebp),%ebx
+ movl %eax,-24(%edi)
+ movl %edx,-20(%edi)
+
+L3: movl 16(%esi),%eax
+ movl 20(%esi),%edx
+ adcl %ebx,%eax
+ movl 20(%ebp),%ebx
+ adcl %ebx,%edx
+ movl 24(%ebp),%ebx
+ movl %eax,-16(%edi)
+ movl %edx,-12(%edi)
+
+L4: movl 24(%esi),%eax
+ movl 28(%esi),%edx
+ adcl %ebx,%eax
+ movl 28(%ebp),%ebx
+ adcl %ebx,%edx
+ movl 32(%ebp),%ebx
+ movl %eax,-8(%edi)
+ movl %edx,-4(%edi)
+
+ leal 32(%esi),%esi
+ leal 32(%ebp),%ebp
decl %ecx
jnz Loop
- popl t2
+ popl %edx
Lend:
- decl t2 /* test t2 w/o clobbering carry */
+ decl %edx /* test %edx w/o clobbering carry */
js Lend2
- incl t2
+ incl %edx
Loop2:
- leal 4(dst),dst
- movl (src1),t1
- adcl x,t1
- movl 4(src2),x
- movl t1,-4(dst)
- leal 4(src1),src1
- leal 4(src2),src2
- decl t2
+ leal 4(%edi),%edi
+ movl (%esi),%eax
+ adcl %ebx,%eax
+ movl 4(%ebp),%ebx
+ movl %eax,-4(%edi)
+ leal 4(%esi),%esi
+ leal 4(%ebp),%ebp
+ decl %edx
jnz Loop2
Lend2:
- movl (src1),t1
- adcl x,t1
- movl t1,(dst)
+ movl (%esi),%eax
+ adcl %ebx,%eax
+ movl %eax,(%edi)
sbbl %eax,%eax
negl %eax
diff --git a/sysdeps/i386/i586/addmul_1.S b/sysdeps/i386/i586/addmul_1.S
index b222840..5bf2603 100644
--- a/sysdeps/i386/i586/addmul_1.S
+++ b/sysdeps/i386/i586/addmul_1.S
@@ -1,7 +1,7 @@
/* Pentium __mpn_addmul_1 -- Multiply a limb vector with a limb and add
the result to a second limb vector.
-Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -32,12 +32,12 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#define res_ptr edi
#define s1_ptr esi
+#define size ecx
#define s2_limb ebp
TEXT
ALIGN (3)
GLOBL C_SYMBOL_NAME(__mpn_addmul_1)
- .type C_SYMBOL_NAME(__mpn_addmul_1),@function
C_SYMBOL_NAME(__mpn_addmul_1:)
INSN1(push,l ,R(edi))
@@ -47,38 +47,36 @@ C_SYMBOL_NAME(__mpn_addmul_1:)
INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20))
INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24))
- INSN2(mov,l ,R(ecx),MEM_DISP(esp,28))
+ INSN2(mov,l ,R(size),MEM_DISP(esp,28))
INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32))
- INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,ecx,4))
- INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,ecx,4))
- INSN1(neg,l ,R(ecx))
- INSN2(xor,l ,R(edx),R(edx))
+ INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4))
+ INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
+ INSN1(neg,l ,R(size))
+ INSN2(xor,l ,R(ebx),R(ebx))
ALIGN (3)
-Loop:
- INSN2(mov,l ,R(ebx),R(edx))
- INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,ecx,4))
+
+Loop: INSN2(adc,l ,R(ebx),$0)
+ INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4))
INSN1(mul,l ,R(s2_limb))
INSN2(add,l ,R(eax),R(ebx))
- INSN2(mov,l ,R(ebx),MEM_INDEX(res_ptr,ecx,4))
+ INSN2(mov,l ,R(ebx),MEM_INDEX(res_ptr,size,4))
INSN2(adc,l ,R(edx),$0)
INSN2(add,l ,R(ebx),R(eax))
- INSN2(adc,l ,R(edx),$0)
- INSN2(mov,l ,MEM_INDEX(res_ptr,ecx,4),R(ebx))
+ INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx))
+ INSN1(inc,l ,R(size))
- INSN1(inc,l ,R(ecx))
+ INSN2(mov,l ,R(ebx),R(edx))
INSN1(jnz, ,Loop)
-
- INSN2(mov,l ,R(eax),R(edx))
+ INSN2(adc,l ,R(ebx),$0)
+ INSN2(mov,l ,R(eax),R(ebx))
INSN1(pop,l ,R(ebp))
INSN1(pop,l ,R(ebx))
INSN1(pop,l ,R(esi))
INSN1(pop,l ,R(edi))
ret
-Lfe1:
- .size C_SYMBOL_NAME(__mpn_addmul_1),Lfe1-C_SYMBOL_NAME(__mpn_addmul_1)
diff --git a/sysdeps/i386/i586/mul_1.S b/sysdeps/i386/i586/mul_1.S
index 2b7258e..048c060 100644
--- a/sysdeps/i386/i586/mul_1.S
+++ b/sysdeps/i386/i586/mul_1.S
@@ -1,7 +1,7 @@
/* Pentium __mpn_mul_1 -- Multiply a limb vector with a limb and store
the result in a second limb vector.
-Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -53,24 +53,24 @@ C_SYMBOL_NAME(__mpn_mul_1:)
INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4))
INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
INSN1(neg,l ,R(size))
- INSN2(xor,l ,R(edx),R(edx))
+ INSN2(xor,l ,R(ebx),R(ebx))
ALIGN (3)
-Loop:
- INSN2(mov,l ,R(ebx),R(edx))
+
+Loop: INSN2(adc,l ,R(ebx),$0)
INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4))
INSN1(mul,l ,R(s2_limb))
- INSN2(add,l ,R(eax),R(ebx))
-
- INSN2(adc,l ,R(edx),$0)
- INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(eax))
+ INSN2(add,l ,R(ebx),R(eax))
+ INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx))
INSN1(inc,l ,R(size))
- INSN1(jnz, ,Loop)
+ INSN2(mov,l ,R(ebx),R(edx))
+ INSN1(jnz, ,Loop)
- INSN2(mov,l ,R(eax),R(edx))
+ INSN2(adc,l ,R(ebx),$0)
+ INSN2(mov,l ,R(eax),R(ebx))
INSN1(pop,l ,R(ebp))
INSN1(pop,l ,R(ebx))
INSN1(pop,l ,R(esi))
diff --git a/sysdeps/i386/i586/sub_n.S b/sysdeps/i386/i586/sub_n.S
index 9c964a8..cd158a5 100644
--- a/sysdeps/i386/i586/sub_n.S
+++ b/sysdeps/i386/i586/sub_n.S
@@ -1,7 +1,7 @@
/* Pentium __mpn_sub_n -- Subtract two limb vectors of the same length > 0
and store difference in a third limb vector.
-Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -30,13 +30,6 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#include "sysdep.h"
#include "asm-syntax.h"
-#define t1 %eax
-#define t2 %edx
-#define src1 %esi
-#define src2 %ebp
-#define dst %edi
-#define x %ebx
-
.text
ALIGN (3)
.globl C_SYMBOL_NAME(__mpn_sub_n)
@@ -46,85 +39,85 @@ C_SYMBOL_NAME(__mpn_sub_n:)
pushl %ebx
pushl %ebp
- movl 20(%esp),dst /* res_ptr */
- movl 24(%esp),src1 /* s1_ptr */
- movl 28(%esp),src2 /* s2_ptr */
+ movl 20(%esp),%edi /* res_ptr */
+ movl 24(%esp),%esi /* s1_ptr */
+ movl 28(%esp),%ebp /* s2_ptr */
movl 32(%esp),%ecx /* size */
- movl (src2),x
+ movl (%ebp),%ebx
decl %ecx
- movl %ecx,t2
+ movl %ecx,%edx
shrl $3,%ecx
- andl $7,t2
+ andl $7,%edx
testl %ecx,%ecx /* zero carry flag */
jz Lend
- pushl t2
+ pushl %edx
ALIGN (3)
-Loop: movl 28(dst),%eax /* fetch destination cache line */
- leal 32(dst),dst
-
-L1: movl (src1),t1
- movl 4(src1),t2
- sbbl x,t1
- movl 4(src2),x
- sbbl x,t2
- movl 8(src2),x
- movl t1,-32(dst)
- movl t2,-28(dst)
-
-L2: movl 8(src1),t1
- movl 12(src1),t2
- sbbl x,t1
- movl 12(src2),x
- sbbl x,t2
- movl 16(src2),x
- movl t1,-24(dst)
- movl t2,-20(dst)
-
-L3: movl 16(src1),t1
- movl 20(src1),t2
- sbbl x,t1
- movl 20(src2),x
- sbbl x,t2
- movl 24(src2),x
- movl t1,-16(dst)
- movl t2,-12(dst)
-
-L4: movl 24(src1),t1
- movl 28(src1),t2
- sbbl x,t1
- movl 28(src2),x
- sbbl x,t2
- movl 32(src2),x
- movl t1,-8(dst)
- movl t2,-4(dst)
-
- leal 32(src1),src1
- leal 32(src2),src2
+Loop: movl 28(%edi),%eax /* fetch destination cache line */
+ leal 32(%edi),%edi
+
+L1: movl (%esi),%eax
+ movl 4(%esi),%edx
+ sbbl %ebx,%eax
+ movl 4(%ebp),%ebx
+ sbbl %ebx,%edx
+ movl 8(%ebp),%ebx
+ movl %eax,-32(%edi)
+ movl %edx,-28(%edi)
+
+L2: movl 8(%esi),%eax
+ movl 12(%esi),%edx
+ sbbl %ebx,%eax
+ movl 12(%ebp),%ebx
+ sbbl %ebx,%edx
+ movl 16(%ebp),%ebx
+ movl %eax,-24(%edi)
+ movl %edx,-20(%edi)
+
+L3: movl 16(%esi),%eax
+ movl 20(%esi),%edx
+ sbbl %ebx,%eax
+ movl 20(%ebp),%ebx
+ sbbl %ebx,%edx
+ movl 24(%ebp),%ebx
+ movl %eax,-16(%edi)
+ movl %edx,-12(%edi)
+
+L4: movl 24(%esi),%eax
+ movl 28(%esi),%edx
+ sbbl %ebx,%eax
+ movl 28(%ebp),%ebx
+ sbbl %ebx,%edx
+ movl 32(%ebp),%ebx
+ movl %eax,-8(%edi)
+ movl %edx,-4(%edi)
+
+ leal 32(%esi),%esi
+ leal 32(%ebp),%ebp
decl %ecx
jnz Loop
- popl t2
+ popl %edx
Lend:
- decl t2 /* test t2 w/o clobbering carry */
+ decl %edx /* test %edx w/o clobbering carry */
js Lend2
- incl t2
+ incl %edx
Loop2:
- leal 4(dst),dst
- movl (src1),t1
- sbbl x,t1
- movl 4(src2),x
- movl t1,-4(dst)
- leal 4(src1),src1
- leal 4(src2),src2
- decl t2
+ leal 4(%edi),%edi
+ movl (%esi),%eax
+ sbbl %ebx,%eax
+ movl 4(%ebp),%ebx
+ movl %eax,-4(%edi)
+ leal 4(%esi),%esi
+ leal 4(%ebp),%ebp
+ decl %edx
jnz Loop2
Lend2:
- movl (src1),t1
- sbbl x,t1
- movl t1,(dst)
+ movl (%esi),%eax
+ sbbl %ebx,%eax
+ movl %eax,(%edi)
sbbl %eax,%eax
negl %eax
diff --git a/sysdeps/i386/i586/submul_1.S b/sysdeps/i386/i586/submul_1.S
index 14bfe54..440f64f 100644
--- a/sysdeps/i386/i586/submul_1.S
+++ b/sysdeps/i386/i586/submul_1.S
@@ -1,7 +1,7 @@
/* Pentium __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
the result from a second limb vector.
-Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -53,10 +53,10 @@ C_SYMBOL_NAME(__mpn_submul_1:)
INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4))
INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
INSN1(neg,l ,R(size))
- INSN2(xor,l ,R(edx),R(edx))
+ INSN2(xor,l ,R(ebx),R(ebx))
ALIGN (3)
-Loop:
- INSN2(mov,l ,R(ebx),R(edx))
+
+Loop: INSN2(adc,l ,R(ebx),$0)
INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4))
INSN1(mul,l ,R(s2_limb))
@@ -67,14 +67,14 @@ Loop:
INSN2(adc,l ,R(edx),$0)
INSN2(sub,l ,R(ebx),R(eax))
- INSN2(adc,l ,R(edx),$0)
INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx))
-
INSN1(inc,l ,R(size))
- INSN1(jnz, ,Loop)
+ INSN2(mov,l ,R(ebx),R(edx))
+ INSN1(jnz, ,Loop)
- INSN2(mov,l ,R(eax),R(edx))
+ INSN2(adc,l ,R(ebx),$0)
+ INSN2(mov,l ,R(eax),R(ebx))
INSN1(pop,l ,R(ebp))
INSN1(pop,l ,R(ebx))
INSN1(pop,l ,R(esi))
diff --git a/sysdeps/m68k/add_n.S b/sysdeps/m68k/add_n.S
index ea7a4458..754af9f 100644
--- a/sysdeps/m68k/add_n.S
+++ b/sysdeps/m68k/add_n.S
@@ -1,7 +1,7 @@
/* mc68020 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
sum in a third limb vector.
-Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -27,50 +27,53 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
size (sp + 12)
*/
+#include "sysdep.h"
#include "asm-syntax.h"
TEXT
ALIGN
- GLOBL ___mpn_add_n
+ GLOBL C_SYMBOL_NAME(__mpn_add_n)
-LAB(___mpn_add_n)
+C_SYMBOL_NAME(__mpn_add_n:)
+PROLOG(__mpn_add_n)
/* Save used registers on the stack. */
- INSN2(move,l ,MEM_PREDEC(sp),d2)
- INSN2(move,l ,MEM_PREDEC(sp),a2)
+ movel R(d2),MEM_PREDEC(sp)
+ movel R(a2),MEM_PREDEC(sp)
/* Copy the arguments to registers. Better use movem? */
- INSN2(move,l ,a2,MEM_DISP(sp,12))
- INSN2(move,l ,a0,MEM_DISP(sp,16))
- INSN2(move,l ,a1,MEM_DISP(sp,20))
- INSN2(move,l ,d2,MEM_DISP(sp,24))
-
- INSN2(eor,w ,d2,#1)
- INSN2(lsr,l ,d2,#1)
- bcc L1
- INSN2(subq,l ,d2,#1) /* clears cy as side effect */
-
-LAB(Loop)
- INSN2(move,l ,d0,MEM_POSTINC(a0))
- INSN2(move,l ,d1,MEM_POSTINC(a1))
- INSN2(addx,l ,d0,d1)
- INSN2(move,l ,MEM_POSTINC(a2),d0)
-LAB(L1) INSN2(move,l ,d0,MEM_POSTINC(a0))
- INSN2(move,l ,d1,MEM_POSTINC(a1))
- INSN2(addx,l ,d0,d1)
- INSN2(move,l ,MEM_POSTINC(a2),d0)
-
- dbf d2,Loop /* loop until 16 lsb of %4 == -1 */
- INSN2(subx,l ,d0,d0) /* d0 <= -cy; save cy as 0 or -1 in d0 */
- INSN2(sub,l ,d2,#0x10000)
- bcs L2
- INSN2(add,l ,d0,d0) /* restore cy */
- bra Loop
-
-LAB(L2)
- INSN1(neg,l ,d0)
+ movel MEM_DISP(sp,12),R(a2)
+ movel MEM_DISP(sp,16),R(a0)
+ movel MEM_DISP(sp,20),R(a1)
+ movel MEM_DISP(sp,24),R(d2)
+
+ eorw #1,R(d2)
+ lsrl #1,R(d2)
+ bcc L(L1)
+ subql #1,R(d2) /* clears cy as side effect */
+
+L(Loop:)
+ movel MEM_POSTINC(a0),R(d0)
+ movel MEM_POSTINC(a1),R(d1)
+ addxl R(d1),R(d0)
+ movel R(d0),MEM_POSTINC(a2)
+L(L1:) movel MEM_POSTINC(a0),R(d0)
+ movel MEM_POSTINC(a1),R(d1)
+ addxl R(d1),R(d0)
+ movel R(d0),MEM_POSTINC(a2)
+
+ dbf R(d2),L(Loop) /* loop until 16 lsb of %4 == -1 */
+ subxl R(d0),R(d0) /* d0 <= -cy; save cy as 0 or -1 in d0 */
+ subl #0x10000,R(d2)
+ bcs L(L2)
+ addl R(d0),R(d0) /* restore cy */
+ bra L(Loop)
+
+L(L2:)
+ negl R(d0)
/* Restore used registers from stack frame. */
- INSN2(move,l ,a2,MEM_POSTINC(sp))
- INSN2(move,l ,d2,MEM_POSTINC(sp))
+ movel MEM_POSTINC(sp),R(a2)
+ movel MEM_POSTINC(sp),R(d2)
rts
+EPILOG(__mpn_add_n)
diff --git a/sysdeps/m68k/lshift.S b/sysdeps/m68k/lshift.S
new file mode 100644
index 0000000..c58594a
--- /dev/null
+++ b/sysdeps/m68k/lshift.S
@@ -0,0 +1,150 @@
+/* mc68020 __mpn_lshift -- Shift left a low-level natural-number integer.
+
+Copyright (C) 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/*
+ INPUT PARAMETERS
+ res_ptr (sp + 4)
+ s_ptr (sp + 8)
+ s_size (sp + 16)
+ cnt (sp + 12)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define res_ptr a1
+#define s_ptr a0
+#define s_size d6
+#define cnt d4
+
+ TEXT
+ ALIGN
+ GLOBL C_SYMBOL_NAME(__mpn_lshift)
+
+C_SYMBOL_NAME(__mpn_lshift:)
+PROLOG(__mpn_lshift)
+
+/* Save used registers on the stack. */
+ moveml R(d2)-R(d6)/R(a2),MEM_PREDEC(sp)
+
+/* Copy the arguments to registers. */
+ movel MEM_DISP(sp,28),R(res_ptr)
+ movel MEM_DISP(sp,32),R(s_ptr)
+ movel MEM_DISP(sp,36),R(s_size)
+ movel MEM_DISP(sp,40),R(cnt)
+
+ moveql #1,R(d5)
+ cmpl R(d5),R(cnt)
+ bne L(Lnormal)
+ cmpl R(s_ptr),R(res_ptr)
+ bls L(Lspecial) /* jump if s_ptr >= res_ptr */
+#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
+ lea MEM_INDX1(s_ptr,s_size,l,4),R(a2)
+#else /* not mc68020 */
+ movel R(s_size),R(d0)
+ asll #2,R(d0)
+ lea MEM_INDX(s_ptr,d0,l),R(a2)
+#endif
+ cmpl R(res_ptr),R(a2)
+ bls L(Lspecial) /* jump if res_ptr >= s_ptr + s_size */
+
+L(Lnormal:)
+ moveql #32,R(d5)
+ subl R(cnt),R(d5)
+
+#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
+ lea MEM_INDX1(s_ptr,s_size,l,4),R(s_ptr)
+ lea MEM_INDX1(res_ptr,s_size,l,4),R(res_ptr)
+#else /* not mc68000 */
+ movel R(s_size),R(d0)
+ asll #2,R(d0)
+ addl R(s_size),R(s_ptr)
+ addl R(s_size),R(res_ptr)
+#endif
+ movel MEM_PREDEC(s_ptr),R(d2)
+ movel R(d2),R(d0)
+ lsrl R(d5),R(d0) /* compute carry limb */
+
+ lsll R(cnt),R(d2)
+ movel R(d2),R(d1)
+ subql #1,R(s_size)
+ beq L(Lend)
+ lsrl #1,R(s_size)
+ bcs L(L1)
+ subql #1,R(s_size)
+
+L(Loop:)
+ movel MEM_PREDEC(s_ptr),R(d2)
+ movel R(d2),R(d3)
+ lsrl R(d5),R(d3)
+ orl R(d3),R(d1)
+ movel R(d1),MEM_PREDEC(res_ptr)
+ lsll R(cnt),R(d2)
+L(L1:)
+ movel MEM_PREDEC(s_ptr),R(d1)
+ movel R(d1),R(d3)
+ lsrl R(d5),R(d3)
+ orl R(d3),R(d2)
+ movel R(d2),MEM_PREDEC(res_ptr)
+ lsll R(cnt),R(d1)
+
+ dbf R(s_size),L(Loop)
+ subl #0x10000,R(s_size)
+ bcc L(Loop)
+
+L(Lend:)
+ movel R(d1),MEM_PREDEC(res_ptr) /* store least significant limb */
+
+/* Restore used registers from stack frame. */
+ moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
+ rts
+
+/* We loop from least significant end of the arrays, which is only
+ permissable if the source and destination don't overlap, since the
+ function is documented to work for overlapping source and destination. */
+
+L(Lspecial:)
+ clrl R(d0) /* initialize carry */
+ eorw #1,R(s_size)
+ lsrl #1,R(s_size)
+ bcc L(LL1)
+ subql #1,R(s_size)
+
+L(LLoop:)
+ movel MEM_POSTINC(s_ptr),R(d2)
+ addxl R(d2),R(d2)
+ movel R(d2),MEM_POSTINC(res_ptr)
+L(LL1:)
+ movel MEM_POSTINC(s_ptr),R(d2)
+ addxl R(d2),R(d2)
+ movel R(d2),MEM_POSTINC(res_ptr)
+
+ dbf R(s_size),L(LLoop)
+ addxl R(d0),R(d0) /* save cy in lsb */
+ subl #0x10000,R(s_size)
+ bcs L(LLend)
+ lsrl #1,R(d0) /* restore cy */
+ bra L(LLoop)
+
+L(LLend:)
+/* Restore used registers from stack frame. */
+ moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
+ rts
+EPILOG(__mpn_lshift)
diff --git a/sysdeps/m68k/m68020/addmul_1.S b/sysdeps/m68k/m68020/addmul_1.S
index 3f244c4..169f113 100644
--- a/sysdeps/m68k/m68020/addmul_1.S
+++ b/sysdeps/m68k/m68020/addmul_1.S
@@ -1,7 +1,7 @@
/* mc68020 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
the result to a second limb vector.
-Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -23,58 +23,61 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
INPUT PARAMETERS
res_ptr (sp + 4)
s1_ptr (sp + 8)
- size (sp + 12)
+ s1_size (sp + 12)
s2_limb (sp + 16)
*/
+#include "sysdep.h"
#include "asm-syntax.h"
TEXT
ALIGN
- GLOBL ___mpn_addmul_1
+ GLOBL C_SYMBOL_NAME(__mpn_addmul_1)
-LAB(___mpn_addmul_1)
+C_SYMBOL_NAME(__mpn_addmul_1:)
+PROLOG(__mpn_addmul_1)
#define res_ptr a0
#define s1_ptr a1
-#define size d2
+#define s1_size d2
#define s2_limb d4
/* Save used registers on the stack. */
- INSN2(movem,l ,MEM_PREDEC(sp),d2-d5)
+ moveml R(d2)-R(d5),MEM_PREDEC(sp)
/* Copy the arguments to registers. Better use movem? */
- INSN2(move,l ,res_ptr,MEM_DISP(sp,20))
- INSN2(move,l ,s1_ptr,MEM_DISP(sp,24))
- INSN2(move,l ,size,MEM_DISP(sp,28))
- INSN2(move,l ,s2_limb,MEM_DISP(sp,32))
-
- INSN2(eor,w ,size,#1)
- INSN1(clr,l ,d1)
- INSN1(clr,l ,d5)
- INSN2(lsr,l ,size,#1)
- bcc L1
- INSN2(subq,l ,size,#1)
- INSN2(sub,l ,d0,d0) /* (d0,cy) <= (0,0) */
-
-LAB(Loop)
- INSN2(move,l ,d3,MEM_POSTINC(s1_ptr))
- INSN2(mulu,l ,d1:d3,s2_limb)
- INSN2(addx,l ,d3,d0)
- INSN2(addx,l ,d1,d5)
- INSN2(add,l ,MEM_POSTINC(res_ptr),d3)
-LAB(L1) INSN2(move,l ,d3,MEM_POSTINC(s1_ptr))
- INSN2(mulu,l ,d0:d3,s2_limb)
- INSN2(addx,l ,d3,d1)
- INSN2(addx,l ,d0,d5)
- INSN2(add,l ,MEM_POSTINC(res_ptr),d3)
-
- dbf size,Loop
- INSN2(addx,l ,d0,d5)
- INSN2(sub,l ,size,#0x10000)
- bcc Loop
+ movel MEM_DISP(sp,20),R(res_ptr)
+ movel MEM_DISP(sp,24),R(s1_ptr)
+ movel MEM_DISP(sp,28),R(s1_size)
+ movel MEM_DISP(sp,32),R(s2_limb)
+
+ eorw #1,R(s1_size)
+ clrl R(d1)
+ clrl R(d5)
+ lsrl #1,R(s1_size)
+ bcc L(L1)
+ subql #1,R(s1_size)
+ subl R(d0),R(d0) /* (d0,cy) <= (0,0) */
+
+L(Loop:)
+ movel MEM_POSTINC(s1_ptr),R(d3)
+ mulul R(s2_limb),R(d1):R(d3)
+ addxl R(d0),R(d3)
+ addxl R(d5),R(d1)
+ addl R(d3),MEM_POSTINC(res_ptr)
+L(L1:) movel MEM_POSTINC(s1_ptr),R(d3)
+ mulul R(s2_limb),R(d0):R(d3)
+ addxl R(d1),R(d3)
+ addxl R(d5),R(d0)
+ addl R(d3),MEM_POSTINC(res_ptr)
+
+ dbf R(s1_size),L(Loop)
+ addxl R(d5),R(d0)
+ subl #0x10000,R(s1_size)
+ bcc L(Loop)
/* Restore used registers from stack frame. */
- INSN2(movem,l ,d2-d5,MEM_POSTINC(sp))
+ moveml MEM_POSTINC(sp),R(d2)-R(d5)
rts
+EPILOG(__mpn_addmul_1)
diff --git a/sysdeps/m68k/m68020/mul_1.S b/sysdeps/m68k/m68020/mul_1.S
index 548ca00..4db1cca 100644
--- a/sysdeps/m68k/m68020/mul_1.S
+++ b/sysdeps/m68k/m68020/mul_1.S
@@ -1,7 +1,7 @@
/* mc68020 __mpn_mul_1 -- Multiply a limb vector with a limb and store
the result in a second limb vector.
-Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -23,65 +23,68 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
INPUT PARAMETERS
res_ptr (sp + 4)
s1_ptr (sp + 8)
- size (sp + 12)
+ s1_size (sp + 12)
s2_limb (sp + 16)
*/
+#include "sysdep.h"
#include "asm-syntax.h"
TEXT
ALIGN
- GLOBL ___mpn_mul_1
+ GLOBL C_SYMBOL_NAME(__mpn_mul_1)
-LAB(___mpn_mul_1)
+C_SYMBOL_NAME(__mpn_mul_1:)
+PROLOG(__mpn_mul_1)
#define res_ptr a0
#define s1_ptr a1
-#define size d2
+#define s1_size d2
#define s2_limb d4
/* Save used registers on the stack. */
- INSN2(movem,l ,MEM_PREDEC(sp),d2-d4)
+ moveml R(d2)-R(d4),MEM_PREDEC(sp)
#if 0
- INSN2(move,l ,MEM_PREDEC(sp),d2)
- INSN2(move,l ,MEM_PREDEC(sp),d3)
- INSN2(move,l ,MEM_PREDEC(sp),d4)
+ movel R(d2),MEM_PREDEC(sp)
+ movel R(d3),MEM_PREDEC(sp)
+ movel R(d4),MEM_PREDEC(sp)
#endif
/* Copy the arguments to registers. Better use movem? */
- INSN2(move,l ,res_ptr,MEM_DISP(sp,16))
- INSN2(move,l ,s1_ptr,MEM_DISP(sp,20))
- INSN2(move,l ,size,MEM_DISP(sp,24))
- INSN2(move,l ,s2_limb,MEM_DISP(sp,28))
-
- INSN2(eor,w ,size,#1)
- INSN1(clr,l ,d1)
- INSN2(lsr,l ,size,#1)
- bcc L1
- INSN2(subq,l ,size,#1)
- INSN2(sub,l ,d0,d0) /* (d0,cy) <= (0,0) */
-
-LAB(Loop)
- INSN2(move,l ,d3,MEM_POSTINC(s1_ptr))
- INSN2(mulu,l ,d1:d3,s2_limb)
- INSN2(addx,l ,d3,d0)
- INSN2(move,l ,MEM_POSTINC(res_ptr),d3)
-LAB(L1) INSN2(move,l ,d3,MEM_POSTINC(s1_ptr))
- INSN2(mulu,l ,d0:d3,s2_limb)
- INSN2(addx,l ,d3,d1)
- INSN2(move,l ,MEM_POSTINC(res_ptr),d3)
-
- dbf size,Loop
- INSN1(clr,l ,d3)
- INSN2(addx,l ,d0,d3)
- INSN2(sub,l ,size,#0x10000)
- bcc Loop
+ movel MEM_DISP(sp,16),R(res_ptr)
+ movel MEM_DISP(sp,20),R(s1_ptr)
+ movel MEM_DISP(sp,24),R(s1_size)
+ movel MEM_DISP(sp,28),R(s2_limb)
+
+ eorw #1,R(s1_size)
+ clrl R(d1)
+ lsrl #1,R(s1_size)
+ bcc L(L1)
+ subql #1,R(s1_size)
+ subl R(d0),R(d0) /* (d0,cy) <= (0,0) */
+
+L(Loop:)
+ movel MEM_POSTINC(s1_ptr),R(d3)
+ mulul R(s2_limb),R(d1):R(d3)
+ addxl R(d0),R(d3)
+ movel R(d3),MEM_POSTINC(res_ptr)
+L(L1:) movel MEM_POSTINC(s1_ptr),R(d3)
+ mulul R(s2_limb),R(d0):R(d3)
+ addxl R(d1),R(d3)
+ movel R(d3),MEM_POSTINC(res_ptr)
+
+ dbf R(s1_size),L(Loop)
+ clrl R(d3)
+ addxl R(d3),R(d0)
+ subl #0x10000,R(s1_size)
+ bcc L(Loop)
/* Restore used registers from stack frame. */
- INSN2(movem,l ,d2-d4,MEM_POSTINC(sp))
+ moveml MEM_POSTINC(sp),R(d2)-R(d4)
#if 0
- INSN2(move,l ,d4,MEM_POSTINC(sp))
- INSN2(move,l ,d3,MEM_POSTINC(sp))
- INSN2(move,l ,d2,MEM_POSTINC(sp))
+ movel MEM_POSTINC(sp),R(d4)
+ movel MEM_POSTINC(sp),R(d3)
+ movel MEM_POSTINC(sp),R(d2)
#endif
rts
+EPILOG(__mpn_mul_1)
diff --git a/sysdeps/m68k/m68020/submul_1.S b/sysdeps/m68k/m68020/submul_1.S
index ef7f39d..cf30029 100644
--- a/sysdeps/m68k/m68020/submul_1.S
+++ b/sysdeps/m68k/m68020/submul_1.S
@@ -1,7 +1,7 @@
/* mc68020 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
the result from a second limb vector.
-Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -23,58 +23,61 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
INPUT PARAMETERS
res_ptr (sp + 4)
s1_ptr (sp + 8)
- size (sp + 12)
+ s1_size (sp + 12)
s2_limb (sp + 16)
*/
+#include "sysdep.h"
#include "asm-syntax.h"
TEXT
ALIGN
- GLOBL ___mpn_submul_1
+ GLOBL C_SYMBOL_NAME(__mpn_submul_1)
-LAB(___mpn_submul_1)
+C_SYMBOL_NAME(__mpn_submul_1:)
+PROLOG(__mpn_submul_1)
#define res_ptr a0
#define s1_ptr a1
-#define size d2
+#define s1_size d2
#define s2_limb d4
/* Save used registers on the stack. */
- INSN2(movem,l ,MEM_PREDEC(sp),d2-d5)
+ moveml R(d2)-R(d5),MEM_PREDEC(sp)
/* Copy the arguments to registers. Better use movem? */
- INSN2(move,l ,res_ptr,MEM_DISP(sp,20))
- INSN2(move,l ,s1_ptr,MEM_DISP(sp,24))
- INSN2(move,l ,size,MEM_DISP(sp,28))
- INSN2(move,l ,s2_limb,MEM_DISP(sp,32))
-
- INSN2(eor,w ,size,#1)
- INSN1(clr,l ,d1)
- INSN1(clr,l ,d5)
- INSN2(lsr,l ,size,#1)
- bcc L1
- INSN2(subq,l ,size,#1)
- INSN2(sub,l ,d0,d0) /* (d0,cy) <= (0,0) */
-
-LAB(Loop)
- INSN2(move,l ,d3,MEM_POSTINC(s1_ptr))
- INSN2(mulu,l ,d1:d3,s2_limb)
- INSN2(addx,l ,d3,d0)
- INSN2(addx,l ,d1,d5)
- INSN2(sub,l ,MEM_POSTINC(res_ptr),d3)
-LAB(L1) INSN2(move,l ,d3,MEM_POSTINC(s1_ptr))
- INSN2(mulu,l ,d0:d3,s2_limb)
- INSN2(addx,l ,d3,d1)
- INSN2(addx,l ,d0,d5)
- INSN2(sub,l ,MEM_POSTINC(res_ptr),d3)
-
- dbf size,Loop
- INSN2(addx,l ,d0,d5)
- INSN2(sub,l ,size,#0x10000)
- bcc Loop
+ movel MEM_DISP(sp,20),R(res_ptr)
+ movel MEM_DISP(sp,24),R(s1_ptr)
+ movel MEM_DISP(sp,28),R(s1_size)
+ movel MEM_DISP(sp,32),R(s2_limb)
+
+ eorw #1,R(s1_size)
+ clrl R(d1)
+ clrl R(d5)
+ lsrl #1,R(s1_size)
+ bcc L(L1)
+ subql #1,R(s1_size)
+ subl R(d0),R(d0) /* (d0,cy) <= (0,0) */
+
+L(Loop:)
+ movel MEM_POSTINC(s1_ptr),R(d3)
+ mulul R(s2_limb),R(d1):R(d3)
+ addxl R(d0),R(d3)
+ addxl R(d5),R(d1)
+ subl R(d3),MEM_POSTINC(res_ptr)
+L(L1:) movel MEM_POSTINC(s1_ptr),R(d3)
+ mulul R(s2_limb),R(d0):R(d3)
+ addxl R(d1),R(d3)
+ addxl R(d5),R(d0)
+ subl R(d3),MEM_POSTINC(res_ptr)
+
+ dbf R(s1_size),L(Loop)
+ addxl R(d5),R(d0)
+ subl #0x10000,R(s1_size)
+ bcc L(Loop)
/* Restore used registers from stack frame. */
- INSN2(movem,l ,d2-d5,MEM_POSTINC(sp))
+ moveml MEM_POSTINC(sp),R(d2)-R(d5)
rts
+EPILOG(__mpn_submul_1)
diff --git a/sysdeps/m68k/rshift.S b/sysdeps/m68k/rshift.S
new file mode 100644
index 0000000..494dfcb
--- /dev/null
+++ b/sysdeps/m68k/rshift.S
@@ -0,0 +1,149 @@
+/* mc68020 __mpn_rshift -- Shift right a low-level natural-number integer.
+
+Copyright (C) 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/*
+ INPUT PARAMETERS
+ res_ptr (sp + 4)
+ s_ptr (sp + 8)
+ s_size (sp + 16)
+ cnt (sp + 12)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define res_ptr a1
+#define s_ptr a0
+#define s_size d6
+#define cnt d4
+
+ TEXT
+ ALIGN
+ GLOBL C_SYMBOL_NAME(__mpn_rshift)
+
+C_SYMBOL_NAME(__mpn_rshift:)
+PROLOG(__mpn_rshift)
+/* Save used registers on the stack. */
+ moveml R(d2)-R(d6)/R(a2),MEM_PREDEC(sp)
+
+/* Copy the arguments to registers. */
+ movel MEM_DISP(sp,28),R(res_ptr)
+ movel MEM_DISP(sp,32),R(s_ptr)
+ movel MEM_DISP(sp,36),R(s_size)
+ movel MEM_DISP(sp,40),R(cnt)
+
+ moveql #1,R(d5)
+ cmpl R(d5),R(cnt)
+ bne L(Lnormal)
+ cmpl R(res_ptr),R(s_ptr)
+ bls L(Lspecial) /* jump if res_ptr >= s_ptr */
+#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
+ lea MEM_INDX1(res_ptr,s_size,l,4),R(a2)
+#else /* not mc68020 */
+ movel R(s_size),R(d0)
+ asll #2,R(d0)
+ lea MEM_INDX(res_ptr,d0,l),R(a2)
+#endif
+ cmpl R(s_ptr),R(a2)
+ bls L(Lspecial) /* jump if s_ptr >= res_ptr + s_size */
+
+L(Lnormal:)
+ moveql #32,R(d5)
+ subl R(cnt),R(d5)
+ movel MEM_POSTINC(s_ptr),R(d2)
+ movel R(d2),R(d0)
+ lsll R(d5),R(d0) /* compute carry limb */
+
+ lsrl R(cnt),R(d2)
+ movel R(d2),R(d1)
+ subql #1,R(s_size)
+ beq L(Lend)
+ lsrl #1,R(s_size)
+ bcs L(L1)
+ subql #1,R(s_size)
+
+L(Loop:)
+ movel MEM_POSTINC(s_ptr),R(d2)
+ movel R(d2),R(d3)
+ lsll R(d5),R(d3)
+ orl R(d3),R(d1)
+ movel R(d1),MEM_POSTINC(res_ptr)
+ lsrl R(cnt),R(d2)
+L(L1:)
+ movel MEM_POSTINC(s_ptr),R(d1)
+ movel R(d1),R(d3)
+ lsll R(d5),R(d3)
+ orl R(d3),R(d2)
+ movel R(d2),MEM_POSTINC(res_ptr)
+ lsrl R(cnt),R(d1)
+
+ dbf R(s_size),L(Loop)
+ subl #0x10000,R(s_size)
+ bcc L(Loop)
+
+L(Lend:)
+ movel R(d1),MEM(res_ptr) /* store most significant limb */
+
+/* Restore used registers from stack frame. */
+ moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
+ rts
+
+/* We loop from most significant end of the arrays, which is only
+ permissable if the source and destination don't overlap, since the
+ function is documented to work for overlapping source and destination. */
+
+L(Lspecial:)
+#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
+ lea MEM_INDX1(s_ptr,s_size,l,4),R(s_ptr)
+ lea MEM_INDX1(res_ptr,s_size,l,4),R(res_ptr)
+#else /* not mc68000 */
+ movel R(s_size),R(d0)
+ asll #2,R(d0)
+ addl R(s_size),R(s_ptr)
+ addl R(s_size),R(res_ptr)
+#endif
+
+ clrl R(d0) /* initialize carry */
+ eorw #1,R(s_size)
+ lsrl #1,R(s_size)
+ bcc L(LL1)
+ subql #1,R(s_size)
+
+L(LLoop:)
+ movel MEM_PREDEC(s_ptr),R(d2)
+ roxrl #1,R(d2)
+ movel R(d2),MEM_PREDEC(res_ptr)
+L(LL1:)
+ movel MEM_PREDEC(s_ptr),R(d2)
+ roxrl #1,R(d2)
+ movel R(d2),MEM_PREDEC(res_ptr)
+
+ dbf R(s_size),L(LLoop)
+ roxrl #1,R(d0) /* save cy in msb */
+ subl #0x10000,R(s_size)
+ bcs L(LLend)
+ addl R(d0),R(d0) /* restore cy */
+ bra L(LLoop)
+
+L(LLend:)
+/* Restore used registers from stack frame. */
+ moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
+ rts
+EPILOG(__mpn_rshift)
diff --git a/sysdeps/m68k/sub_n.S b/sysdeps/m68k/sub_n.S
index 19f0ec1..39f5161 100644
--- a/sysdeps/m68k/sub_n.S
+++ b/sysdeps/m68k/sub_n.S
@@ -1,7 +1,7 @@
/* mc68020 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
store difference in a third limb vector.
-Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -27,50 +27,53 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
size (sp + 12)
*/
+#include "sysdep.h"
#include "asm-syntax.h"
TEXT
ALIGN
- GLOBL ___mpn_sub_n
+ GLOBL C_SYMBOL_NAME(__mpn_sub_n)
-LAB(___mpn_sub_n)
+C_SYMBOL_NAME(__mpn_sub_n:)
+PROLOG(__mpn_sub_n)
/* Save used registers on the stack. */
- INSN2(move,l ,MEM_PREDEC(sp),d2)
- INSN2(move,l ,MEM_PREDEC(sp),a2)
+ movel R(d2),MEM_PREDEC(sp)
+ movel R(a2),MEM_PREDEC(sp)
/* Copy the arguments to registers. Better use movem? */
- INSN2(move,l ,a2,MEM_DISP(sp,12))
- INSN2(move,l ,a0,MEM_DISP(sp,16))
- INSN2(move,l ,a1,MEM_DISP(sp,20))
- INSN2(move,l ,d2,MEM_DISP(sp,24))
-
- INSN2(eor,w ,d2,#1)
- INSN2(lsr,l ,d2,#1)
- bcc L1
- INSN2(subq,l ,d2,#1) /* clears cy as side effect */
-
-LAB(Loop)
- INSN2(move,l ,d0,MEM_POSTINC(a0))
- INSN2(move,l ,d1,MEM_POSTINC(a1))
- INSN2(subx,l ,d0,d1)
- INSN2(move,l ,MEM_POSTINC(a2),d0)
-LAB(L1) INSN2(move,l ,d0,MEM_POSTINC(a0))
- INSN2(move,l ,d1,MEM_POSTINC(a1))
- INSN2(subx,l ,d0,d1)
- INSN2(move,l ,MEM_POSTINC(a2),d0)
-
- dbf d2,Loop /* loop until 16 lsb of %4 == -1 */
- INSN2(subx,l ,d0,d0) /* d0 <= -cy; save cy as 0 or -1 in d0 */
- INSN2(sub,l ,d2,#0x10000)
- bcs L2
- INSN2(add,l ,d0,d0) /* restore cy */
- bra Loop
-
-LAB(L2)
- INSN1(neg,l ,d0)
+ movel MEM_DISP(sp,12),R(a2)
+ movel MEM_DISP(sp,16),R(a0)
+ movel MEM_DISP(sp,20),R(a1)
+ movel MEM_DISP(sp,24),R(d2)
+
+ eorw #1,R(d2)
+ lsrl #1,R(d2)
+ bcc L(L1)
+ subql #1,R(d2) /* clears cy as side effect */
+
+L(Loop:)
+ movel MEM_POSTINC(a0),R(d0)
+ movel MEM_POSTINC(a1),R(d1)
+ subxl R(d1),R(d0)
+ movel R(d0),MEM_POSTINC(a2)
+L(L1:) movel MEM_POSTINC(a0),R(d0)
+ movel MEM_POSTINC(a1),R(d1)
+ subxl R(d1),R(d0)
+ movel R(d0),MEM_POSTINC(a2)
+
+ dbf R(d2),L(Loop) /* loop until 16 lsb of %4 == -1 */
+ subxl R(d0),R(d0) /* d0 <= -cy; save cy as 0 or -1 in d0 */
+ subl #0x10000,R(d2)
+ bcs L(L2)
+ addl R(d0),R(d0) /* restore cy */
+ bra L(Loop)
+
+L(L2:)
+ negl R(d0)
/* Restore used registers from stack frame. */
- INSN2(move,l ,a2,MEM_POSTINC(sp))
- INSN2(move,l ,d2,MEM_POSTINC(sp))
+ movel MEM_POSTINC(sp),R(a2)
+ movel MEM_POSTINC(sp),R(d2)
rts
+EPILOG(__mpn_sub_n)
diff --git a/sysdeps/m88k/add_n.s b/sysdeps/m88k/add_n.s
index 7e4cccc..d564479 100644
--- a/sysdeps/m88k/add_n.s
+++ b/sysdeps/m88k/add_n.s
@@ -1,7 +1,7 @@
; mc88100 __mpn_add -- Add two limb vectors of the same length > 0 and store
; sum in a third limb vector.
-; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+; Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
; This file is part of the GNU MP Library.
diff --git a/sysdeps/m88k/m88110/add_n.S b/sysdeps/m88k/m88110/add_n.S
new file mode 100644
index 0000000..ab20630
--- /dev/null
+++ b/sysdeps/m88k/m88110/add_n.S
@@ -0,0 +1,199 @@
+; mc88110 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
+; sum in a third limb vector.
+
+; Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+; INPUT PARAMETERS
+#define res_ptr r2
+#define s1_ptr r3
+#define s2_ptr r4
+#define size r5
+
+#include "sysdep.h"
+
+ text
+ align 16
+ global C_SYMBOL_NAME(__mpn_add_n)
+C_SYMBOL_NAME(__mpn_add_n):
+ addu.co r0,r0,r0 ; clear cy flag
+ xor r12,s2_ptr,res_ptr
+ bb1 2,r12,L1
+; ** V1a **
+L0: bb0 2,res_ptr,L_v1 ; branch if res_ptr is aligned?
+/* Add least significant limb separately to align res_ptr and s2_ptr */
+ ld r10,s1_ptr,0
+ addu s1_ptr,s1_ptr,4
+ ld r8,s2_ptr,0
+ addu s2_ptr,s2_ptr,4
+ subu size,size,1
+ addu.co r6,r10,r8
+ st r6,res_ptr,0
+ addu res_ptr,res_ptr,4
+L_v1: cmp r12,size,2
+ bb1 lt,r12,Lend2
+
+ ld r10,s1_ptr,0
+ ld r12,s1_ptr,4
+ ld.d r8,s2_ptr,0
+ subu size,size,10
+ bcnd lt0,size,Lfin1
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+ align 8
+Loop1: subu size,size,8
+ addu.cio r6,r10,r8
+ ld r10,s1_ptr,8
+ addu.cio r7,r12,r9
+ ld r12,s1_ptr,12
+ ld.d r8,s2_ptr,8
+ st.d r6,res_ptr,0
+ addu.cio r6,r10,r8
+ ld r10,s1_ptr,16
+ addu.cio r7,r12,r9
+ ld r12,s1_ptr,20
+ ld.d r8,s2_ptr,16
+ st.d r6,res_ptr,8
+ addu.cio r6,r10,r8
+ ld r10,s1_ptr,24
+ addu.cio r7,r12,r9
+ ld r12,s1_ptr,28
+ ld.d r8,s2_ptr,24
+ st.d r6,res_ptr,16
+ addu.cio r6,r10,r8
+ ld r10,s1_ptr,32
+ addu.cio r7,r12,r9
+ ld r12,s1_ptr,36
+ addu s1_ptr,s1_ptr,32
+ ld.d r8,s2_ptr,32
+ addu s2_ptr,s2_ptr,32
+ st.d r6,res_ptr,24
+ addu res_ptr,res_ptr,32
+ bcnd ge0,size,Loop1
+
+Lfin1: addu size,size,8-2
+ bcnd lt0,size,Lend1
+/* Add blocks of 2 limbs until less than 2 limbs remain */
+Loope1: addu.cio r6,r10,r8
+ ld r10,s1_ptr,8
+ addu.cio r7,r12,r9
+ ld r12,s1_ptr,12
+ ld.d r8,s2_ptr,8
+ st.d r6,res_ptr,0
+ subu size,size,2
+ addu s1_ptr,s1_ptr,8
+ addu s2_ptr,s2_ptr,8
+ addu res_ptr,res_ptr,8
+ bcnd ge0,size,Loope1
+Lend1: addu.cio r6,r10,r8
+ addu.cio r7,r12,r9
+ st.d r6,res_ptr,0
+
+ bb0 0,size,Lret1
+/* Add last limb */
+ ld r10,s1_ptr,8
+ ld r8,s2_ptr,8
+ addu.cio r6,r10,r8
+ st r6,res_ptr,8
+
+Lret1: jmp.n r1
+ addu.ci r2,r0,r0 ; return carry-out from most sign. limb
+
+L1: xor r12,s1_ptr,res_ptr
+ bb1 2,r12,L2
+; ** V1b **
+ or r12,r0,s2_ptr
+ or s2_ptr,r0,s1_ptr
+ or s1_ptr,r0,r12
+ br L0
+
+; ** V2 **
+/* If we come here, the alignment of s1_ptr and res_ptr as well as the
+ alignment of s2_ptr and res_ptr differ. Since there are only two ways
+ things can be aligned (that we care about) we now know that the alignment
+ of s1_ptr and s2_ptr are the same. */
+
+L2: cmp r12,size,1
+ bb1 eq,r12,Ljone
+ bb0 2,s1_ptr,L_v2 ; branch if s1_ptr is aligned
+/* Add least significant limb separately to align res_ptr and s2_ptr */
+ ld r10,s1_ptr,0
+ addu s1_ptr,s1_ptr,4
+ ld r8,s2_ptr,0
+ addu s2_ptr,s2_ptr,4
+ subu size,size,1
+ addu.co r6,r10,r8
+ st r6,res_ptr,0
+ addu res_ptr,res_ptr,4
+
+L_v2: subu size,size,8
+ bcnd lt0,size,Lfin2
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+ align 8
+Loop2: subu size,size,8
+ ld.d r8,s1_ptr,0
+ ld.d r6,s2_ptr,0
+ addu.cio r8,r8,r6
+ st r8,res_ptr,0
+ addu.cio r9,r9,r7
+ st r9,res_ptr,4
+ ld.d r8,s1_ptr,8
+ ld.d r6,s2_ptr,8
+ addu.cio r8,r8,r6
+ st r8,res_ptr,8
+ addu.cio r9,r9,r7
+ st r9,res_ptr,12
+ ld.d r8,s1_ptr,16
+ ld.d r6,s2_ptr,16
+ addu.cio r8,r8,r6
+ st r8,res_ptr,16
+ addu.cio r9,r9,r7
+ st r9,res_ptr,20
+ ld.d r8,s1_ptr,24
+ ld.d r6,s2_ptr,24
+ addu.cio r8,r8,r6
+ st r8,res_ptr,24
+ addu.cio r9,r9,r7
+ st r9,res_ptr,28
+ addu s1_ptr,s1_ptr,32
+ addu s2_ptr,s2_ptr,32
+ addu res_ptr,res_ptr,32
+ bcnd ge0,size,Loop2
+
+Lfin2: addu size,size,8-2
+ bcnd lt0,size,Lend2
+Loope2: ld.d r8,s1_ptr,0
+ ld.d r6,s2_ptr,0
+ addu.cio r8,r8,r6
+ st r8,res_ptr,0
+ addu.cio r9,r9,r7
+ st r9,res_ptr,4
+ subu size,size,2
+ addu s1_ptr,s1_ptr,8
+ addu s2_ptr,s2_ptr,8
+ addu res_ptr,res_ptr,8
+ bcnd ge0,size,Loope2
+Lend2: bb0 0,size,Lret2
+/* Add last limb */
+Ljone: ld r10,s1_ptr,0
+ ld r8,s2_ptr,0
+ addu.cio r6,r10,r8
+ st r6,res_ptr,0
+
+Lret2: jmp.n r1
+ addu.ci r2,r0,r0 ; return carry-out from most sign. limb
diff --git a/sysdeps/m88k/m88110/addmul_1.s b/sysdeps/m88k/m88110/addmul_1.s
new file mode 100644
index 0000000..1a4dfa1
--- /dev/null
+++ b/sysdeps/m88k/m88110/addmul_1.s
@@ -0,0 +1,60 @@
+; mc88110 __mpn_addmul_1 -- Multiply a limb vector with a single limb and
+; store the product in a second limb vector.
+
+; Copyright (C) 1996 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr r2
+; s1_ptr r3
+; size r4
+; s2_limb r5
+
+ text
+ align 16
+ global ___mpn_addmul_1
+___mpn_addmul_1:
+ lda r3,r3[r4]
+ lda r8,r2[r4] ; RES_PTR in r8 since r2 is retval
+ subu r4,r0,r4
+ addu.co r2,r0,r0 ; r2 = cy = 0
+
+ ld r6,r3[r4]
+ addu r4,r4,1
+ subu r8,r8,4
+ bcnd.n eq0,r4,Lend
+ mulu.d r10,r6,r5
+
+Loop: ld r7,r8[r4]
+ ld r6,r3[r4]
+ addu.cio r9,r11,r2
+ addu.ci r2,r10,r0
+ addu.co r9,r9,r7
+ st r9,r8[r4]
+ addu r4,r4,1
+ mulu.d r10,r6,r5
+ bcnd ne0,r4,Loop
+
+Lend: ld r7,r8,0
+ addu.cio r9,r11,r2
+ addu.ci r2,r10,r0
+ addu.co r9,r9,r7
+ st r9,r8,0
+ jmp.n r1
+ addu.ci r2,r2,r0
diff --git a/sysdeps/m88k/m88110/mul_1.s b/sysdeps/m88k/m88110/mul_1.s
index 08c3ca0..b1352ce 100644
--- a/sysdeps/m88k/m88110/mul_1.s
+++ b/sysdeps/m88k/m88110/mul_1.s
@@ -1,7 +1,7 @@
; mc88110 __mpn_mul_1 -- Multiply a limb vector with a single limb and
; store the product in a second limb vector.
-; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+; Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
; This file is part of the GNU MP Library.
@@ -56,29 +56,3 @@ Lend: addu.cio r9,r11,r2
st r9,r8,4
jmp.n r1
addu.ci r2,r10,r0
-
-; This is the Right Way to do this on '110. 4 cycles / 64-bit limb.
-; ld.d r10,
-; mulu.d
-; addu.cio
-; addu.cio
-; st.d
-; mulu.d ,r11,r5
-; ld.d r12,
-; mulu.d ,r10,r5
-; addu.cio
-; addu.cio
-; st.d
-; mulu.d
-; ld.d r10,
-; mulu.d
-; addu.cio
-; addu.cio
-; st.d
-; mulu.d
-; ld.d r10,
-; mulu.d
-; addu.cio
-; addu.cio
-; st.d
-; mulu.d
diff --git a/sysdeps/m88k/m88110/sub_n.S b/sysdeps/m88k/m88110/sub_n.S
new file mode 100644
index 0000000..74ee0ae
--- /dev/null
+++ b/sysdeps/m88k/m88110/sub_n.S
@@ -0,0 +1,275 @@
+; mc88110 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+; store difference in a third limb vector.
+
+; Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+; INPUT PARAMETERS
+#define res_ptr r2
+#define s1_ptr r3
+#define s2_ptr r4
+#define size r5
+
+#include "sysdep.h"
+
+ text
+ align 16
+ global C_SYMBOL_NAME(__mpn_sub_n)
+C_SYMBOL_NAME(__mpn_sub_n):
+ subu.co r0,r0,r0 ; set cy flag
+ xor r12,s2_ptr,res_ptr
+ bb1 2,r12,L1
+; ** V1a **
+L0: bb0 2,res_ptr,L_v1 ; branch if res_ptr is aligned
+/* Add least significant limb separately to align res_ptr and s2_ptr */
+ ld r10,s1_ptr,0
+ addu s1_ptr,s1_ptr,4
+ ld r8,s2_ptr,0
+ addu s2_ptr,s2_ptr,4
+ subu size,size,1
+ subu.co r6,r10,r8
+ st r6,res_ptr,0
+ addu res_ptr,res_ptr,4
+L_v1: cmp r12,size,2
+ bb1 lt,r12,Lend2
+
+ ld r10,s1_ptr,0
+ ld r12,s1_ptr,4
+ ld.d r8,s2_ptr,0
+ subu size,size,10
+ bcnd lt0,size,Lfin1
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+ align 8
+Loop1: subu size,size,8
+ subu.cio r6,r10,r8
+ ld r10,s1_ptr,8
+ subu.cio r7,r12,r9
+ ld r12,s1_ptr,12
+ ld.d r8,s2_ptr,8
+ st.d r6,res_ptr,0
+ subu.cio r6,r10,r8
+ ld r10,s1_ptr,16
+ subu.cio r7,r12,r9
+ ld r12,s1_ptr,20
+ ld.d r8,s2_ptr,16
+ st.d r6,res_ptr,8
+ subu.cio r6,r10,r8
+ ld r10,s1_ptr,24
+ subu.cio r7,r12,r9
+ ld r12,s1_ptr,28
+ ld.d r8,s2_ptr,24
+ st.d r6,res_ptr,16
+ subu.cio r6,r10,r8
+ ld r10,s1_ptr,32
+ subu.cio r7,r12,r9
+ ld r12,s1_ptr,36
+ addu s1_ptr,s1_ptr,32
+ ld.d r8,s2_ptr,32
+ addu s2_ptr,s2_ptr,32
+ st.d r6,res_ptr,24
+ addu res_ptr,res_ptr,32
+ bcnd ge0,size,Loop1
+
+Lfin1: addu size,size,8-2
+ bcnd lt0,size,Lend1
+/* Add blocks of 2 limbs until less than 2 limbs remain */
+Loope1: subu.cio r6,r10,r8
+ ld r10,s1_ptr,8
+ subu.cio r7,r12,r9
+ ld r12,s1_ptr,12
+ ld.d r8,s2_ptr,8
+ st.d r6,res_ptr,0
+ subu size,size,2
+ addu s1_ptr,s1_ptr,8
+ addu s2_ptr,s2_ptr,8
+ addu res_ptr,res_ptr,8
+ bcnd ge0,size,Loope1
+Lend1: subu.cio r6,r10,r8
+ subu.cio r7,r12,r9
+ st.d r6,res_ptr,0
+
+ bb0 0,size,Lret1
+/* Add last limb */
+ ld r10,s1_ptr,8
+ ld r8,s2_ptr,8
+ subu.cio r6,r10,r8
+ st r6,res_ptr,8
+
+Lret1: addu.ci r2,r0,r0 ; return carry-out from most sign. limb
+ jmp.n r1
+ xor r2,r2,1
+
+L1: xor r12,s1_ptr,res_ptr
+ bb1 2,r12,L2
+; ** V1b **
+ bb0 2,res_ptr,L_v1b ; branch if res_ptr is aligned
+/* Add least significant limb separately to align res_ptr and s1_ptr */
+ ld r10,s2_ptr,0
+ addu s2_ptr,s2_ptr,4
+ ld r8,s1_ptr,0
+ addu s1_ptr,s1_ptr,4
+ subu size,size,1
+ subu.co r6,r8,r10
+ st r6,res_ptr,0
+ addu res_ptr,res_ptr,4
+L_v1b: cmp r12,size,2
+ bb1 lt,r12,Lend2
+
+ ld r10,s2_ptr,0
+ ld r12,s2_ptr,4
+ ld.d r8,s1_ptr,0
+ subu size,size,10
+ bcnd lt0,size,Lfin1b
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+ align 8
+Loop1b: subu size,size,8
+ subu.cio r6,r8,r10
+ ld r10,s2_ptr,8
+ subu.cio r7,r9,r12
+ ld r12,s2_ptr,12
+ ld.d r8,s1_ptr,8
+ st.d r6,res_ptr,0
+ subu.cio r6,r8,r10
+ ld r10,s2_ptr,16
+ subu.cio r7,r9,r12
+ ld r12,s2_ptr,20
+ ld.d r8,s1_ptr,16
+ st.d r6,res_ptr,8
+ subu.cio r6,r8,r10
+ ld r10,s2_ptr,24
+ subu.cio r7,r9,r12
+ ld r12,s2_ptr,28
+ ld.d r8,s1_ptr,24
+ st.d r6,res_ptr,16
+ subu.cio r6,r8,r10
+ ld r10,s2_ptr,32
+ subu.cio r7,r9,r12
+ ld r12,s2_ptr,36
+ addu s2_ptr,s2_ptr,32
+ ld.d r8,s1_ptr,32
+ addu s1_ptr,s1_ptr,32
+ st.d r6,res_ptr,24
+ addu res_ptr,res_ptr,32
+ bcnd ge0,size,Loop1b
+
+Lfin1b: addu size,size,8-2
+ bcnd lt0,size,Lend1b
+/* Add blocks of 2 limbs until less than 2 limbs remain */
+Loope1b:subu.cio r6,r8,r10
+ ld r10,s2_ptr,8
+ subu.cio r7,r9,r12
+ ld r12,s2_ptr,12
+ ld.d r8,s1_ptr,8
+ st.d r6,res_ptr,0
+ subu size,size,2
+ addu s1_ptr,s1_ptr,8
+ addu s2_ptr,s2_ptr,8
+ addu res_ptr,res_ptr,8
+ bcnd ge0,size,Loope1b
+Lend1b: subu.cio r6,r8,r10
+ subu.cio r7,r9,r12
+ st.d r6,res_ptr,0
+
+ bb0 0,size,Lret1b
+/* Add last limb */
+ ld r10,s2_ptr,8
+ ld r8,s1_ptr,8
+ subu.cio r6,r8,r10
+ st r6,res_ptr,8
+
+Lret1b: addu.ci r2,r0,r0 ; return carry-out from most sign. limb
+ jmp.n r1
+ xor r2,r2,1
+
+; ** V2 **
+/* If we come here, the alignment of s1_ptr and res_ptr as well as the
+ alignment of s2_ptr and res_ptr differ. Since there are only two ways
+ things can be aligned (that we care about) we now know that the alignment
+ of s1_ptr and s2_ptr are the same. */
+
+L2: cmp r12,size,1
+ bb1 eq,r12,Ljone
+ bb0 2,s1_ptr,L_v2 ; branch if s1_ptr is aligned
+/* Add least significant limb separately to align res_ptr and s2_ptr */
+ ld r10,s1_ptr,0
+ addu s1_ptr,s1_ptr,4
+ ld r8,s2_ptr,0
+ addu s2_ptr,s2_ptr,4
+ subu size,size,1
+ subu.co r6,r10,r8
+ st r6,res_ptr,0
+ addu res_ptr,res_ptr,4
+
+L_v2: subu size,size,8
+ bcnd lt0,size,Lfin2
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+ align 8
+Loop2: subu size,size,8
+ ld.d r8,s1_ptr,0
+ ld.d r6,s2_ptr,0
+ subu.cio r8,r8,r6
+ st r8,res_ptr,0
+ subu.cio r9,r9,r7
+ st r9,res_ptr,4
+ ld.d r8,s1_ptr,8
+ ld.d r6,s2_ptr,8
+ subu.cio r8,r8,r6
+ st r8,res_ptr,8
+ subu.cio r9,r9,r7
+ st r9,res_ptr,12
+ ld.d r8,s1_ptr,16
+ ld.d r6,s2_ptr,16
+ subu.cio r8,r8,r6
+ st r8,res_ptr,16
+ subu.cio r9,r9,r7
+ st r9,res_ptr,20
+ ld.d r8,s1_ptr,24
+ ld.d r6,s2_ptr,24
+ subu.cio r8,r8,r6
+ st r8,res_ptr,24
+ subu.cio r9,r9,r7
+ st r9,res_ptr,28
+ addu s1_ptr,s1_ptr,32
+ addu s2_ptr,s2_ptr,32
+ addu res_ptr,res_ptr,32
+ bcnd ge0,size,Loop2
+
+Lfin2: addu size,size,8-2
+ bcnd lt0,size,Lend2
+Loope2: ld.d r8,s1_ptr,0
+ ld.d r6,s2_ptr,0
+ subu.cio r8,r8,r6
+ st r8,res_ptr,0
+ subu.cio r9,r9,r7
+ st r9,res_ptr,4
+ subu size,size,2
+ addu s1_ptr,s1_ptr,8
+ addu s2_ptr,s2_ptr,8
+ addu res_ptr,res_ptr,8
+ bcnd ge0,size,Loope2
+Lend2: bb0 0,size,Lret2
+/* Add last limb */
+Ljone: ld r10,s1_ptr,0
+ ld r8,s2_ptr,0
+ subu.cio r6,r10,r8
+ st r6,res_ptr,0
+
+Lret2: addu.ci r2,r0,r0 ; return carry-out from most sign. limb
+ jmp.n r1
+ xor r2,r2,1
diff --git a/sysdeps/m88k/mul_1.s b/sysdeps/m88k/mul_1.s
index 35c238d..6b8492c 100644
--- a/sysdeps/m88k/mul_1.s
+++ b/sysdeps/m88k/mul_1.s
@@ -1,7 +1,7 @@
; mc88100 __mpn_mul_1 -- Multiply a limb vector with a single limb and
; store the product in a second limb vector.
-; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+; Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
; This file is part of the GNU MP Library.
@@ -55,14 +55,14 @@ ___mpn_mul_1:
; Make S1_PTR and RES_PTR point at the end of their blocks
; and negate SIZE.
lda r3,r3[r4]
- lda r6,r2[r4] ; RES_PTR in r6 since r2 is retval
+ lda r6,r2[r4] ; RES_PTR in r6 since r2 is retval
subu r4,r0,r4
- addu.co r2,r0,r0 ; r2 = cy = 0
+ addu.co r2,r0,r0 ; r2 = cy = 0
ld r9,r3[r4]
- mask r7,r5,0xffff ; r7 = lo(S2_LIMB)
- extu r8,r5,16 ; r8 = hi(S2_LIMB)
- bcnd.n eq0,r8,Lsmall ; jump if (hi(S2_LIMB) == 0)
+ mask r7,r5,0xffff ; r7 = lo(S2_LIMB)
+ extu r8,r5,16 ; r8 = hi(S2_LIMB)
+ bcnd.n eq0,r8,Lsmall ; jump if (hi(S2_LIMB) == 0)
subu r6,r6,4
; General code for any value of S2_LIMB.
@@ -75,28 +75,27 @@ ___mpn_mul_1:
br.n L1
addu r4,r4,1
-Loop:
- ld r9,r3[r4]
+Loop: ld r9,r3[r4]
st r26,r6[r4]
-; bcnd ne0,r0,0 ; bubble
+; bcnd ne0,r0,0 ; bubble
addu r4,r4,1
-L1: mul r26,r9,r5 ; low word of product mul_1 WB ld
- mask r12,r9,0xffff ; r12 = lo(s1_limb) mask_1
- mul r11,r12,r7 ; r11 = prod_0 mul_2 WB mask_1
- mul r10,r12,r8 ; r10 = prod_1a mul_3
- extu r13,r9,16 ; r13 = hi(s1_limb) extu_1 WB mul_1
- mul r12,r13,r7 ; r12 = prod_1b mul_4 WB extu_1
- mul r25,r13,r8 ; r25 = prod_2 mul_5 WB mul_2
- extu r11,r11,16 ; r11 = hi(prod_0) extu_2 WB mul_3
- addu r10,r10,r11 ; addu_1 WB extu_2
-; bcnd ne0,r0,0 ; bubble WB addu_1
- addu.co r10,r10,r12 ; WB mul_4
- mask.u r10,r10,0xffff ; move the 16 most significant bits...
- addu.ci r10,r10,r0 ; ...to the low half of the word...
- rot r10,r10,16 ; ...and put carry in pos 16.
- addu.co r26,r26,r2 ; add old carry limb
+L1: mul r26,r9,r5 ; low word of product mul_1 WB ld
+ mask r12,r9,0xffff ; r12 = lo(s1_limb) mask_1
+ mul r11,r12,r7 ; r11 = prod_0 mul_2 WB mask_1
+ mul r10,r12,r8 ; r10 = prod_1a mul_3
+ extu r13,r9,16 ; r13 = hi(s1_limb) extu_1 WB mul_1
+ mul r12,r13,r7 ; r12 = prod_1b mul_4 WB extu_1
+ mul r25,r13,r8 ; r25 = prod_2 mul_5 WB mul_2
+ extu r11,r11,16 ; r11 = hi(prod_0) extu_2 WB mul_3
+ addu r10,r10,r11 ; addu_1 WB extu_2
+; bcnd ne0,r0,0 ; bubble WB addu_1
+ addu.co r10,r10,r12 ; WB mul_4
+ mask.u r10,r10,0xffff ; move the 16 most significant bits...
+ addu.ci r10,r10,r0 ; ...to the low half of the word...
+ rot r10,r10,16 ; ...and put carry in pos 16.
+ addu.co r26,r26,r2 ; add old carry limb
bcnd.n ne0,r4,Loop
- addu.ci r2,r25,r10 ; compute new carry limb
+ addu.ci r2,r25,r10 ; compute new carry limb
st r26,r6[r4]
ld.d r25,r31,8
@@ -109,20 +108,19 @@ Lsmall:
br.n SL1
addu r4,r4,1
-SLoop:
- ld r9,r3[r4] ;
- st r8,r6[r4] ;
- addu r4,r4,1 ;
-SL1: mul r8,r9,r5 ; low word of product
- mask r12,r9,0xffff ; r12 = lo(s1_limb)
- extu r13,r9,16 ; r13 = hi(s1_limb)
- mul r11,r12,r7 ; r11 = prod_0
- mul r12,r13,r7 ; r12 = prod_1b
- addu.cio r8,r8,r2 ; add old carry limb
- extu r10,r11,16 ; r11 = hi(prod_0)
- addu r10,r10,r12 ;
+SLoop: ld r9,r3[r4] ;
+ st r8,r6[r4] ;
+ addu r4,r4,1 ;
+SL1: mul r8,r9,r5 ; low word of product
+ mask r12,r9,0xffff ; r12 = lo(s1_limb)
+ extu r13,r9,16 ; r13 = hi(s1_limb)
+ mul r11,r12,r7 ; r11 = prod_0
+ mul r12,r13,r7 ; r12 = prod_1b
+ addu.cio r8,r8,r2 ; add old carry limb
+ extu r10,r11,16 ; r11 = hi(prod_0)
+ addu r10,r10,r12 ;
bcnd.n ne0,r4,SLoop
- extu r2,r10,16 ; r2 = new carry limb
+ extu r2,r10,16 ; r2 = new carry limb
jmp.n r1
st r8,r6[r4]
diff --git a/sysdeps/m88k/sub_n.s b/sysdeps/m88k/sub_n.s
index 3963cd5..cd0b791 100644
--- a/sysdeps/m88k/sub_n.s
+++ b/sysdeps/m88k/sub_n.s
@@ -1,7 +1,7 @@
; mc88100 __mpn_sub -- Subtract two limb vectors of the same length > 0 and
; store difference in a third limb vector.
-; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+; Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
; This file is part of the GNU MP Library.
@@ -41,9 +41,10 @@ ___mpn_sub_n:
extu r10,r5,3
ld r7,r4,0 ; read first limb from s2_ptr
- subu.co r5,r0,r5 ; (clear carry as side effect)
+ subu r5,r0,r5
mak r5,r5,3<4>
- bcnd eq0,r5,Lzero
+ bcnd.n eq0,r5,Lzero
+ subu.co r0,r0,r0 ; initialize carry
or r12,r0,lo16(Lbase)
or.u r12,r12,hi16(Lbase)
diff --git a/sysdeps/mips/addmul_1.s b/sysdeps/mips/addmul_1.s
index abc2fb8..917af1b 100644
--- a/sysdeps/mips/addmul_1.s
+++ b/sysdeps/mips/addmul_1.s
@@ -1,7 +1,7 @@
# MIPS __mpn_addmul_1 -- Multiply a limb vector with a single limb and
# add the product to a second limb vector.
- # Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+ # Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
@@ -63,7 +63,7 @@ Loop: lw $10,0($4)
addu $2,$2,$10
sw $3,0($4)
addiu $4,$4,4
- bne $6,$0,Loop # should be "bnel"
+ bne $6,$0,Loop
addu $2,$9,$2 # add high product limb and carry from addition
# cool down phase 1
diff --git a/sysdeps/mips/mips3/addmul_1.s b/sysdeps/mips/mips3/addmul_1.s
index 7af0172..7dbc9ad 100644
--- a/sysdeps/mips/mips3/addmul_1.s
+++ b/sysdeps/mips/mips3/addmul_1.s
@@ -63,7 +63,7 @@ Loop: ld $10,0($4)
daddu $2,$2,$10
sd $3,0($4)
daddiu $4,$4,8
- bne $6,$0,Loop # should be "bnel"
+ bne $6,$0,Loop
daddu $2,$9,$2 # add high product limb and carry from addition
# cool down phase 1
diff --git a/sysdeps/mips/mips3/mul_1.s b/sysdeps/mips/mips3/mul_1.s
index 87954e5..8376a02 100644
--- a/sysdeps/mips/mips3/mul_1.s
+++ b/sysdeps/mips/mips3/mul_1.s
@@ -59,7 +59,7 @@ Loop: mflo $10
sltu $2,$10,$2 # carry from previous addition -> $2
sd $10,0($4)
daddiu $4,$4,8
- bne $6,$0,Loop # should be "bnel"
+ bne $6,$0,Loop
daddu $2,$9,$2 # add high product limb and carry from addition
# cool down phase 1
diff --git a/sysdeps/mips/mips3/submul_1.s b/sysdeps/mips/mips3/submul_1.s
index f28c6a5..f041f6c 100644
--- a/sysdeps/mips/mips3/submul_1.s
+++ b/sysdeps/mips/mips3/submul_1.s
@@ -63,7 +63,7 @@ Loop: ld $10,0($4)
daddu $2,$2,$10
sd $3,0($4)
daddiu $4,$4,8
- bne $6,$0,Loop # should be "bnel"
+ bne $6,$0,Loop
daddu $2,$9,$2 # add high product limb and carry from addition
# cool down phase 1
diff --git a/sysdeps/mips/mul_1.s b/sysdeps/mips/mul_1.s
index 01327e2..6f5324c 100644
--- a/sysdeps/mips/mul_1.s
+++ b/sysdeps/mips/mul_1.s
@@ -1,7 +1,7 @@
# MIPS __mpn_mul_1 -- Multiply a limb vector with a single limb and
# store the product in a second limb vector.
- # Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+ # Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
@@ -59,7 +59,7 @@ Loop: mflo $10
sltu $2,$10,$2 # carry from previous addition -> $2
sw $10,0($4)
addiu $4,$4,4
- bne $6,$0,Loop # should be "bnel"
+ bne $6,$0,Loop
addu $2,$9,$2 # add high product limb and carry from addition
# cool down phase 1
diff --git a/sysdeps/mips/submul_1.s b/sysdeps/mips/submul_1.s
index 616dd1b..a78072a 100644
--- a/sysdeps/mips/submul_1.s
+++ b/sysdeps/mips/submul_1.s
@@ -1,7 +1,7 @@
# MIPS __mpn_submul_1 -- Multiply a limb vector with a single limb and
# subtract the product from a second limb vector.
- # Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+ # Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
@@ -63,7 +63,7 @@ Loop: lw $10,0($4)
addu $2,$2,$10
sw $3,0($4)
addiu $4,$4,4
- bne $6,$0,Loop # should be "bnel"
+ bne $6,$0,Loop
addu $2,$9,$2 # add high product limb and carry from addition
# cool down phase 1
diff --git a/sysdeps/rs6000/add_n.s b/sysdeps/rs6000/add_n.s
index 7090cf1..e2536d5 100644
--- a/sysdeps/rs6000/add_n.s
+++ b/sysdeps/rs6000/add_n.s
@@ -1,6 +1,6 @@
# IBM POWER __mpn_add_n -- Add two limb vectors of equal, non-zero length.
-# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
diff --git a/sysdeps/rs6000/sub_n.s b/sysdeps/rs6000/sub_n.s
index 40fe7d6..c57675b 100644
--- a/sysdeps/rs6000/sub_n.s
+++ b/sysdeps/rs6000/sub_n.s
@@ -1,7 +1,7 @@
# IBM POWER __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
# store difference in a third limb vector.
-# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
diff --git a/sysdeps/sparc/add_n.S b/sysdeps/sparc/add_n.S
index 80c3b99..49b31fc 100644
--- a/sysdeps/sparc/add_n.S
+++ b/sysdeps/sparc/add_n.S
@@ -1,7 +1,7 @@
-! sparc __mpn_add_n -- Add two limb vectors of the same length > 0 and store
+! SPARC __mpn_add_n -- Add two limb vectors of the same length > 0 and store
! sum in a third limb vector.
-! Copyright (C) 1995 Free Software Foundation, Inc.
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
! This file is part of the GNU MP Library.
@@ -32,18 +32,14 @@
.align 4
.global C_SYMBOL_NAME(__mpn_add_n)
C_SYMBOL_NAME(__mpn_add_n):
- cmp size,8
- mov 0,%o4 ! clear cy-save register
- blt,a Ltriv
- addcc size,-2,size
xor s2_ptr,res_ptr,%g1
andcc %g1,4,%g0
bne L1 ! branch if alignment differs
nop
+! ** V1a **
L0: andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0
- beq L_v1 ! if no, branch
+ be L_v1 ! if no, branch
nop
-! ** V1a **
/* Add least significant limb separately to align res_ptr and s2_ptr */
ld [s1_ptr],%g4
add s1_ptr,4,s1_ptr
@@ -53,12 +49,15 @@ L0: andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0
addcc %g4,%g2,%o4
st %o4,[res_ptr]
add res_ptr,4,res_ptr
+L_v1: addx %g0,%g0,%o4 ! save cy in register
+ cmp size,2 ! if size < 2 ...
+ bl Lend2 ! ... branch to tail code
+ subcc %g0,%o4,%g0 ! restore cy
-L_v1: ld [s1_ptr+0],%g4
+ ld [s1_ptr+0],%g4
+ addcc size,-10,size
ld [s1_ptr+4],%g1
ldd [s2_ptr+0],%g2
- addx %g0,%g0,%o4 ! save cy in register
- addcc size,-10,size
blt Lfin1
subcc %g0,%o4,%g0 ! restore cy
/* Add blocks of 8 limbs until less than 8 limbs remain */
@@ -98,7 +97,7 @@ Lfin1: addcc size,8-2,size
blt Lend1
subcc %g0,%o4,%g0 ! restore cy
/* Add blocks of 2 limbs until less than 2 limbs remain */
-Loop1b: addxcc %g4,%g2,%o4
+Loope1: addxcc %g4,%g2,%o4
ld [s1_ptr+8],%g4
addxcc %g1,%g3,%o5
ld [s1_ptr+12],%g1
@@ -109,7 +108,7 @@ Loop1b: addxcc %g4,%g2,%o4
add s1_ptr,8,s1_ptr
add s2_ptr,8,s2_ptr
add res_ptr,8,res_ptr
- bge Loop1b
+ bge Loope1
subcc %g0,%o4,%g0 ! restore cy
Lend1: addxcc %g4,%g2,%o4
addxcc %g1,%g3,%o5
@@ -144,10 +143,13 @@ L1: xor s1_ptr,res_ptr,%g1
things can be aligned (that we care about) we now know that the alignment
of s1_ptr and s2_ptr are the same. */
-L2: andcc s1_ptr,4,%g0 ! s1_ptr unaligned? Side effect: cy=0
- beq L_v2 ! if no, branch
+L2: cmp size,1
+ be Ljone
nop
-/* Add least significant limb separately to align res_ptr and s2_ptr */
+ andcc s1_ptr,4,%g0 ! s1_ptr unaligned? Side effect: cy=0
+ be L_v2 ! if no, branch
+ nop
+/* Add least significant limb separately to align s1_ptr and s2_ptr */
ld [s1_ptr],%g4
add s1_ptr,4,s1_ptr
ld [s2_ptr],%g2
@@ -195,9 +197,9 @@ Loop2: ldd [s1_ptr+0],%g2
subcc %g0,%o4,%g0 ! restore cy
Lfin2: addcc size,8-2,size
-Ltriv: blt Lend2
+ blt Lend2
subcc %g0,%o4,%g0 ! restore cy
-Loop2b: ldd [s1_ptr+0],%g2
+Loope2: ldd [s1_ptr+0],%g2
ldd [s2_ptr+0],%o4
addxcc %g2,%o4,%g2
st %g2,[res_ptr+0]
@@ -208,13 +210,13 @@ Loop2b: ldd [s1_ptr+0],%g2
add s1_ptr,8,s1_ptr
add s2_ptr,8,s2_ptr
add res_ptr,8,res_ptr
- bge Loop2b
+ bge Loope2
subcc %g0,%o4,%g0 ! restore cy
Lend2: andcc size,1,%g0
be Lret2
subcc %g0,%o4,%g0 ! restore cy
/* Add last limb */
- ld [s1_ptr],%g4
+Ljone: ld [s1_ptr],%g4
ld [s2_ptr],%g2
addxcc %g4,%g2,%o4
st %o4,[res_ptr]
diff --git a/sysdeps/sparc/lshift.S b/sysdeps/sparc/lshift.S
index 497272a..6844fa2 100644
--- a/sysdeps/sparc/lshift.S
+++ b/sysdeps/sparc/lshift.S
@@ -1,6 +1,6 @@
! sparc __mpn_lshift --
-! Copyright (C) 1995 Free Software Foundation, Inc.
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
! This file is part of the GNU MP Library.
@@ -39,7 +39,7 @@ C_SYMBOL_NAME(__mpn_lshift):
add %o2,-1,%o2
andcc %o2,4-1,%g4 ! number of limbs in first loop
srl %g2,%o5,%g1 ! compute function result
- beq L0 ! if multiple of 4 limbs, skip first loop
+ be L0 ! if multiple of 4 limbs, skip first loop
st %g1,[%sp+80]
sub %o2,%g4,%o2 ! adjust count for main loop
@@ -56,7 +56,7 @@ Loop0: ld [%o1-8],%g3
st %o4,[%o0+0]
L0: tst %o2
- beq Lend
+ be Lend
nop
Loop: ld [%o1-8],%g3
diff --git a/sysdeps/sparc/rshift.S b/sysdeps/sparc/rshift.S
index 3428cfe..5a47926 100644
--- a/sysdeps/sparc/rshift.S
+++ b/sysdeps/sparc/rshift.S
@@ -1,6 +1,6 @@
! sparc __mpn_rshift --
-! Copyright (C) 1995 Free Software Foundation, Inc.
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
! This file is part of the GNU MP Library.
@@ -36,7 +36,7 @@ C_SYMBOL_NAME(__mpn_rshift):
add %o2,-1,%o2
andcc %o2,4-1,%g4 ! number of limbs in first loop
sll %g2,%o5,%g1 ! compute function result
- beq L0 ! if multiple of 4 limbs, skip first loop
+ be L0 ! if multiple of 4 limbs, skip first loop
st %g1,[%sp+80]
sub %o2,%g4,%o2 ! adjust count for main loop
@@ -53,7 +53,7 @@ Loop0: ld [%o1+4],%g3
st %o4,[%o0-4]
L0: tst %o2
- beq Lend
+ be Lend
nop
Loop: ld [%o1+4],%g3
diff --git a/sysdeps/sparc/sparc64/add_n.s b/sysdeps/sparc/sparc64/add_n.s
new file mode 100644
index 0000000..104a89e
--- /dev/null
+++ b/sysdeps/sparc/sparc64/add_n.s
@@ -0,0 +1,57 @@
+! SPARC v9 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
+! sum in a third limb vector.
+
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr %o0
+! s1_ptr %o1
+! s2_ptr %o2
+! size %o3
+
+.section ".text"
+ .align 4
+ .global __mpn_add_n
+ .type __mpn_add_n,#function
+ .proc 04
+__mpn_add_n:
+ sub %g0,%o3,%g3
+ sllx %o3,3,%g1
+ add %o1,%g1,%o1 ! make s1_ptr point at end
+ add %o2,%g1,%o2 ! make s2_ptr point at end
+ add %o0,%g1,%o0 ! make res_ptr point at end
+ mov 0,%o4 ! clear carry variable
+ sllx %g3,3,%o5 ! compute initial address index
+
+.Loop: ldx [%o2+%o5],%g1 ! load s2 limb
+ add %g3,1,%g3 ! increment loop count
+ ldx [%o1+%o5],%g2 ! load s1 limb
+ addcc %g1,%o4,%g1 ! add s2 limb and carry variable
+ movcc %xcc,0,%o4 ! if carry-out, o4 was 1; clear it
+ addcc %g1,%g2,%g1 ! add s1 limb to sum
+ stx %g1,[%o0+%o5] ! store result
+ add %o5,8,%o5 ! increment address index
+ brnz,pt %g3,.Loop
+ movcs %xcc,1,%o4 ! if s1 add gave carry, record it
+
+ retl
+ mov %o4,%o0
+.LLfe1:
+ .size __mpn_add_n,.LLfe1-__mpn_add_n
diff --git a/sysdeps/sparc/sparc64/addmul_1.s b/sysdeps/sparc/sparc64/addmul_1.s
new file mode 100644
index 0000000..ef013ee
--- /dev/null
+++ b/sysdeps/sparc/sparc64/addmul_1.s
@@ -0,0 +1,88 @@
+! SPARC v9 __mpn_addmul_1 -- Multiply a limb vector with a single limb and
+! add the product to a second limb vector.
+
+! Copyright (C) 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr o0
+! s1_ptr o1
+! size o2
+! s2_limb o3
+
+.section ".text"
+ .align 4
+ .global __mpn_addmul_1
+ .type __mpn_addmul_1,#function
+ .proc 016
+__mpn_addmul_1:
+ !#PROLOGUE# 0
+ save %sp,-160,%sp
+ !#PROLOGUE# 1
+ sub %g0,%i2,%o7
+ sllx %o7,3,%g5
+ sub %i1,%g5,%o3
+ sub %i0,%g5,%o4
+ mov 0,%o0 ! zero cy_limb
+
+ srl %i3,0,%o1 ! extract low 32 bits of s2_limb
+ srlx %i3,32,%i3 ! extract high 32 bits of s2_limb
+ mov 1,%o2
+ sllx %o2,32,%o2 ! o2 = 0x100000000
+
+ ! hi !
+ ! mid-1 !
+ ! mid-2 !
+ ! lo !
+.Loop:
+ sllx %o7,3,%g1
+ ldx [%o3+%g1],%g5
+ srl %g5,0,%i0 ! zero hi bits
+ srlx %g5,32,%g5
+ mulx %o1,%i0,%i4 ! lo product
+ mulx %i3,%i0,%i1 ! mid-1 product
+ mulx %o1,%g5,%l2 ! mid-2 product
+ mulx %i3,%g5,%i5 ! hi product
+ srlx %i4,32,%i0 ! extract high 32 bits of lo product...
+ add %i1,%i0,%i1 ! ...and add it to the mid-1 product
+ addcc %i1,%l2,%i1 ! add mid products
+ mov 0,%l0 ! we need the carry from that add...
+ movcs %xcc,%o2,%l0 ! ...compute it and...
+ add %i5,%l0,%i5 ! ...add to bit 32 of the hi product
+ sllx %i1,32,%i0 ! align low bits of mid product
+ srl %i4,0,%g5 ! zero high 32 bits of lo product
+ add %i0,%g5,%i0 ! combine into low 64 bits of result
+ srlx %i1,32,%i1 ! extract high bits of mid product...
+ add %i5,%i1,%i1 ! ...and add them to the high result
+ addcc %i0,%o0,%i0 ! add cy_limb to low 64 bits of result
+ mov 0,%g5
+ movcs %xcc,1,%g5
+ add %o7,1,%o7
+ ldx [%o4+%g1],%l1
+ addcc %l1,%i0,%i0
+ movcs %xcc,1,%g5
+ stx %i0,[%o4+%g1]
+ brnz %o7,.Loop
+ add %i1,%g5,%o0 ! compute new cy_limb
+
+ mov %o0,%i0
+ ret
+ restore
+.LLfe1:
+ .size __mpn_addmul_1,.LLfe1-__mpn_addmul_1
diff --git a/sysdeps/sparc/sparc64/gmp-mparam.h b/sysdeps/sparc/sparc64/gmp-mparam.h
new file mode 100644
index 0000000..05c893f
--- /dev/null
+++ b/sysdeps/sparc/sparc64/gmp-mparam.h
@@ -0,0 +1,26 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
+#define BITS_PER_LONGINT 64
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
diff --git a/sysdeps/sparc/sparc64/lshift.s b/sysdeps/sparc/sparc64/lshift.s
new file mode 100644
index 0000000..bd7fa01
--- /dev/null
+++ b/sysdeps/sparc/sparc64/lshift.s
@@ -0,0 +1,95 @@
+! SPARC v9 __mpn_lshift --
+
+! Copyright (C) 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr %o0
+! src_ptr %o1
+! size %o2
+! cnt %o3
+
+.section ".text"
+ .align 4
+ .global __mpn_lshift
+ .type __mpn_lshift,#function
+ .proc 04
+__mpn_lshift:
+ sllx %o2,3,%g1
+ add %o1,%g1,%o1 ! make %o1 point at end of src
+ ldx [%o1-8],%g2 ! load first limb
+ sub %g0,%o3,%o5 ! negate shift count
+ add %o0,%g1,%o0 ! make %o0 point at end of res
+ add %o2,-1,%o2
+ and %o2,4-1,%g4 ! number of limbs in first loop
+ srlx %g2,%o5,%g1 ! compute function result
+ brz,pn %g4,.L0 ! if multiple of 4 limbs, skip first loop
+ stx %g1,[%sp+80]
+
+ sub %o2,%g4,%o2 ! adjust count for main loop
+
+.Loop0: ldx [%o1-16],%g3
+ add %o0,-8,%o0
+ add %o1,-8,%o1
+ add %g4,-1,%g4
+ sllx %g2,%o3,%o4
+ srlx %g3,%o5,%g1
+ mov %g3,%g2
+ or %o4,%g1,%o4
+ brnz,pt %g4,.Loop0
+ stx %o4,[%o0+0]
+
+.L0: brz,pn %o2,.Lend
+ nop
+
+.Loop: ldx [%o1-16],%g3
+ add %o0,-32,%o0
+ add %o2,-4,%o2
+ sllx %g2,%o3,%o4
+ srlx %g3,%o5,%g1
+
+ ldx [%o1-24],%g2
+ sllx %g3,%o3,%g4
+ or %o4,%g1,%o4
+ stx %o4,[%o0+24]
+ srlx %g2,%o5,%g1
+
+ ldx [%o1-32],%g3
+ sllx %g2,%o3,%o4
+ or %g4,%g1,%g4
+ stx %g4,[%o0+16]
+ srlx %g3,%o5,%g1
+
+ ldx [%o1-40],%g2
+ sllx %g3,%o3,%g4
+ or %o4,%g1,%o4
+ stx %o4,[%o0+8]
+ srlx %g2,%o5,%g1
+
+ add %o1,-32,%o1
+ or %g4,%g1,%g4
+ brnz,pt %o2,.Loop
+ stx %g4,[%o0+0]
+
+.Lend: sllx %g2,%o3,%g2
+ stx %g2,[%o0-8]
+ retl
+ ldx [%sp+80],%o0
+.LLfe1:
+ .size __mpn_lshift,.LLfe1-__mpn_lshift
diff --git a/sysdeps/sparc/sparc64/mul_1.s b/sysdeps/sparc/sparc64/mul_1.s
new file mode 100644
index 0000000..41be370
--- /dev/null
+++ b/sysdeps/sparc/sparc64/mul_1.s
@@ -0,0 +1,85 @@
+! SPARC v9 __mpn_mul_1 -- Multiply a limb vector with a single limb and
+! store the product in a second limb vector.
+
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr o0
+! s1_ptr o1
+! size o2
+! s2_limb o3
+
+.section ".text"
+ .align 4
+ .global __mpn_mul_1
+ .type __mpn_mul_1,#function
+ .proc 016
+__mpn_mul_1:
+ !#PROLOGUE# 0
+ save %sp,-160,%sp
+ !#PROLOGUE# 1
+ sub %g0,%i2,%o7
+ sllx %o7,3,%g5
+ sub %i1,%g5,%o3
+ sub %i0,%g5,%o4
+ mov 0,%o0 ! zero cy_limb
+
+ srl %i3,0,%o1 ! extract low 32 bits of s2_limb
+ srlx %i3,32,%i3 ! extract high 32 bits of s2_limb
+ mov 1,%o2
+ sllx %o2,32,%o2 ! o2 = 0x100000000
+
+ ! hi !
+ ! mid-1 !
+ ! mid-2 !
+ ! lo !
+.Loop:
+ sllx %o7,3,%g1
+ ldx [%o3+%g1],%g5
+ srl %g5,0,%i0 ! zero hi bits
+ srlx %g5,32,%g5
+ mulx %o1,%i0,%i4 ! lo product
+ mulx %i3,%i0,%i1 ! mid-1 product
+ mulx %o1,%g5,%l2 ! mid-2 product
+ mulx %i3,%g5,%i5 ! hi product
+ srlx %i4,32,%i0 ! extract high 32 bits of lo product...
+ add %i1,%i0,%i1 ! ...and add it to the mid-1 product
+ addcc %i1,%l2,%i1 ! add mid products
+ mov 0,%l0 ! we need the carry from that add...
+ movcs %xcc,%o2,%l0 ! ...compute it and...
+ add %i5,%l0,%i5 ! ...add to bit 32 of the hi product
+ sllx %i1,32,%i0 ! align low bits of mid product
+ srl %i4,0,%g5 ! zero high 32 bits of lo product
+ add %i0,%g5,%i0 ! combine into low 64 bits of result
+ srlx %i1,32,%i1 ! extract high bits of mid product...
+ add %i5,%i1,%i1 ! ...and add them to the high result
+ addcc %i0,%o0,%i0 ! add cy_limb to low 64 bits of result
+ mov 0,%g5
+ movcs %xcc,1,%g5
+ add %o7,1,%o7
+ stx %i0,[%o4+%g1]
+ brnz %o7,.Loop
+ add %i1,%g5,%o0 ! compute new cy_limb
+
+ mov %o0,%i0
+ ret
+ restore
+.LLfe1:
+ .size __mpn_mul_1,.LLfe1-__mpn_mul_1
diff --git a/sysdeps/sparc/sparc64/rshift.s b/sysdeps/sparc/sparc64/rshift.s
new file mode 100644
index 0000000..971deec
--- /dev/null
+++ b/sysdeps/sparc/sparc64/rshift.s
@@ -0,0 +1,92 @@
+! SPARC v9 __mpn_rshift --
+
+! Copyright (C) 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr %o0
+! src_ptr %o1
+! size %o2
+! cnt %o3
+
+.section ".text"
+ .align 4
+ .global __mpn_rshift
+ .type __mpn_rshift,#function
+ .proc 04
+__mpn_rshift:
+ ldx [%o1],%g2 ! load first limb
+ sub %g0,%o3,%o5 ! negate shift count
+ add %o2,-1,%o2
+ and %o2,4-1,%g4 ! number of limbs in first loop
+ sllx %g2,%o5,%g1 ! compute function result
+ brz,pn %g4,.L0 ! if multiple of 4 limbs, skip first loop
+ stx %g1,[%sp+80]
+
+ sub %o2,%g4,%o2 ! adjust count for main loop
+
+.Loop0: ldx [%o1+8],%g3
+ add %o0,8,%o0
+ add %o1,8,%o1
+ add %g4,-1,%g4
+ srlx %g2,%o3,%o4
+ sllx %g3,%o5,%g1
+ mov %g3,%g2
+ or %o4,%g1,%o4
+ brnz,pt %g4,.Loop0
+ stx %o4,[%o0-8]
+
+.L0: brz,pn %o2,.Lend
+ nop
+
+.Loop: ldx [%o1+8],%g3
+ add %o0,32,%o0
+ add %o2,-4,%o2
+ srlx %g2,%o3,%o4
+ sllx %g3,%o5,%g1
+
+ ldx [%o1+16],%g2
+ srlx %g3,%o3,%g4
+ or %o4,%g1,%o4
+ stx %o4,[%o0-32]
+ sllx %g2,%o5,%g1
+
+ ldx [%o1+24],%g3
+ srlx %g2,%o3,%o4
+ or %g4,%g1,%g4
+ stx %g4,[%o0-24]
+ sllx %g3,%o5,%g1
+
+ ldx [%o1+32],%g2
+ srlx %g3,%o3,%g4
+ or %o4,%g1,%o4
+ stx %o4,[%o0-16]
+ sllx %g2,%o5,%g1
+
+ add %o1,32,%o1
+ or %g4,%g1,%g4
+ brnz %o2,.Loop
+ stx %g4,[%o0-8]
+
+.Lend: srlx %g2,%o3,%g2
+ stx %g2,[%o0-0]
+ retl
+ ldx [%sp+80],%o0
+.LLfe1:
+ .size __mpn_rshift,.LLfe1-__mpn_rshift
diff --git a/sysdeps/sparc/sparc64/sub_n.s b/sysdeps/sparc/sparc64/sub_n.s
new file mode 100644
index 0000000..7099bf4
--- /dev/null
+++ b/sysdeps/sparc/sparc64/sub_n.s
@@ -0,0 +1,57 @@
+! SPARC v9 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+! store difference in a third limb vector.
+
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr %o0
+! s1_ptr %o1
+! s2_ptr %o2
+! size %o3
+
+.section ".text"
+ .align 4
+ .global __mpn_sub_n
+ .type __mpn_sub_n,#function
+ .proc 04
+__mpn_sub_n:
+ sub %g0,%o3,%g3
+ sllx %o3,3,%g1
+ add %o1,%g1,%o1 ! make s1_ptr point at end
+ add %o2,%g1,%o2 ! make s2_ptr point at end
+ add %o0,%g1,%o0 ! make res_ptr point at end
+ mov 0,%o4 ! clear carry variable
+ sllx %g3,3,%o5 ! compute initial address index
+
+.Loop: ldx [%o2+%o5],%g1 ! load s2 limb
+ add %g3,1,%g3 ! increment loop count
+ ldx [%o1+%o5],%g2 ! load s1 limb
+ addcc %g1,%o4,%g1 ! add s2 limb and carry variable
+ movcc %xcc,0,%o4 ! if carry-out, o4 was 1; clear it
+ subcc %g1,%g2,%g1 ! subtract s1 limb from sum
+ stx %g1,[%o0+%o5] ! store result
+ add %o5,8,%o5 ! increment address index
+ brnz,pt %g3,.Loop
+ movcs %xcc,1,%o4 ! if s1 subtract gave carry, record it
+
+ retl
+ mov %o4,%o0
+.LLfe1:
+ .size __mpn_sub_n,.LLfe1-__mpn_sub_n
diff --git a/sysdeps/sparc/sparc64/submul_1.s b/sysdeps/sparc/sparc64/submul_1.s
new file mode 100644
index 0000000..f0df38c
--- /dev/null
+++ b/sysdeps/sparc/sparc64/submul_1.s
@@ -0,0 +1,88 @@
+! SPARC v9 __mpn_submul_1 -- Multiply a limb vector with a single limb and
+! subtract the product from a second limb vector.
+
+! Copyright (C) 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr o0
+! s1_ptr o1
+! size o2
+! s2_limb o3
+
+.section ".text"
+ .align 4
+ .global __mpn_submul_1
+ .type __mpn_submul_1,#function
+ .proc 016
+__mpn_submul_1:
+ !#PROLOGUE# 0
+ save %sp,-160,%sp
+ !#PROLOGUE# 1
+ sub %g0,%i2,%o7
+ sllx %o7,3,%g5
+ sub %i1,%g5,%o3
+ sub %i0,%g5,%o4
+ mov 0,%o0 ! zero cy_limb
+
+ srl %i3,0,%o1 ! extract low 32 bits of s2_limb
+ srlx %i3,32,%i3 ! extract high 32 bits of s2_limb
+ mov 1,%o2
+ sllx %o2,32,%o2 ! o2 = 0x100000000
+
+ ! hi !
+ ! mid-1 !
+ ! mid-2 !
+ ! lo !
+.Loop:
+ sllx %o7,3,%g1
+ ldx [%o3+%g1],%g5
+ srl %g5,0,%i0 ! zero hi bits
+ srlx %g5,32,%g5
+ mulx %o1,%i0,%i4 ! lo product
+ mulx %i3,%i0,%i1 ! mid-1 product
+ mulx %o1,%g5,%l2 ! mid-2 product
+ mulx %i3,%g5,%i5 ! hi product
+ srlx %i4,32,%i0 ! extract high 32 bits of lo product...
+ add %i1,%i0,%i1 ! ...and add it to the mid-1 product
+ addcc %i1,%l2,%i1 ! add mid products
+ mov 0,%l0 ! we need the carry from that add...
+ movcs %xcc,%o2,%l0 ! ...compute it and...
+ add %i5,%l0,%i5 ! ...add to bit 32 of the hi product
+ sllx %i1,32,%i0 ! align low bits of mid product
+ srl %i4,0,%g5 ! zero high 32 bits of lo product
+ add %i0,%g5,%i0 ! combine into low 64 bits of result
+ srlx %i1,32,%i1 ! extract high bits of mid product...
+ add %i5,%i1,%i1 ! ...and add them to the high result
+ addcc %i0,%o0,%i0 ! add cy_limb to low 64 bits of result
+ mov 0,%g5
+ movcs %xcc,1,%g5
+ add %o7,1,%o7
+ ldx [%o4+%g1],%l1
+ subcc %l1,%i0,%i0
+ movcs %xcc,1,%g5
+ stx %i0,[%o4+%g1]
+ brnz %o7,.Loop
+ add %i1,%g5,%o0 ! compute new cy_limb
+
+ mov %o0,%i0
+ ret
+ restore
+.LLfe1:
+ .size __mpn_submul_1,.LLfe1-__mpn_submul_1
diff --git a/sysdeps/sparc/sparc8/addmul_1.S b/sysdeps/sparc/sparc8/addmul_1.S
index d1de0c3..1cf5128 100644
--- a/sysdeps/sparc/sparc8/addmul_1.S
+++ b/sysdeps/sparc/sparc8/addmul_1.S
@@ -1,7 +1,7 @@
! SPARC v8 __mpn_addmul_1 -- Multiply a limb vector with a limb and
! add the result to a second limb vector.
-! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+! Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
! This file is part of the GNU MP Library.
diff --git a/sysdeps/sparc/sparc8/mul_1.S b/sysdeps/sparc/sparc8/mul_1.S
index 42717be..d56394e 100644
--- a/sysdeps/sparc/sparc8/mul_1.S
+++ b/sysdeps/sparc/sparc8/mul_1.S
@@ -1,7 +1,7 @@
! SPARC v8 __mpn_mul_1 -- Multiply a limb vector with a single limb and
! store the product in a second limb vector.
-! Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+! Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
! This file is part of the GNU MP Library.
diff --git a/sysdeps/sparc/sub_n.S b/sysdeps/sparc/sub_n.S
index 2e217ed..9ff5b7b 100644
--- a/sysdeps/sparc/sub_n.S
+++ b/sysdeps/sparc/sub_n.S
@@ -1,7 +1,7 @@
-! sparc __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+! SPARC __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
! store difference in a third limb vector.
-! Copyright (C) 1995 Free Software Foundation, Inc.
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
! This file is part of the GNU MP Library.
@@ -38,7 +38,7 @@ C_SYMBOL_NAME(__mpn_sub_n):
nop
! ** V1a **
andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0
- beq L_v1 ! if no, branch
+ be L_v1 ! if no, branch
nop
/* Add least significant limb separately to align res_ptr and s2_ptr */
ld [s1_ptr],%g4
@@ -133,9 +133,9 @@ L1: xor s1_ptr,res_ptr,%g1
nop
! ** V1b **
andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0
- beq L_v1b ! if no, branch
+ be L_v1b ! if no, branch
nop
-/* Add least significant limb separately to align res_ptr and s2_ptr */
+/* Add least significant limb separately to align res_ptr and s1_ptr */
ld [s2_ptr],%g4
add s2_ptr,4,s2_ptr
ld [s1_ptr],%g2
@@ -232,7 +232,7 @@ L2: cmp size,1
be Ljone
nop
andcc s1_ptr,4,%g0 ! s1_ptr unaligned? Side effect: cy=0
- beq L_v2 ! if no, branch
+ be L_v2 ! if no, branch
nop
/* Add least significant limb separately to align s1_ptr and s2_ptr */
ld [s1_ptr],%g4
diff --git a/sysdeps/unix/sysv/linux/m68k/profil-counter.h b/sysdeps/unix/sysv/linux/m68k/profil-counter.h
new file mode 100644
index 0000000..4e7b132
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/m68k/profil-counter.h
@@ -0,0 +1,24 @@
+/* Machine-dependent SIGPROF signal handler. Linux/m68k version.
+Copyright (C) 1996 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA. */
+
+static void
+profil_counter (int signr, int code, struct sigcontext *scp)
+{
+ profil_count ((void *) scp->sc_pc);
+}
diff --git a/sysdeps/vax/gmp-mparam.h b/sysdeps/vax/gmp-mparam.h
index 687f12a..ddc308a 100644
--- a/sysdeps/vax/gmp-mparam.h
+++ b/sysdeps/vax/gmp-mparam.h
@@ -1,6 +1,6 @@
/* gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
diff --git a/sysdeps/z8000/mul_1.s b/sysdeps/z8000/mul_1.s
index 2075225..0150e85 100644
--- a/sysdeps/z8000/mul_1.s
+++ b/sysdeps/z8000/mul_1.s
@@ -1,7 +1,7 @@
! Z8000 __mpn_mul_1 -- Multiply a limb vector with a limb and store
! the result in a second limb vector.
-! Copyright (C) 1993, 1994 Free Software Foundation, Inc.
+! Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc.
! This file is part of the GNU MP Library.