aboutsummaryrefslogtreecommitdiff
path: root/crypto/bn/asm/s390x.S
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/bn/asm/s390x.S')
-rwxr-xr-xcrypto/bn/asm/s390x.S109
1 files changed, 72 insertions, 37 deletions
diff --git a/crypto/bn/asm/s390x.S b/crypto/bn/asm/s390x.S
index 43fcb79..f5eebe4 100755
--- a/crypto/bn/asm/s390x.S
+++ b/crypto/bn/asm/s390x.S
@@ -18,71 +18,106 @@
.align 4
bn_mul_add_words:
lghi zero,0 // zero = 0
- la %r1,0(%r2) // put rp aside
- lghi %r2,0 // i=0;
+ la %r1,0(%r2) // put rp aside [to give way to]
+ lghi %r2,0 // return value
ltgfr %r4,%r4
bler %r14 // if (len<=0) return 0;
- stmg %r6,%r10,48(%r15)
- lghi %r10,3
- lghi %r8,0 // carry = 0
- nr %r10,%r4 // len%4
+ stmg %r6,%r13,48(%r15)
+ lghi %r2,3
+ lghi %r12,0 // carry = 0
+ slgr %r1,%r3 // rp-=ap
+ nr %r2,%r4 // len%4
sra %r4,2 // cnt=len/4
jz .Loop1_madd // carry is incidentally cleared if branch taken
algr zero,zero // clear carry
-.Loop4_madd:
- lg %r7,0(%r2,%r3) // ap[i]
+ lg %r7,0(%r3) // ap[0]
+ lg %r9,8(%r3) // ap[1]
mlgr %r6,%r5 // *=w
- alcgr %r7,%r8 // +=carry
- alcgr %r6,zero
- alg %r7,0(%r2,%r1) // +=rp[i]
- stg %r7,0(%r2,%r1) // rp[i]=
+ brct %r4,.Loop4_madd
+ j .Loop4_madd_tail
- lg %r9,8(%r2,%r3)
+.Loop4_madd:
mlgr %r8,%r5
+ lg %r11,16(%r3) // ap[i+2]
+ alcgr %r7,%r12 // +=carry
+ alcgr %r6,zero
+ alg %r7,0(%r3,%r1) // +=rp[i]
+ stg %r7,0(%r3,%r1) // rp[i]=
+
+ mlgr %r10,%r5
+ lg %r13,24(%r3)
alcgr %r9,%r6
alcgr %r8,zero
- alg %r9,8(%r2,%r1)
- stg %r9,8(%r2,%r1)
+ alg %r9,8(%r3,%r1)
+ stg %r9,8(%r3,%r1)
+
+ mlgr %r12,%r5
+ lg %r7,32(%r3)
+ alcgr %r11,%r8
+ alcgr %r10,zero
+ alg %r11,16(%r3,%r1)
+ stg %r11,16(%r3,%r1)
- lg %r7,16(%r2,%r3)
mlgr %r6,%r5
- alcgr %r7,%r8
- alcgr %r6,zero
- alg %r7,16(%r2,%r1)
- stg %r7,16(%r2,%r1)
+ lg %r9,40(%r3)
+ alcgr %r13,%r10
+ alcgr %r12,zero
+ alg %r13,24(%r3,%r1)
+ stg %r13,24(%r3,%r1)
- lg %r9,24(%r2,%r3)
+ la %r3,32(%r3) // i+=4
+ brct %r4,.Loop4_madd
+
+.Loop4_madd_tail:
mlgr %r8,%r5
+ lg %r11,16(%r3)
+ alcgr %r7,%r12 // +=carry
+ alcgr %r6,zero
+ alg %r7,0(%r3,%r1) // +=rp[i]
+ stg %r7,0(%r3,%r1) // rp[i]=
+
+ mlgr %r10,%r5
+ lg %r13,24(%r3)
alcgr %r9,%r6
alcgr %r8,zero
- alg %r9,24(%r2,%r1)
- stg %r9,24(%r2,%r1)
+ alg %r9,8(%r3,%r1)
+ stg %r9,8(%r3,%r1)
- la %r2,32(%r2) // i+=4
- brct %r4,.Loop4_madd
+ mlgr %r12,%r5
+ alcgr %r11,%r8
+ alcgr %r10,zero
+ alg %r11,16(%r3,%r1)
+ stg %r11,16(%r3,%r1)
- la %r10,1(%r10) // see if len%4 is zero ...
- brct %r10,.Loop1_madd // without touching condition code:-)
+ alcgr %r13,%r10
+ alcgr %r12,zero
+ alg %r13,24(%r3,%r1)
+ stg %r13,24(%r3,%r1)
+
+ la %r3,32(%r3) // i+=4
+
+ la %r2,1(%r2) // see if len%4 is zero ...
+ brct %r2,.Loop1_madd // without touching condition code:-)
.Lend_madd:
- alcgr %r8,zero // collect carry bit
- lgr %r2,%r8
- lmg %r6,%r10,48(%r15)
+ lgr %r2,zero // return value
+ alcgr %r2,%r12 // collect even carry bit
+ lmg %r6,%r13,48(%r15)
br %r14
.Loop1_madd:
- lg %r7,0(%r2,%r3) // ap[i]
+ lg %r7,0(%r3) // ap[i]
mlgr %r6,%r5 // *=w
- alcgr %r7,%r8 // +=carry
+ alcgr %r7,%r12 // +=carry
alcgr %r6,zero
- alg %r7,0(%r2,%r1) // +=rp[i]
- stg %r7,0(%r2,%r1) // rp[i]=
+ alg %r7,0(%r3,%r1) // +=rp[i]
+ stg %r7,0(%r3,%r1) // rp[i]=
- lgr %r8,%r6
- la %r2,8(%r2) // i++
- brct %r10,.Loop1_madd
+ lgr %r12,%r6
+ la %r3,8(%r3) // i++
+ brct %r2,.Loop1_madd
j .Lend_madd
.size bn_mul_add_words,.-bn_mul_add_words