Eliminate conditional final subtraction in Montgomery assembler modules.

author: Andy Polyakov <appro@openssl.org> 2007-06-17 17:10:03 +0000
committer: Andy Polyakov <appro@openssl.org> 2007-06-17 17:10:03 +0000
commit: 7d9cf7c0bbc17a2c00339e660c83ebf1a4f9061a (patch)
tree: 004fe317e4795d576c92746d7e954c2db6a5d3af /crypto/bn/asm/x86_64-mont.pl
parent: 55525742f4c2bf416013fc3a75ec642775d97f80 (diff)
download: openssl-7d9cf7c0bbc17a2c00339e660c83ebf1a4f9061a.zip
openssl-7d9cf7c0bbc17a2c00339e660c83ebf1a4f9061a.tar.gz
openssl-7d9cf7c0bbc17a2c00339e660c83ebf1a4f9061a.tar.bz2
1 files changed, 28 insertions, 27 deletions
diff --git a/crypto/bn/asm/x86_64-mont.pl b/crypto/bn/asm/x86_64-mont.pl
index bc3fa83..6701bf2 100755
--- a/crypto/bn/asm/x86_64-mont.pl
+++ b/crypto/bn/asm/x86_64-mont.pl
@@ -59,6 +59,7 @@ bn_mul_mont:
 	neg	%rax
 	lea	(%rsp,%rax,8),%rsp	# tp=alloca(8*(num+2))
 	and	\$-1024,%rsp		# minimize TLB usage
+
 	mov	%rbp,8(%rsp,$num,8)	# tp[num+1]=%rsp
 	mov	%rdx,$bp		# $bp reassigned, remember?
 
@@ -166,22 +167,38 @@ bn_mul_mont:
 	cmp	$num,$i
 	jl	.Louter
 
-	xor	$i,$i			# i=0
+	mov	-8($np,$num,8),%rax	# np[num-1]
+	lea	(%rsp),$ap		# borrow ap for tp
+	shr	\$62,%rax		# check for boundary condition
+	jz	.Lcopy
+
+	mov	($ap),%rax		# tp[0]
 	lea	-1($num),$j		# j=num-1
-	cmp	\$0,%rdx		# %rdx still holds upmost overflow bit
-	jnz	.Lsub			# CF is cleared by compare with 0
-	mov	(%rsp,$j,8),%rax
-	cmp	($np,$j,8),%rax		# tp[num-1]-np[num-1]
-	jae	.Lsub			# if taken CF was cleared by above cmp
-.align	4
-.Lcopy:
-	mov	(%rsp,$j,8),%rax
+	xor	$i,$i			# i=0 and clear CF!
+	jmp	.Lsub
+.align	16
+.Lsub:	sbb	($np,$i,8),%rax
+	mov	%rax,($rp,$i,8)		# rp[i]=tp[i]-np[i]
+	dec	$j			# doesn't affect CF!
+	mov	8($ap,$i,8),%rax	# tp[i+1]
+	lea	1($i),$i		# i++
+	jge	.Lsub
+
+	sbb	\$0,%rax		# handle upmost overflow bit
+	and	%rax,$ap
+	not	%rax
+	mov	$rp,$np
+	and	%rax,$np
+	lea	-1($num),$j
+	or	$np,$ap			# ap=borrow?tp:rp
+.align	16
+.Lcopy:					# copy or in-place refresh
+	mov	($ap,$j,8),%rax
 	mov	%rax,($rp,$j,8)		# rp[i]=tp[i]
 	mov	$i,(%rsp,$j,8)		# zap temporary vector
 	dec	$j
 	jge	.Lcopy
-.align	4
-.Lexit:
+	
 	mov	8(%rsp,$num,8),%rsp	# restore %rsp
 	mov	\$1,%rax
 	pop	%r15
@@ -191,22 +208,6 @@ bn_mul_mont:
 	pop	%rbp
 	pop	%rbx
 	ret
-
-.align	16
-.Lsub:	mov	(%rsp,$i,8),%rax
-	sbb	($np,$i,8),%rax
-	mov	%rax,($rp,$i,8)		# rp[i]=tp[i]-np[j]
-	lea	1($i),$i		# i++
-	dec	$j			# doesn't affect CF!
-	jge	.Lsub
-	lea	-1($num),$j		# j=num-1
-	sbb	\$0,%rdx
-	jc	.Lcopy			# tp was less than np
-.align	4
-.Lzap:	mov	$i,(%rsp,$j,8)		# zap temporary vector
-	dec	$j
-	jge	.Lzap
-	jmp	.Lexit
 .size	bn_mul_mont,.-bn_mul_mont
 .asciz	"Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
 ___
author	Andy Polyakov <appro@openssl.org>	2007-06-17 17:10:03 +0000
committer	Andy Polyakov <appro@openssl.org>	2007-06-17 17:10:03 +0000
commit	7d9cf7c0bbc17a2c00339e660c83ebf1a4f9061a (patch)
tree	004fe317e4795d576c92746d7e954c2db6a5d3af /crypto/bn/asm/x86_64-mont.pl
parent	55525742f4c2bf416013fc3a75ec642775d97f80 (diff)
download	openssl-7d9cf7c0bbc17a2c00339e660c83ebf1a4f9061a.zip openssl-7d9cf7c0bbc17a2c00339e660c83ebf1a4f9061a.tar.gz openssl-7d9cf7c0bbc17a2c00339e660c83ebf1a4f9061a.tar.bz2