author    Andy Polyakov <appro@openssl.org>    2019-02-15 22:16:41 +0100
committer Richard Levitte <levitte@openssl.org>    2019-02-16 17:01:15 +0100
commit    db42bb440e76399b89fc8ae04644441a2a5f6821 (patch)
tree      0ddc0a16632b60834e44805d2e329b774381f323
parent    3405db97e5448c784729b56837f3f8c776a01067 (diff)
ARM64 assembly pack: make it Windows-friendly.
"Windows friendliness" means a) unified PIC-ification, unified across all platforms; b) unified commantary delimiter; c) explicit ldur/stur, as Visual Studio assembler can't automatically encode ldr/str as ldur/stur when needed. Reviewed-by: Paul Dale <paul.dale@oracle.com> Reviewed-by: Richard Levitte <levitte@openssl.org> (Merged from https://github.com/openssl/openssl/pull/8256)
-rwxr-xr-x  crypto/aes/asm/vpaes-armv8.pl          276
-rwxr-xr-x  crypto/bn/asm/armv8-mont.pl             16
-rwxr-xr-x  crypto/chacha/asm/chacha-armv8.pl       19
-rw-r--r--  crypto/ec/asm/ecp_nistz256-armv8.pl      6
-rwxr-xr-x  crypto/perlasm/arm-xlate.pl             10
-rwxr-xr-x  crypto/poly1305/asm/poly1305-armv8.pl   31
-rw-r--r--  crypto/sha/asm/sha1-armv8.pl            30
-rw-r--r--  crypto/sha/asm/sha512-armv8.pl          23
8 files changed, 191 insertions, 220 deletions
diff --git a/crypto/aes/asm/vpaes-armv8.pl b/crypto/aes/asm/vpaes-armv8.pl
index ece9f20..f08ae58 100755
--- a/crypto/aes/asm/vpaes-armv8.pl
+++ b/crypto/aes/asm/vpaes-armv8.pl
@@ -150,12 +150,12 @@ my ($sb1u,$sb1t,$sb2u,$sb2t) = map("v$_.16b",(24..27));
my ($sb9u,$sb9t,$sbdu,$sbdt,$sbbu,$sbbt,$sbeu,$sbet)=map("v$_.16b",(24..31));
$code.=<<___;
-##
-## _aes_preheat
-##
-## Fills register %r10 -> .aes_consts (so you can -fPIC)
-## and %xmm9-%xmm15 as specified below.
-##
+//
+// _aes_preheat
+//
+// Fills register %r10 -> .aes_consts (so you can -fPIC)
+// and %xmm9-%xmm15 as specified below.
+//
.type _vpaes_encrypt_preheat,%function
.align 4
_vpaes_encrypt_preheat:
@@ -167,21 +167,21 @@ _vpaes_encrypt_preheat:
ret
.size _vpaes_encrypt_preheat,.-_vpaes_encrypt_preheat
-##
-## _aes_encrypt_core
-##
-## AES-encrypt %xmm0.
-##
-## Inputs:
-## %xmm0 = input
-## %xmm9-%xmm15 as in _vpaes_preheat
-## (%rdx) = scheduled keys
-##
-## Output in %xmm0
-## Clobbers %xmm1-%xmm5, %r9, %r10, %r11, %rax
-## Preserves %xmm6 - %xmm8 so you get some local vectors
-##
-##
+//
+// _aes_encrypt_core
+//
+// AES-encrypt %xmm0.
+//
+// Inputs:
+// %xmm0 = input
+// %xmm9-%xmm15 as in _vpaes_preheat
+// (%rdx) = scheduled keys
+//
+// Output in %xmm0
+// Clobbers %xmm1-%xmm5, %r9, %r10, %r11, %rax
+// Preserves %xmm6 - %xmm8 so you get some local vectors
+//
+//
.type _vpaes_encrypt_core,%function
.align 4
_vpaes_encrypt_core:
@@ -387,11 +387,11 @@ _vpaes_decrypt_preheat:
ret
.size _vpaes_decrypt_preheat,.-_vpaes_decrypt_preheat
-##
-## Decryption core
-##
-## Same API as encryption core.
-##
+//
+// Decryption core
+//
+// Same API as encryption core.
+//
.type _vpaes_decrypt_core,%function
.align 4
_vpaes_decrypt_core:
@@ -643,11 +643,11 @@ my ($inp,$bits,$out,$dir)=("x0","w1","x2","w3");
my ($invlo,$invhi,$iptlo,$ipthi,$rcon) = map("v$_.16b",(18..21,8));
$code.=<<___;
-########################################################
-## ##
-## AES key schedule ##
-## ##
-########################################################
+////////////////////////////////////////////////////////
+// //
+// AES key schedule //
+// //
+////////////////////////////////////////////////////////
.type _vpaes_key_preheat,%function
.align 4
_vpaes_key_preheat:
@@ -703,14 +703,14 @@ _vpaes_schedule_core:
b.eq .Lschedule_192
// 128: fall though
-##
-## .schedule_128
-##
-## 128-bit specific part of key schedule.
-##
-## This schedule is really simple, because all its parts
-## are accomplished by the subroutines.
-##
+//
+// .schedule_128
+//
+// 128-bit specific part of key schedule.
+//
+// This schedule is really simple, because all its parts
+// are accomplished by the subroutines.
+//
.Lschedule_128:
mov $inp, #10 // mov \$10, %esi
@@ -721,21 +721,21 @@ _vpaes_schedule_core:
bl _vpaes_schedule_mangle // write output
b .Loop_schedule_128
-##
-## .aes_schedule_192
-##
-## 192-bit specific part of key schedule.
-##
-## The main body of this schedule is the same as the 128-bit
-## schedule, but with more smearing. The long, high side is
-## stored in %xmm7 as before, and the short, low side is in
-## the high bits of %xmm6.
-##
-## This schedule is somewhat nastier, however, because each
-## round produces 192 bits of key material, or 1.5 round keys.
-## Therefore, on each cycle we do 2 rounds and produce 3 round
-## keys.
-##
+//
+// .aes_schedule_192
+//
+// 192-bit specific part of key schedule.
+//
+// The main body of this schedule is the same as the 128-bit
+// schedule, but with more smearing. The long, high side is
+// stored in %xmm7 as before, and the short, low side is in
+// the high bits of %xmm6.
+//
+// This schedule is somewhat nastier, however, because each
+// round produces 192 bits of key material, or 1.5 round keys.
+// Therefore, on each cycle we do 2 rounds and produce 3 round
+// keys.
+//
.align 4
.Lschedule_192:
sub $inp, $inp, #8
@@ -759,16 +759,16 @@ _vpaes_schedule_core:
bl _vpaes_schedule_192_smear
b .Loop_schedule_192
-##
-## .aes_schedule_256
-##
-## 256-bit specific part of key schedule.
-##
-## The structure here is very similar to the 128-bit
-## schedule, but with an additional "low side" in
-## %xmm6. The low side's rounds are the same as the
-## high side's, except no rcon and no rotation.
-##
+//
+// .aes_schedule_256
+//
+// 256-bit specific part of key schedule.
+//
+// The structure here is very similar to the 128-bit
+// schedule, but with an additional "low side" in
+// %xmm6. The low side's rounds are the same as the
+// high side's, except no rcon and no rotation.
+//
.align 4
.Lschedule_256:
ld1 {v0.16b}, [$inp] // vmovdqu 16(%rdi),%xmm0 # load key part 2 (unaligned)
@@ -795,16 +795,16 @@ _vpaes_schedule_core:
b .Loop_schedule_256
-##
-## .aes_schedule_mangle_last
-##
-## Mangler for last round of key schedule
-## Mangles %xmm0
-## when encrypting, outputs out(%xmm0) ^ 63
-## when decrypting, outputs unskew(%xmm0)
-##
-## Always called right before return... jumps to cleanup and exits
-##
+//
+// .aes_schedule_mangle_last
+//
+// Mangler for last round of key schedule
+// Mangles %xmm0
+// when encrypting, outputs out(%xmm0) ^ 63
+// when decrypting, outputs unskew(%xmm0)
+//
+// Always called right before return... jumps to cleanup and exits
+//
.align 4
.Lschedule_mangle_last:
// schedule last round key from xmm0
@@ -838,20 +838,20 @@ _vpaes_schedule_core:
ret
.size _vpaes_schedule_core,.-_vpaes_schedule_core
-##
-## .aes_schedule_192_smear
-##
-## Smear the short, low side in the 192-bit key schedule.
-##
-## Inputs:
-## %xmm7: high side, b a x y
-## %xmm6: low side, d c 0 0
-## %xmm13: 0
-##
-## Outputs:
-## %xmm6: b+c+d b+c 0 0
-## %xmm0: b+c+d b+c b a
-##
+//
+// .aes_schedule_192_smear
+//
+// Smear the short, low side in the 192-bit key schedule.
+//
+// Inputs:
+// %xmm7: high side, b a x y
+// %xmm6: low side, d c 0 0
+// %xmm13: 0
+//
+// Outputs:
+// %xmm6: b+c+d b+c 0 0
+// %xmm0: b+c+d b+c b a
+//
.type _vpaes_schedule_192_smear,%function
.align 4
_vpaes_schedule_192_smear:
@@ -867,24 +867,24 @@ _vpaes_schedule_192_smear:
ret
.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
-##
-## .aes_schedule_round
-##
-## Runs one main round of the key schedule on %xmm0, %xmm7
-##
-## Specifically, runs subbytes on the high dword of %xmm0
-## then rotates it by one byte and xors into the low dword of
-## %xmm7.
-##
-## Adds rcon from low byte of %xmm8, then rotates %xmm8 for
-## next rcon.
-##
-## Smears the dwords of %xmm7 by xoring the low into the
-## second low, result into third, result into highest.
-##
-## Returns results in %xmm7 = %xmm0.
-## Clobbers %xmm1-%xmm4, %r11.
-##
+//
+// .aes_schedule_round
+//
+// Runs one main round of the key schedule on %xmm0, %xmm7
+//
+// Specifically, runs subbytes on the high dword of %xmm0
+// then rotates it by one byte and xors into the low dword of
+// %xmm7.
+//
+// Adds rcon from low byte of %xmm8, then rotates %xmm8 for
+// next rcon.
+//
+// Smears the dwords of %xmm7 by xoring the low into the
+// second low, result into third, result into highest.
+//
+// Returns results in %xmm7 = %xmm0.
+// Clobbers %xmm1-%xmm4, %r11.
+//
.type _vpaes_schedule_round,%function
.align 4
_vpaes_schedule_round:
@@ -932,15 +932,15 @@ _vpaes_schedule_low_round:
ret
.size _vpaes_schedule_round,.-_vpaes_schedule_round
-##
-## .aes_schedule_transform
-##
-## Linear-transform %xmm0 according to tables at (%r11)
-##
-## Requires that %xmm9 = 0x0F0F... as in preheat
-## Output in %xmm0
-## Clobbers %xmm1, %xmm2
-##
+//
+// .aes_schedule_transform
+//
+// Linear-transform %xmm0 according to tables at (%r11)
+//
+// Requires that %xmm9 = 0x0F0F... as in preheat
+// Output in %xmm0
+// Clobbers %xmm1, %xmm2
+//
.type _vpaes_schedule_transform,%function
.align 4
_vpaes_schedule_transform:
@@ -954,29 +954,29 @@ _vpaes_schedule_transform:
ret
.size _vpaes_schedule_transform,.-_vpaes_schedule_transform
-##
-## .aes_schedule_mangle
-##
-## Mangle xmm0 from (basis-transformed) standard version
-## to our version.
-##
-## On encrypt,
-## xor with 0x63
-## multiply by circulant 0,1,1,1
-## apply shiftrows transform
-##
-## On decrypt,
-## xor with 0x63
-## multiply by "inverse mixcolumns" circulant E,B,D,9
-## deskew
-## apply shiftrows transform
-##
-##
-## Writes out to (%rdx), and increments or decrements it
-## Keeps track of round number mod 4 in %r8
-## Preserves xmm0
-## Clobbers xmm1-xmm5
-##
+//
+// .aes_schedule_mangle
+//
+// Mangle xmm0 from (basis-transformed) standard version
+// to our version.
+//
+// On encrypt,
+// xor with 0x63
+// multiply by circulant 0,1,1,1
+// apply shiftrows transform
+//
+// On decrypt,
+// xor with 0x63
+// multiply by "inverse mixcolumns" circulant E,B,D,9
+// deskew
+// apply shiftrows transform
+//
+//
+// Writes out to (%rdx), and increments or decrements it
+// Keeps track of round number mod 4 in %r8
+// Preserves xmm0
+// Clobbers xmm1-xmm5
+//
.type _vpaes_schedule_mangle,%function
.align 4
_vpaes_schedule_mangle:
diff --git a/crypto/bn/asm/armv8-mont.pl b/crypto/bn/asm/armv8-mont.pl
index c09c783..c755555 100755
--- a/crypto/bn/asm/armv8-mont.pl
+++ b/crypto/bn/asm/armv8-mont.pl
@@ -197,7 +197,7 @@ bn_mul_mont:
mul $nlo,$nj,$m1 // np[j]*m1
adds $lo1,$lo1,$lo0
umulh $nhi,$nj,$m1
- str $lo1,[$tp,#-16] // tp[j-1]
+ stur $lo1,[$tp,#-16] // tp[j-1]
cbnz $j,.Linner
.Linner_skip:
@@ -253,13 +253,13 @@ bn_mul_mont:
csel $nj,$tj,$aj,lo // did it borrow?
ldr $tj,[$tp],#8
ldr $aj,[$rp],#8
- str xzr,[$tp,#-16] // wipe tp
- str $nj,[$rp,#-16]
+ stur xzr,[$tp,#-16] // wipe tp
+ stur $nj,[$rp,#-16]
cbnz $num,.Lcond_copy
csel $nj,$tj,$aj,lo
- str xzr,[$tp,#-8] // wipe tp
- str $nj,[$rp,#-8]
+ stur xzr,[$tp,#-8] // wipe tp
+ stur $nj,[$rp,#-8]
ldp x19,x20,[x29,#16]
mov sp,x29
@@ -596,7 +596,7 @@ __bn_sqr8x_mont:
ldp $a4,$a5,[$tp,#8*4]
ldp $a6,$a7,[$tp,#8*6]
adds $acc0,$acc0,$a0
- ldr $n0,[$rp,#-8*8]
+ ldur $n0,[$rp,#-8*8]
adcs $acc1,$acc1,$a1
ldp $a0,$a1,[$ap,#8*0]
adcs $acc2,$acc2,$a2
@@ -794,7 +794,7 @@ $code.=<<___;
//adc $carry,xzr,xzr // moved below
cbz $cnt,.Lsqr8x8_post_condition
- ldr $n0,[$tp,#-8*8]
+ ldur $n0,[$tp,#-8*8]
ldp $a0,$a1,[$np,#8*0]
ldp $a2,$a3,[$np,#8*2]
ldp $a4,$a5,[$np,#8*4]
@@ -852,7 +852,7 @@ $code.=<<___;
ldp $a6,$a7,[$tp,#8*6]
cbz $cnt,.Lsqr8x_tail_break
- ldr $n0,[$rp,#-8*8]
+ ldur $n0,[$rp,#-8*8]
adds $acc0,$acc0,$a0
adcs $acc1,$acc1,$a1
ldp $a0,$a1,[$np,#8*0]
diff --git a/crypto/chacha/asm/chacha-armv8.pl b/crypto/chacha/asm/chacha-armv8.pl
index cee787d..56ba1c3 100755
--- a/crypto/chacha/asm/chacha-armv8.pl
+++ b/crypto/chacha/asm/chacha-armv8.pl
@@ -131,12 +131,6 @@ $code.=<<___;
.quad 0x3320646e61707865,0x6b20657479622d32 // endian-neutral
.Lone:
.long 1,0,0,0
-.LOPENSSL_armcap_P:
-#ifdef __ILP32__
-.long OPENSSL_armcap_P-.
-#else
-.quad OPENSSL_armcap_P-.
-#endif
.asciz "ChaCha20 for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.globl ChaCha20_ctr32
@@ -144,17 +138,13 @@ $code.=<<___;
.align 5
ChaCha20_ctr32:
cbz $len,.Labort
- adr @x[0],.LOPENSSL_armcap_P
cmp $len,#192
b.lo .Lshort
-#ifdef __ILP32__
- ldrsw @x[1],[@x[0]]
-#else
- ldr @x[1],[@x[0]]
-#endif
- ldr w17,[@x[1],@x[0]]
+
+ adrp x17,OPENSSL_armcap_P
+ ldr w17,[x17,#:lo12:OPENSSL_armcap_P]
tst w17,#ARMV7_NEON
- b.ne ChaCha20_neon
+ b.ne .LChaCha20_neon
.Lshort:
.inst 0xd503233f // paciasp
@@ -380,6 +370,7 @@ $code.=<<___;
.type ChaCha20_neon,%function
.align 5
ChaCha20_neon:
+.LChaCha20_neon:
.inst 0xd503233f // paciasp
stp x29,x30,[sp,#-96]!
add x29,sp,#0
diff --git a/crypto/ec/asm/ecp_nistz256-armv8.pl b/crypto/ec/asm/ecp_nistz256-armv8.pl
index d84f523..8914f1a 100644
--- a/crypto/ec/asm/ecp_nistz256-armv8.pl
+++ b/crypto/ec/asm/ecp_nistz256-armv8.pl
@@ -1654,7 +1654,7 @@ ecp_nistz256_scatter_w5:
ldp x4,x5,[$inp] // X
ldp x6,x7,[$inp,#16]
- str w4,[$out,#64*0-4]
+ stur w4,[$out,#64*0-4]
lsr x4,x4,#32
str w5,[$out,#64*1-4]
lsr x5,x5,#32
@@ -1670,7 +1670,7 @@ ecp_nistz256_scatter_w5:
ldp x4,x5,[$inp,#32] // Y
ldp x6,x7,[$inp,#48]
- str w4,[$out,#64*0-4]
+ stur w4,[$out,#64*0-4]
lsr x4,x4,#32
str w5,[$out,#64*1-4]
lsr x5,x5,#32
@@ -1686,7 +1686,7 @@ ecp_nistz256_scatter_w5:
ldp x4,x5,[$inp,#64] // Z
ldp x6,x7,[$inp,#80]
- str w4,[$out,#64*0-4]
+ stur w4,[$out,#64*0-4]
lsr x4,x4,#32
str w5,[$out,#64*1-4]
lsr x5,x5,#32
diff --git a/crypto/perlasm/arm-xlate.pl b/crypto/perlasm/arm-xlate.pl
index b953f1f..af6f7d3 100755
--- a/crypto/perlasm/arm-xlate.pl
+++ b/crypto/perlasm/arm-xlate.pl
@@ -103,6 +103,12 @@ my $asciz = sub {
{ ""; }
};
+my $adrp = sub {
+ my ($args,$comment) = split(m|\s*//|,shift);
+ "\tadrp\t$args\@PAGE";
+} if ($flavour =~ /ios64/);
+
+
sub range {
my ($r,$sfx,$start,$end) = @_;
@@ -132,6 +138,10 @@ sub expand_line {
$line =~ s/\b(\w+)/$GLOBALS{$1} or $1/ge;
+ if ($flavour =~ /ios64/) {
+ $line =~ s/#:lo12:(\w+)/$1\@PAGEOFF/;
+ }
+
return $line;
}
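As an aside (an illustrative sketch, not code from the patch), the effect of the ios64-flavour rewrites added to arm-xlate.pl above can be approximated in isolation as follows; the xlate_line helper is hypothetical and only mimics the two substitutions.

#!/usr/bin/env perl
# Hypothetical standalone approximation of the ios64 handling above:
# adrp gains an @PAGE suffix and "#:lo12:sym" becomes "sym@PAGEOFF".
use strict;
use warnings;

my $flavour = "ios64";

sub xlate_line {
    my $line = shift;
    if ($flavour =~ /ios64/) {
        $line =~ s/\b(adrp\s+\w+,\s*)(\w+)/$1$2\@PAGE/;   # adrp xN,sym -> adrp xN,sym@PAGE
        $line =~ s/#:lo12:(\w+)/$1\@PAGEOFF/;             # sym@PAGEOFF carries the low 12 bits
    }
    return $line;
}

print xlate_line("\tadrp\tx16,OPENSSL_armcap_P\n");
print xlate_line("\tldr\tw16,[x16,#:lo12:OPENSSL_armcap_P]\n");
# prints:
#	adrp	x16,OPENSSL_armcap_P@PAGE
#	ldr	w16,[x16,OPENSSL_armcap_P@PAGEOFF]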
diff --git a/crypto/poly1305/asm/poly1305-armv8.pl b/crypto/poly1305/asm/poly1305-armv8.pl
index 1aded5a..b7aa7dc 100755
--- a/crypto/poly1305/asm/poly1305-armv8.pl
+++ b/crypto/poly1305/asm/poly1305-armv8.pl
@@ -71,17 +71,12 @@ poly1305_init:
csel x0,xzr,x0,eq
b.eq .Lno_key
-#ifdef __ILP32__
- ldrsw $t1,.LOPENSSL_armcap_P
-#else
- ldr $t1,.LOPENSSL_armcap_P
-#endif
- adr $t0,.LOPENSSL_armcap_P
+ adrp x17,OPENSSL_armcap_P
+ ldr w17,[x17,#:lo12:OPENSSL_armcap_P]
ldp $r0,$r1,[$inp] // load key
mov $s1,#0xfffffffc0fffffff
movk $s1,#0x0fff,lsl#48
- ldr w17,[$t0,$t1]
#ifdef __ARMEB__
rev $r0,$r0 // flip bytes
rev $r1,$r1
@@ -93,10 +88,10 @@ poly1305_init:
tst w17,#ARMV7_NEON
- adr $d0,poly1305_blocks
- adr $r0,poly1305_blocks_neon
- adr $d1,poly1305_emit
- adr $r1,poly1305_emit_neon
+ adr $d0,.Lpoly1305_blocks
+ adr $r0,.Lpoly1305_blocks_neon
+ adr $d1,.Lpoly1305_emit
+ adr $r1,.Lpoly1305_emit_neon
csel $d0,$d0,$r0,eq
csel $d1,$d1,$r1,eq
@@ -115,6 +110,7 @@ poly1305_init:
.type poly1305_blocks,%function
.align 5
poly1305_blocks:
+.Lpoly1305_blocks:
ands $len,$len,#-16
b.eq .Lno_data
@@ -179,6 +175,7 @@ poly1305_blocks:
.type poly1305_emit,%function
.align 5
poly1305_emit:
+.Lpoly1305_emit:
ldp $h0,$h1,[$ctx] // load hash base 2^64
ldr $h2,[$ctx,#16]
ldp $t0,$t1,[$nonce] // load nonce
@@ -285,10 +282,11 @@ poly1305_splat:
.type poly1305_blocks_neon,%function
.align 5
poly1305_blocks_neon:
+.Lpoly1305_blocks_neon:
ldr $is_base2_26,[$ctx,#24]
cmp $len,#128
b.hs .Lblocks_neon
- cbz $is_base2_26,poly1305_blocks
+ cbz $is_base2_26,.Lpoly1305_blocks
.Lblocks_neon:
.inst 0xd503233f // paciasp
@@ -431,7 +429,7 @@ poly1305_blocks_neon:
csel $in2,$zeros,$in2,lo
mov x4,#1
- str x4,[$ctx,#-24] // set is_base2_26
+ stur x4,[$ctx,#-24] // set is_base2_26
sub $ctx,$ctx,#48 // restore original $ctx
b .Ldo_neon
@@ -868,6 +866,7 @@ poly1305_blocks_neon:
.type poly1305_emit_neon,%function
.align 5
poly1305_emit_neon:
+.Lpoly1305_emit_neon:
ldr $is_base2_26,[$ctx,#24]
cbz $is_base2_26,poly1305_emit
@@ -920,12 +919,6 @@ poly1305_emit_neon:
.align 5
.Lzeros:
.long 0,0,0,0,0,0,0,0
-.LOPENSSL_armcap_P:
-#ifdef __ILP32__
-.long OPENSSL_armcap_P-.
-#else
-.quad OPENSSL_armcap_P-.
-#endif
.asciz "Poly1305 for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
___
diff --git a/crypto/sha/asm/sha1-armv8.pl b/crypto/sha/asm/sha1-armv8.pl
index a695b2c..7a0cbf5 100644
--- a/crypto/sha/asm/sha1-armv8.pl
+++ b/crypto/sha/asm/sha1-armv8.pl
@@ -58,10 +58,10 @@ $code.=<<___ if ($i<15 && !($i&1));
lsr @Xx[$i+1],@Xx[$i],#32
___
$code.=<<___ if ($i<14 && !($i&1));
- ldr @Xx[$i+2],[$inp,#`($i+2)*4-64`]
+ ldur @Xx[$i+2],[$inp,#`($i+2)*4-64`]
___
$code.=<<___ if ($i<14 && ($i&1));
-#ifdef __ARMEB__
+#ifdef __AARCH64EB__
ror @Xx[$i+1],@Xx[$i+1],#32
#else
rev32 @Xx[$i+1],@Xx[$i+1]
@@ -171,23 +171,19 @@ ___
}
$code.=<<___;
-#include "arm_arch.h"
+#ifndef __KERNEL__
+# include "arm_arch.h"
+.extern OPENSSL_armcap_P
+#endif
.text
-.extern OPENSSL_armcap_P
.globl sha1_block_data_order
.type sha1_block_data_order,%function
.align 6
sha1_block_data_order:
-#ifdef __ILP32__
- ldrsw x16,.LOPENSSL_armcap_P
-#else
- ldr x16,.LOPENSSL_armcap_P
-#endif
- adr x17,.LOPENSSL_armcap_P
- add x16,x16,x17
- ldr w16,[x16]
+ adrp x16,OPENSSL_armcap_P
+ ldr w16,[x16,#:lo12:OPENSSL_armcap_P]
tst w16,#ARMV8_SHA1
b.ne .Lv8_entry
@@ -208,7 +204,7 @@ sha1_block_data_order:
movz $K,#0x7999
sub $num,$num,#1
movk $K,#0x5a82,lsl#16
-#ifdef __ARMEB__
+#ifdef __AARCH64EB__
ror $Xx[0],@Xx[0],#32
#else
rev32 @Xx[0],@Xx[0]
@@ -321,15 +317,11 @@ $code.=<<___;
.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 //K_20_39
.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc //K_40_59
.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 //K_60_79
-.LOPENSSL_armcap_P:
-#ifdef __ILP32__
-.long OPENSSL_armcap_P-.
-#else
-.quad OPENSSL_armcap_P-.
-#endif
.asciz "SHA1 block transform for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
+#if !defined(__KERNEL__) && !defined(_WIN64)
.comm OPENSSL_armcap_P,4,4
+#endif
___
}}}
diff --git a/crypto/sha/asm/sha512-armv8.pl b/crypto/sha/asm/sha512-armv8.pl
index 6540a9f..f7c6721 100644
--- a/crypto/sha/asm/sha512-armv8.pl
+++ b/crypto/sha/asm/sha512-armv8.pl
@@ -188,24 +188,18 @@ ___
$code.=<<___;
#ifndef __KERNEL__
# include "arm_arch.h"
+.extern OPENSSL_armcap_P
#endif
.text
-.extern OPENSSL_armcap_P
.globl $func
.type $func,%function
.align 6
$func:
#ifndef __KERNEL__
-# ifdef __ILP32__
- ldrsw x16,.LOPENSSL_armcap_P
-# else
- ldr x16,.LOPENSSL_armcap_P
-# endif
- adr x17,.LOPENSSL_armcap_P
- add x16,x16,x17
- ldr w16,[x16]
+ adrp x16,OPENSSL_armcap_P
+ ldr w16,[x16,#:lo12:OPENSSL_armcap_P]
___
$code.=<<___ if ($SZ==4);
tst w16,#ARMV8_SHA256
@@ -353,15 +347,6 @@ $code.=<<___ if ($SZ==4);
___
$code.=<<___;
.size .LK$BITS,.-.LK$BITS
-#ifndef __KERNEL__
-.align 3
-.LOPENSSL_armcap_P:
-# ifdef __ILP32__
- .long OPENSSL_armcap_P-.
-# else
- .quad OPENSSL_armcap_P-.
-# endif
-#endif
.asciz "SHA$BITS block transform for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
___
@@ -841,7 +826,7 @@ ___
}
$code.=<<___;
-#ifndef __KERNEL__
+#if !defined(__KERNEL__) && !defined(_WIN64)
.comm OPENSSL_armcap_P,4,4
#endif
___