Diffstat (limited to 'crypto')
-rwxr-xr-x  crypto/aes/asm/aes-586.pl             6
-rw-r--r--  crypto/aes/asm/aes-c64xplus.pl       12
-rw-r--r--  crypto/bn/asm/armv4-gf2m.pl          10
-rw-r--r--  crypto/bn/asm/c64xplus-gf2m.pl       12
-rw-r--r--  crypto/bn/asm/ia64.S                  2
-rw-r--r--  crypto/bn/asm/s390x-gf2m.pl           6
-rw-r--r--  crypto/bn/asm/x86-gf2m.pl            16
-rw-r--r--  crypto/bn/asm/x86_64-gcc.c            2
-rw-r--r--  crypto/bn/asm/x86_64-gf2m.pl         16
-rw-r--r--  crypto/modes/asm/ghash-armv4.pl       8
-rw-r--r--  crypto/modes/asm/ghash-c64xplus.pl    4
-rw-r--r--  crypto/modes/asm/ghash-sparcv9.pl    18
-rw-r--r--  crypto/modes/asm/ghash-x86.pl         2
-rw-r--r--  crypto/modes/asm/ghash-x86_64.pl      8
-rwxr-xr-x  crypto/modes/asm/ghashp8-ppc.pl      12
-rw-r--r--  crypto/modes/asm/ghashv8-armx.pl     22
-rw-r--r--  crypto/rc4/asm/rc4-586.pl             2
-rwxr-xr-x  crypto/rc4/asm/rc4-x86_64.pl          2
-rw-r--r--  crypto/sha/asm/sha1-586.pl            4
-rw-r--r--  crypto/sha/asm/sha256-586.pl          2
-rw-r--r--  crypto/sha/asm/sha512-586.pl          2
-rw-r--r--  crypto/sparccpuid.S                   2
-rw-r--r--  crypto/whrlpool/asm/wp-mmx.pl         2
-rw-r--r--  crypto/x509v3/v3_pci.c                2
-rw-r--r--  crypto/x509v3/v3_pcia.c               2
25 files changed, 88 insertions, 88 deletions
diff --git a/crypto/aes/asm/aes-586.pl b/crypto/aes/asm/aes-586.pl
index 1c1e23e..767f204 100755
--- a/crypto/aes/asm/aes-586.pl
+++ b/crypto/aes/asm/aes-586.pl
@@ -45,7 +45,7 @@
# the undertaken effort was that it appeared that in tight IA-32
# register window little-endian flavor could achieve slightly higher
# Instruction Level Parallelism, and it indeed resulted in up to 15%
-# better performance on most recent µ-archs...
+# better performance on most recent µ-archs...
#
# Third version adds AES_cbc_encrypt implementation, which resulted in
# up to 40% performance imrovement of CBC benchmark results. 40% was
@@ -224,7 +224,7 @@ sub _data_word() { my $i; while(defined($i=shift)) { &data_word($i,$i); } }
$speed_limit=512; # chunks smaller than $speed_limit are
# processed with compact routine in CBC mode
$small_footprint=1; # $small_footprint=1 code is ~5% slower [on
- # recent µ-archs], but ~5 times smaller!
+ # recent µ-archs], but ~5 times smaller!
# I favor compact code to minimize cache
# contention and in hope to "collect" 5% back
# in real-life applications...
@@ -565,7 +565,7 @@ sub enctransform()
# Performance is not actually extraordinary in comparison to pure
# x86 code. In particular encrypt performance is virtually the same.
# Decrypt performance on the other hand is 15-20% better on newer
-# µ-archs [but we're thankful for *any* improvement here], and ~50%
+# µ-archs [but we're thankful for *any* improvement here], and ~50%
# better on PIII:-) And additionally on the pros side this code
# eliminates redundant references to stack and thus relieves/
# minimizes the pressure on the memory bus.
diff --git a/crypto/aes/asm/aes-c64xplus.pl b/crypto/aes/asm/aes-c64xplus.pl
index 24b2ba4..5bbc2ac 100644
--- a/crypto/aes/asm/aes-c64xplus.pl
+++ b/crypto/aes/asm/aes-c64xplus.pl
@@ -891,7 +891,7 @@ ret?: ; B0 holds rounds or zero
MVC B0,ILC
|| SUB B0,1,B0
- GMPY4 $K[0],A24,$Kx9[0] ; ·0x09
+ GMPY4 $K[0],A24,$Kx9[0] ; ·0x09
|| GMPY4 $K[1],A24,$Kx9[1]
|| MVK 0x00000D0D,A25
|| MVK 0x00000E0E,B25
@@ -900,14 +900,14 @@ ret?: ; B0 holds rounds or zero
|| MVKH 0x0D0D0000,A25
|| MVKH 0x0E0E0000,B25
- GMPY4 $K[0],B24,$KxB[0] ; ·0x0B
+ GMPY4 $K[0],B24,$KxB[0] ; ·0x0B
|| GMPY4 $K[1],B24,$KxB[1]
GMPY4 $K[2],B24,$KxB[2]
|| GMPY4 $K[3],B24,$KxB[3]
SPLOOP 11 ; InvMixColumns
;;====================================================================
- GMPY4 $K[0],A25,$KxD[0] ; ·0x0D
+ GMPY4 $K[0],A25,$KxD[0] ; ·0x0D
|| GMPY4 $K[1],A25,$KxD[1]
|| SWAP2 $Kx9[0],$Kx9[0] ; rotate by 16
|| SWAP2 $Kx9[1],$Kx9[1]
@@ -924,7 +924,7 @@ ret?: ; B0 holds rounds or zero
|| [B0] LDW *${KPA}[6],$K[2]
|| [B0] LDW *${KPB}[7],$K[3]
- GMPY4 $s[0],B25,$KxE[0] ; ·0x0E
+ GMPY4 $s[0],B25,$KxE[0] ; ·0x0E
|| GMPY4 $s[1],B25,$KxE[1]
|| XOR $Kx9[0],$KxB[0],$KxB[0]
|| XOR $Kx9[1],$KxB[1],$KxB[1]
@@ -944,7 +944,7 @@ ret?: ; B0 holds rounds or zero
XOR $KxE[0],$KxD[0],$KxE[0]
|| XOR $KxE[1],$KxD[1],$KxE[1]
-|| [B0] GMPY4 $K[0],A24,$Kx9[0] ; ·0x09
+|| [B0] GMPY4 $K[0],A24,$Kx9[0] ; ·0x09
|| [B0] GMPY4 $K[1],A24,$Kx9[1]
|| ADDAW $KPA,4,$KPA
XOR $KxE[2],$KxD[2],$KxE[2]
@@ -955,7 +955,7 @@ ret?: ; B0 holds rounds or zero
XOR $KxB[0],$KxE[0],$KxE[0]
|| XOR $KxB[1],$KxE[1],$KxE[1]
-|| [B0] GMPY4 $K[0],B24,$KxB[0] ; ·0x0B
+|| [B0] GMPY4 $K[0],B24,$KxB[0] ; ·0x0B
|| [B0] GMPY4 $K[1],B24,$KxB[1]
XOR $KxB[2],$KxE[2],$KxE[2]
|| XOR $KxB[3],$KxE[3],$KxE[3]
diff --git a/crypto/bn/asm/armv4-gf2m.pl b/crypto/bn/asm/armv4-gf2m.pl
index f05461a..a0b018c 100644
--- a/crypto/bn/asm/armv4-gf2m.pl
+++ b/crypto/bn/asm/armv4-gf2m.pl
@@ -27,7 +27,7 @@
# referred below, which improves ECDH and ECDSA verify benchmarks
# by 18-40%.
#
-# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
+# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
# Polynomial Multiplication on ARM Processors using the NEON Engine.
#
# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf
@@ -148,7 +148,7 @@ ___
################
# void bn_GF2m_mul_2x2(BN_ULONG *r,
# BN_ULONG a1,BN_ULONG a0,
-# BN_ULONG b1,BN_ULONG b0); # r[3..0]=a1a0·b1b0
+# BN_ULONG b1,BN_ULONG b0); # r[3..0]=a1a0·b1b0
{
$code.=<<___;
.global bn_GF2m_mul_2x2
@@ -171,7 +171,7 @@ $code.=<<___;
mov $mask,#7<<2
sub sp,sp,#32 @ allocate tab[8]
- bl mul_1x1_ialu @ a1·b1
+ bl mul_1x1_ialu @ a1·b1
str $lo,[$ret,#8]
str $hi,[$ret,#12]
@@ -181,13 +181,13 @@ $code.=<<___;
eor r2,r2,$a
eor $b,$b,r3
eor $a,$a,r2
- bl mul_1x1_ialu @ a0·b0
+ bl mul_1x1_ialu @ a0·b0
str $lo,[$ret]
str $hi,[$ret,#4]
eor $a,$a,r2
eor $b,$b,r3
- bl mul_1x1_ialu @ (a1+a0)·(b1+b0)
+ bl mul_1x1_ialu @ (a1+a0)·(b1+b0)
___
@r=map("r$_",(6..9));
$code.=<<___;
diff --git a/crypto/bn/asm/c64xplus-gf2m.pl b/crypto/bn/asm/c64xplus-gf2m.pl
index e4aa4e4..c79f46f 100644
--- a/crypto/bn/asm/c64xplus-gf2m.pl
+++ b/crypto/bn/asm/c64xplus-gf2m.pl
@@ -120,26 +120,26 @@ _bn_GF2m_mul_2x2:
.asmfunc
MVK 0xFF,$xFF
___
- &mul_1x1_upper($a0,$b0); # a0·b0
+ &mul_1x1_upper($a0,$b0); # a0·b0
$code.=<<___;
|| MV $b1,$B
MV $a1,$A
___
- &mul_1x1_merged("A28","B28",$A,$B); # a0·b0/a1·b1
+ &mul_1x1_merged("A28","B28",$A,$B); # a0·b0/a1·b1
$code.=<<___;
|| XOR $b0,$b1,$B
XOR $a0,$a1,$A
___
- &mul_1x1_merged("A31","B31",$A,$B); # a1·b1/(a0+a1)·(b0+b1)
+ &mul_1x1_merged("A31","B31",$A,$B); # a1·b1/(a0+a1)·(b0+b1)
$code.=<<___;
XOR A28,A31,A29
-|| XOR B28,B31,B29 ; a0·b0+a1·b1
+|| XOR B28,B31,B29 ; a0·b0+a1·b1
___
- &mul_1x1_lower("A30","B30"); # (a0+a1)·(b0+b1)
+ &mul_1x1_lower("A30","B30"); # (a0+a1)·(b0+b1)
$code.=<<___;
|| BNOP B3
XOR A29,A30,A30
-|| XOR B29,B30,B30 ; (a0+a1)·(b0+b1)-a0·b0-a1·b1
+|| XOR B29,B30,B30 ; (a0+a1)·(b0+b1)-a0·b0-a1·b1
XOR B28,A30,A30
|| STW A28,*${rp}[0]
XOR B30,A31,A31
diff --git a/crypto/bn/asm/ia64.S b/crypto/bn/asm/ia64.S
index 951abc5..c0cee82 100644
--- a/crypto/bn/asm/ia64.S
+++ b/crypto/bn/asm/ia64.S
@@ -568,7 +568,7 @@ bn_sqr_comba8:
// I've estimated this routine to run in ~120 ticks, but in reality
// (i.e. according to ar.itc) it takes ~160 ticks. Are those extra
// cycles consumed for instructions fetch? Or did I misinterpret some
-// clause in Itanium µ-architecture manual? Comments are welcomed and
+// clause in Itanium µ-architecture manual? Comments are welcomed and
// highly appreciated.
//
// On Itanium 2 it takes ~190 ticks. This is because of stalls on
diff --git a/crypto/bn/asm/s390x-gf2m.pl b/crypto/bn/asm/s390x-gf2m.pl
index cd9f13e..9d18d40 100644
--- a/crypto/bn/asm/s390x-gf2m.pl
+++ b/crypto/bn/asm/s390x-gf2m.pl
@@ -172,19 +172,19 @@ ___
if ($SIZE_T==8) {
my @r=map("%r$_",(6..9));
$code.=<<___;
- bras $ra,_mul_1x1 # a1·b1
+ bras $ra,_mul_1x1 # a1·b1
stmg $lo,$hi,16($rp)
lg $a,`$stdframe+128+4*$SIZE_T`($sp)
lg $b,`$stdframe+128+6*$SIZE_T`($sp)
- bras $ra,_mul_1x1 # a0·b0
+ bras $ra,_mul_1x1 # a0·b0
stmg $lo,$hi,0($rp)
lg $a,`$stdframe+128+3*$SIZE_T`($sp)
lg $b,`$stdframe+128+5*$SIZE_T`($sp)
xg $a,`$stdframe+128+4*$SIZE_T`($sp)
xg $b,`$stdframe+128+6*$SIZE_T`($sp)
- bras $ra,_mul_1x1 # (a0+a1)·(b0+b1)
+ bras $ra,_mul_1x1 # (a0+a1)·(b0+b1)
lmg @r[0],@r[3],0($rp)
xgr $lo,$hi
diff --git a/crypto/bn/asm/x86-gf2m.pl b/crypto/bn/asm/x86-gf2m.pl
index 808a1e5..b579530 100644
--- a/crypto/bn/asm/x86-gf2m.pl
+++ b/crypto/bn/asm/x86-gf2m.pl
@@ -14,7 +14,7 @@
# the time being... Except that it has three code paths: pure integer
# code suitable for any x86 CPU, MMX code suitable for PIII and later
# and PCLMULQDQ suitable for Westmere and later. Improvement varies
-# from one benchmark and µ-arch to another. Below are interval values
+# from one benchmark and µ-arch to another. Below are interval values
# for 163- and 571-bit ECDH benchmarks relative to compiler-generated
# code:
#
@@ -226,22 +226,22 @@ if ($sse2) {
&push ("edi");
&mov ($a,&wparam(1));
&mov ($b,&wparam(3));
- &call ("_mul_1x1_mmx"); # a1·b1
+ &call ("_mul_1x1_mmx"); # a1·b1
&movq ("mm7",$R);
&mov ($a,&wparam(2));
&mov ($b,&wparam(4));
- &call ("_mul_1x1_mmx"); # a0·b0
+ &call ("_mul_1x1_mmx"); # a0·b0
&movq ("mm6",$R);
&mov ($a,&wparam(1));
&mov ($b,&wparam(3));
&xor ($a,&wparam(2));
&xor ($b,&wparam(4));
- &call ("_mul_1x1_mmx"); # (a0+a1)·(b0+b1)
+ &call ("_mul_1x1_mmx"); # (a0+a1)·(b0+b1)
&pxor ($R,"mm7");
&mov ($a,&wparam(0));
- &pxor ($R,"mm6"); # (a0+a1)·(b0+b1)-a1·b1-a0·b0
+ &pxor ($R,"mm6"); # (a0+a1)·(b0+b1)-a1·b1-a0·b0
&movq ($A,$R);
&psllq ($R,32);
@@ -266,13 +266,13 @@ if ($sse2) {
&mov ($a,&wparam(1));
&mov ($b,&wparam(3));
- &call ("_mul_1x1_ialu"); # a1·b1
+ &call ("_mul_1x1_ialu"); # a1·b1
&mov (&DWP(8,"esp"),$lo);
&mov (&DWP(12,"esp"),$hi);
&mov ($a,&wparam(2));
&mov ($b,&wparam(4));
- &call ("_mul_1x1_ialu"); # a0·b0
+ &call ("_mul_1x1_ialu"); # a0·b0
&mov (&DWP(0,"esp"),$lo);
&mov (&DWP(4,"esp"),$hi);
@@ -280,7 +280,7 @@ if ($sse2) {
&mov ($b,&wparam(3));
&xor ($a,&wparam(2));
&xor ($b,&wparam(4));
- &call ("_mul_1x1_ialu"); # (a0+a1)·(b0+b1)
+ &call ("_mul_1x1_ialu"); # (a0+a1)·(b0+b1)
&mov ("ebp",&wparam(0));
@r=("ebx","ecx","edi","esi");
diff --git a/crypto/bn/asm/x86_64-gcc.c b/crypto/bn/asm/x86_64-gcc.c
index d548886..d77dc43 100644
--- a/crypto/bn/asm/x86_64-gcc.c
+++ b/crypto/bn/asm/x86_64-gcc.c
@@ -65,7 +65,7 @@
# undef mul_add
/*-
- * "m"(a), "+m"(r) is the way to favor DirectPath µ-code;
+ * "m"(a), "+m"(r) is the way to favor DirectPath µ-code;
* "g"(0) let the compiler to decide where does it
* want to keep the value of zero;
*/
diff --git a/crypto/bn/asm/x86_64-gf2m.pl b/crypto/bn/asm/x86_64-gf2m.pl
index 226c66c..42bbec2 100644
--- a/crypto/bn/asm/x86_64-gf2m.pl
+++ b/crypto/bn/asm/x86_64-gf2m.pl
@@ -13,7 +13,7 @@
# in bn_gf2m.c. It's kind of low-hanging mechanical port from C for
# the time being... Except that it has two code paths: code suitable
# for any x86_64 CPU and PCLMULQDQ one suitable for Westmere and
-# later. Improvement varies from one benchmark and µ-arch to another.
+# later. Improvement varies from one benchmark and µ-arch to another.
# Vanilla code path is at most 20% faster than compiler-generated code
# [not very impressive], while PCLMULQDQ - whole 85%-160% better on
# 163- and 571-bit ECDH benchmarks on Intel CPUs. Keep in mind that
@@ -184,13 +184,13 @@ ___
$code.=<<___;
movdqa %xmm0,%xmm4
movdqa %xmm1,%xmm5
- pclmulqdq \$0,%xmm1,%xmm0 # a1·b1
+ pclmulqdq \$0,%xmm1,%xmm0 # a1·b1
pxor %xmm2,%xmm4
pxor %xmm3,%xmm5
- pclmulqdq \$0,%xmm3,%xmm2 # a0·b0
- pclmulqdq \$0,%xmm5,%xmm4 # (a0+a1)·(b0+b1)
+ pclmulqdq \$0,%xmm3,%xmm2 # a0·b0
+ pclmulqdq \$0,%xmm5,%xmm4 # (a0+a1)·(b0+b1)
xorps %xmm0,%xmm4
- xorps %xmm2,%xmm4 # (a0+a1)·(b0+b1)-a0·b0-a1·b1
+ xorps %xmm2,%xmm4 # (a0+a1)·(b0+b1)-a0·b0-a1·b1
movdqa %xmm4,%xmm5
pslldq \$8,%xmm4
psrldq \$8,%xmm5
@@ -225,13 +225,13 @@ $code.=<<___;
mov \$0xf,$mask
mov $a1,$a
mov $b1,$b
- call _mul_1x1 # a1·b1
+ call _mul_1x1 # a1·b1
mov $lo,16(%rsp)
mov $hi,24(%rsp)
mov 48(%rsp),$a
mov 64(%rsp),$b
- call _mul_1x1 # a0·b0
+ call _mul_1x1 # a0·b0
mov $lo,0(%rsp)
mov $hi,8(%rsp)
@@ -239,7 +239,7 @@ $code.=<<___;
mov 56(%rsp),$b
xor 48(%rsp),$a
xor 64(%rsp),$b
- call _mul_1x1 # (a0+a1)·(b0+b1)
+ call _mul_1x1 # (a0+a1)·(b0+b1)
___
@r=("%rbx","%rcx","%rdi","%rsi");
$code.=<<___;
diff --git a/crypto/modes/asm/ghash-armv4.pl b/crypto/modes/asm/ghash-armv4.pl
index 7311ad2..2d225cf 100644
--- a/crypto/modes/asm/ghash-armv4.pl
+++ b/crypto/modes/asm/ghash-armv4.pl
@@ -45,7 +45,7 @@
# processes one byte in 8.45 cycles, A9 - in 10.2, A15 - in 7.63,
# Snapdragon S4 - in 9.33.
#
-# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
+# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
# Polynomial Multiplication on ARM Processors using the NEON Engine.
#
# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf
@@ -449,12 +449,12 @@ gcm_ghash_neon:
veor $IN,$Xl @ inp^=Xi
.Lgmult_neon:
___
- &clmul64x64 ($Xl,$Hlo,"$IN#lo"); # H.lo·Xi.lo
+ &clmul64x64 ($Xl,$Hlo,"$IN#lo"); # H.lo·Xi.lo
$code.=<<___;
veor $IN#lo,$IN#lo,$IN#hi @ Karatsuba pre-processing
___
- &clmul64x64 ($Xm,$Hhl,"$IN#lo"); # (H.lo+H.hi)·(Xi.lo+Xi.hi)
- &clmul64x64 ($Xh,$Hhi,"$IN#hi"); # H.hi·Xi.hi
+ &clmul64x64 ($Xm,$Hhl,"$IN#lo"); # (H.lo+H.hi)·(Xi.lo+Xi.hi)
+ &clmul64x64 ($Xh,$Hhi,"$IN#hi"); # H.hi·Xi.hi
$code.=<<___;
veor $Xm,$Xm,$Xl @ Karatsuba post-processing
veor $Xm,$Xm,$Xh
diff --git a/crypto/modes/asm/ghash-c64xplus.pl b/crypto/modes/asm/ghash-c64xplus.pl
index a465b42..93f6985 100644
--- a/crypto/modes/asm/ghash-c64xplus.pl
+++ b/crypto/modes/asm/ghash-c64xplus.pl
@@ -153,7 +153,7 @@ ___
# 8/2 S1 L1x S2 | ....
#####... ................|............
$code.=<<___;
- XORMPY $H0,$xia,$H0x ; 0 ; H·(Xi[i]<<1)
+ XORMPY $H0,$xia,$H0x ; 0 ; H·(Xi[i]<<1)
|| XORMPY $H01u,$xib,$H01y
|| [A0] LDBU *--${xip},$x0
XORMPY $H1,$xia,$H1x ; 1
@@ -162,7 +162,7 @@ $code.=<<___;
XORMPY $H3,$xia,$H3x ; 3
|| XORMPY $H3u,$xib,$H3y
||[!A0] MVK.D 15,A0 ; *--${xip} counter
- XOR.L $H0x,$Z0,$Z0 ; 4 ; Z^=H·(Xi[i]<<1)
+ XOR.L $H0x,$Z0,$Z0 ; 4 ; Z^=H·(Xi[i]<<1)
|| [A0] SUB.S A0,1,A0
XOR.L $H1x,$Z1,$Z1 ; 5
|| AND.D $H01y,$FF000000,$H0z
diff --git a/crypto/modes/asm/ghash-sparcv9.pl b/crypto/modes/asm/ghash-sparcv9.pl
index c1074ed..e928c42 100644
--- a/crypto/modes/asm/ghash-sparcv9.pl
+++ b/crypto/modes/asm/ghash-sparcv9.pl
@@ -379,7 +379,7 @@ gcm_init_vis3:
or $V,%lo(0xA0406080),$V
or %l0,%lo(0x20C0E000),%l0
sllx $V,32,$V
- or %l0,$V,$V ! (0xE0·i)&0xff=0xA040608020C0E000
+ or %l0,$V,$V ! (0xE0·i)&0xff=0xA040608020C0E000
stx $V,[%i0+16]
ret
@@ -399,7 +399,7 @@ gcm_gmult_vis3:
mov 0xE1,%l7
sllx %l7,57,$xE1 ! 57 is not a typo
- ldx [$Htable+16],$V ! (0xE0·i)&0xff=0xA040608020C0E000
+ ldx [$Htable+16],$V ! (0xE0·i)&0xff=0xA040608020C0E000
xor $Hhi,$Hlo,$Hhl ! Karatsuba pre-processing
xmulx $Xlo,$Hlo,$C0
@@ -411,9 +411,9 @@ gcm_gmult_vis3:
xmulx $Xhi,$Hhi,$Xhi
sll $C0,3,$sqr
- srlx $V,$sqr,$sqr ! ·0xE0 [implicit &(7<<3)]
+ srlx $V,$sqr,$sqr ! ·0xE0 [implicit &(7<<3)]
xor $C0,$sqr,$sqr
- sllx $sqr,57,$sqr ! ($C0·0xE1)<<1<<56 [implicit &0x7f]
+ sllx $sqr,57,$sqr ! ($C0·0xE1)<<1<<56 [implicit &0x7f]
xor $C0,$C1,$C1 ! Karatsuba post-processing
xor $Xlo,$C2,$C2
@@ -423,7 +423,7 @@ gcm_gmult_vis3:
xor $Xhi,$C2,$C2
xor $Xhi,$C1,$C1
- xmulxhi $C0,$xE1,$Xlo ! ·0xE1<<1<<56
+ xmulxhi $C0,$xE1,$Xlo ! ·0xE1<<1<<56
xor $C0,$C2,$C2
xmulx $C1,$xE1,$C0
xor $C1,$C3,$C3
@@ -453,7 +453,7 @@ gcm_ghash_vis3:
mov 0xE1,%l7
sllx %l7,57,$xE1 ! 57 is not a typo
- ldx [$Htable+16],$V ! (0xE0·i)&0xff=0xA040608020C0E000
+ ldx [$Htable+16],$V ! (0xE0·i)&0xff=0xA040608020C0E000
and $inp,7,$shl
andn $inp,7,$inp
@@ -490,9 +490,9 @@ gcm_ghash_vis3:
xmulx $Xhi,$Hhi,$Xhi
sll $C0,3,$sqr
- srlx $V,$sqr,$sqr ! ·0xE0 [implicit &(7<<3)]
+ srlx $V,$sqr,$sqr ! ·0xE0 [implicit &(7<<3)]
xor $C0,$sqr,$sqr
- sllx $sqr,57,$sqr ! ($C0·0xE1)<<1<<56 [implicit &0x7f]
+ sllx $sqr,57,$sqr ! ($C0·0xE1)<<1<<56 [implicit &0x7f]
xor $C0,$C1,$C1 ! Karatsuba post-processing
xor $Xlo,$C2,$C2
@@ -502,7 +502,7 @@ gcm_ghash_vis3:
xor $Xhi,$C2,$C2
xor $Xhi,$C1,$C1
- xmulxhi $C0,$xE1,$Xlo ! ·0xE1<<1<<56
+ xmulxhi $C0,$xE1,$Xlo ! ·0xE1<<1<<56
xor $C0,$C2,$C2
xmulx $C1,$xE1,$C0
xor $C1,$C3,$C3
diff --git a/crypto/modes/asm/ghash-x86.pl b/crypto/modes/asm/ghash-x86.pl
index 23a5527..0269169 100644
--- a/crypto/modes/asm/ghash-x86.pl
+++ b/crypto/modes/asm/ghash-x86.pl
@@ -358,7 +358,7 @@ $S=12; # shift factor for rem_4bit
# effective address calculation and finally merge of value to Z.hi.
# Reference to rem_4bit is scheduled so late that I had to >>4
# rem_4bit elements. This resulted in 20-45% procent improvement
-# on contemporary µ-archs.
+# on contemporary µ-archs.
{
my $cnt;
my $rem_4bit = "eax";
diff --git a/crypto/modes/asm/ghash-x86_64.pl b/crypto/modes/asm/ghash-x86_64.pl
index 6e656ca..5a7ce39 100644
--- a/crypto/modes/asm/ghash-x86_64.pl
+++ b/crypto/modes/asm/ghash-x86_64.pl
@@ -576,15 +576,15 @@ $code.=<<___ if (0 || (&reduction_alg9($Xhi,$Xi)&&0));
# experimental alternative. special thing about is that there
# no dependency between the two multiplications...
mov \$`0xE1<<1`,%eax
- mov \$0xA040608020C0E000,%r10 # ((7..0)·0xE0)&0xff
+ mov \$0xA040608020C0E000,%r10 # ((7..0)·0xE0)&0xff
mov \$0x07,%r11d
movq %rax,$T1
movq %r10,$T2
movq %r11,$T3 # borrow $T3
pand $Xi,$T3
- pshufb $T3,$T2 # ($Xi&7)·0xE0
+ pshufb $T3,$T2 # ($Xi&7)·0xE0
movq %rax,$T3
- pclmulqdq \$0x00,$Xi,$T1 # ·(0xE1<<1)
+ pclmulqdq \$0x00,$Xi,$T1 # ·(0xE1<<1)
pxor $Xi,$T2
pslldq \$15,$T2
paddd $T2,$T2 # <<(64+56+1)
@@ -657,7 +657,7 @@ $code.=<<___;
je .Lskip4x
sub \$0x30,$len
- mov \$0xA040608020C0E000,%rax # ((7..0)·0xE0)&0xff
+ mov \$0xA040608020C0E000,%rax # ((7..0)·0xE0)&0xff
movdqu 0x30($Htbl),$Hkey3
movdqu 0x40($Htbl),$Hkey4
diff --git a/crypto/modes/asm/ghashp8-ppc.pl b/crypto/modes/asm/ghashp8-ppc.pl
index e76a58c..71457cf 100755
--- a/crypto/modes/asm/ghashp8-ppc.pl
+++ b/crypto/modes/asm/ghashp8-ppc.pl
@@ -118,9 +118,9 @@ $code=<<___;
le?vperm $IN,$IN,$IN,$lemask
vxor $zero,$zero,$zero
- vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
- vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
- vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
+ vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
+ vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
+ vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
vpmsumd $t2,$Xl,$xC2 # 1st phase
@@ -178,11 +178,11 @@ $code=<<___;
.align 5
Loop:
subic $len,$len,16
- vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
+ vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
subfe. r0,r0,r0 # borrow?-1:0
- vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
+ vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
and r0,r0,$len
- vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
+ vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
add $inp,$inp,r0
vpmsumd $t2,$Xl,$xC2 # 1st phase
diff --git a/crypto/modes/asm/ghashv8-armx.pl b/crypto/modes/asm/ghashv8-armx.pl
index 3750d25..fe3a34d 100644
--- a/crypto/modes/asm/ghashv8-armx.pl
+++ b/crypto/modes/asm/ghashv8-armx.pl
@@ -144,10 +144,10 @@ gcm_gmult_v8:
#endif
vext.8 $IN,$t1,$t1,#8
- vpmull.p64 $Xl,$H,$IN @ H.lo·Xi.lo
+ vpmull.p64 $Xl,$H,$IN @ H.lo·Xi.lo
veor $t1,$t1,$IN @ Karatsuba pre-processing
- vpmull2.p64 $Xh,$H,$IN @ H.hi·Xi.hi
- vpmull.p64 $Xm,$Hhl,$t1 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
+ vpmull2.p64 $Xh,$H,$IN @ H.hi·Xi.hi
+ vpmull.p64 $Xm,$Hhl,$t1 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing
veor $t2,$Xl,$Xh
@@ -235,7 +235,7 @@ $code.=<<___;
#endif
vext.8 $In,$t1,$t1,#8
veor $IN,$IN,$Xl @ I[i]^=Xi
- vpmull.p64 $Xln,$H,$In @ H·Ii+1
+ vpmull.p64 $Xln,$H,$In @ H·Ii+1
veor $t1,$t1,$In @ Karatsuba pre-processing
vpmull2.p64 $Xhn,$H,$In
b .Loop_mod2x_v8
@@ -244,14 +244,14 @@ $code.=<<___;
.Loop_mod2x_v8:
vext.8 $t2,$IN,$IN,#8
subs $len,$len,#32 @ is there more data?
- vpmull.p64 $Xl,$H2,$IN @ H^2.lo·Xi.lo
+ vpmull.p64 $Xl,$H2,$IN @ H^2.lo·Xi.lo
cclr $inc,lo @ is it time to zero $inc?
vpmull.p64 $Xmn,$Hhl,$t1
veor $t2,$t2,$IN @ Karatsuba pre-processing
- vpmull2.p64 $Xh,$H2,$IN @ H^2.hi·Xi.hi
+ vpmull2.p64 $Xh,$H2,$IN @ H^2.hi·Xi.hi
veor $Xl,$Xl,$Xln @ accumulate
- vpmull2.p64 $Xm,$Hhl,$t2 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
+ vpmull2.p64 $Xm,$Hhl,$t2 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
vld1.64 {$t0},[$inp],$inc @ load [rotated] I[i+2]
veor $Xh,$Xh,$Xhn
@@ -276,7 +276,7 @@ $code.=<<___;
vext.8 $In,$t1,$t1,#8
vext.8 $IN,$t0,$t0,#8
veor $Xl,$Xm,$t2
- vpmull.p64 $Xln,$H,$In @ H·Ii+1
+ vpmull.p64 $Xln,$H,$In @ H·Ii+1
veor $IN,$IN,$Xh @ accumulate $IN early
vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction
@@ -300,10 +300,10 @@ $code.=<<___;
veor $IN,$IN,$Xl @ inp^=Xi
veor $t1,$t0,$t2 @ $t1 is rotated inp^Xi
- vpmull.p64 $Xl,$H,$IN @ H.lo·Xi.lo
+ vpmull.p64 $Xl,$H,$IN @ H.lo·Xi.lo
veor $t1,$t1,$IN @ Karatsuba pre-processing
- vpmull2.p64 $Xh,$H,$IN @ H.hi·Xi.hi
- vpmull.p64 $Xm,$Hhl,$t1 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
+ vpmull2.p64 $Xh,$H,$IN @ H.hi·Xi.hi
+ vpmull.p64 $Xm,$Hhl,$t1 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing
veor $t2,$Xl,$Xh
diff --git a/crypto/rc4/asm/rc4-586.pl b/crypto/rc4/asm/rc4-586.pl
index f906e09..d04e177 100644
--- a/crypto/rc4/asm/rc4-586.pl
+++ b/crypto/rc4/asm/rc4-586.pl
@@ -44,7 +44,7 @@
# Sandy Bridge 5.0/+8%
# Atom 12.6/+6%
# VIA Nano 6.4/+9%
-# Ivy Bridge 4.9/±0%
+# Ivy Bridge 4.9/±0%
# Bulldozer 4.9/+15%
#
# (*) PIII can actually deliver 6.6 cycles per byte with MMX code,
diff --git a/crypto/rc4/asm/rc4-x86_64.pl b/crypto/rc4/asm/rc4-x86_64.pl
index fa22763..4675106 100755
--- a/crypto/rc4/asm/rc4-x86_64.pl
+++ b/crypto/rc4/asm/rc4-x86_64.pl
@@ -56,7 +56,7 @@
# achieves respectful 432MBps on 2.8GHz processor now. For reference.
# If executed on Xeon, current RC4_CHAR code-path is 2.7x faster than
# RC4_INT code-path. While if executed on Opteron, it's only 25%
-# slower than the RC4_INT one [meaning that if CPU µ-arch detection
+# slower than the RC4_INT one [meaning that if CPU µ-arch detection
# is not implemented, then this final RC4_CHAR code-path should be
# preferred, as it provides better *all-round* performance].
diff --git a/crypto/sha/asm/sha1-586.pl b/crypto/sha/asm/sha1-586.pl
index 4895eb3..e0b5d83 100644
--- a/crypto/sha/asm/sha1-586.pl
+++ b/crypto/sha/asm/sha1-586.pl
@@ -66,9 +66,9 @@
# switch to AVX alone improves performance by as little as 4% in
# comparison to SSSE3 code path. But below result doesn't look like
# 4% improvement... Trouble is that Sandy Bridge decodes 'ro[rl]' as
-# pair of µ-ops, and it's the additional µ-ops, two per round, that
+# pair of µ-ops, and it's the additional µ-ops, two per round, that
# make it run slower than Core2 and Westmere. But 'sh[rl]d' is decoded
-# as single µ-op by Sandy Bridge and it's replacing 'ro[rl]' with
+# as single µ-op by Sandy Bridge and it's replacing 'ro[rl]' with
# equivalent 'sh[rl]d' that is responsible for the impressive 5.1
# cycles per processed byte. But 'sh[rl]d' is not something that used
# to be fast, nor does it appear to be fast in upcoming Bulldozer
diff --git a/crypto/sha/asm/sha256-586.pl b/crypto/sha/asm/sha256-586.pl
index 6462e45..e907714 100644
--- a/crypto/sha/asm/sha256-586.pl
+++ b/crypto/sha/asm/sha256-586.pl
@@ -10,7 +10,7 @@
# SHA256 block transform for x86. September 2007.
#
# Performance improvement over compiler generated code varies from
-# 10% to 40% [see below]. Not very impressive on some µ-archs, but
+# 10% to 40% [see below]. Not very impressive on some µ-archs, but
# it's 5 times smaller and optimizies amount of writes.
#
# May 2012.
diff --git a/crypto/sha/asm/sha512-586.pl b/crypto/sha/asm/sha512-586.pl
index e96ec00..2f6a202 100644
--- a/crypto/sha/asm/sha512-586.pl
+++ b/crypto/sha/asm/sha512-586.pl
@@ -37,7 +37,7 @@
#
# IALU code-path is optimized for elder Pentiums. On vanilla Pentium
# performance improvement over compiler generated code reaches ~60%,
-# while on PIII - ~35%. On newer µ-archs improvement varies from 15%
+# while on PIII - ~35%. On newer µ-archs improvement varies from 15%
# to 50%, but it's less important as they are expected to execute SSE2
# code-path, which is commonly ~2-3x faster [than compiler generated
# code]. SSE2 code-path is as fast as original sha512-sse2.pl, even
diff --git a/crypto/sparccpuid.S b/crypto/sparccpuid.S
index 9b6744f..72c7adf 100644
--- a/crypto/sparccpuid.S
+++ b/crypto/sparccpuid.S
@@ -127,7 +127,7 @@ OPENSSL_wipe_cpu:
fmovs %f1,%f3
fmovs %f0,%f2
- add %fp,BIAS,%i0 ! return pointer to caller´s top of stack
+ add %fp,BIAS,%i0 ! return pointer to caller´s top of stack
ret
restore
diff --git a/crypto/whrlpool/asm/wp-mmx.pl b/crypto/whrlpool/asm/wp-mmx.pl
index c584e5b..7725951 100644
--- a/crypto/whrlpool/asm/wp-mmx.pl
+++ b/crypto/whrlpool/asm/wp-mmx.pl
@@ -16,7 +16,7 @@
# table]. I stick to value of 2 for two reasons: 1. smaller table
# minimizes cache trashing and thus mitigates the hazard of side-
# channel leakage similar to AES cache-timing one; 2. performance
-# gap among different µ-archs is smaller.
+# gap among different µ-archs is smaller.
#
# Performance table lists rounded amounts of CPU cycles spent by
# whirlpool_block_mmx routine on single 64 byte input block, i.e.
diff --git a/crypto/x509v3/v3_pci.c b/crypto/x509v3/v3_pci.c
index c39acd7..ef105dc 100644
--- a/crypto/x509v3/v3_pci.c
+++ b/crypto/x509v3/v3_pci.c
@@ -3,7 +3,7 @@
* Contributed to the OpenSSL Project 2004 by Richard Levitte
* (richard@levitte.org)
*/
-/* Copyright (c) 2004 Kungliga Tekniska Högskolan
+/* Copyright (c) 2004 Kungliga Tekniska Högskolan
* (Royal Institute of Technology, Stockholm, Sweden).
* All rights reserved.
*
diff --git a/crypto/x509v3/v3_pcia.c b/crypto/x509v3/v3_pcia.c
index 350b398..43fd362 100644
--- a/crypto/x509v3/v3_pcia.c
+++ b/crypto/x509v3/v3_pcia.c
@@ -3,7 +3,7 @@
* Contributed to the OpenSSL Project 2004 by Richard Levitte
* (richard@levitte.org)
*/
-/* Copyright (c) 2004 Kungliga Tekniska Högskolan
+/* Copyright (c) 2004 Kungliga Tekniska Högskolan
* (Royal Institute of Technology, Stockholm, Sweden).
* All rights reserved.
*