diff options
Diffstat (limited to 'MdePkg')
-rw-r--r-- | MdePkg/Library/BaseMemoryLibSse2/Ia32/SetMem.nasm | 11 | ||||
-rw-r--r-- | MdePkg/Library/BaseMemoryLibSse2/Ia32/SetMem16.nasm | 11 | ||||
-rw-r--r-- | MdePkg/Library/BaseMemoryLibSse2/Ia32/SetMem32.nasm | 9 | ||||
-rw-r--r-- | MdePkg/Library/BaseMemoryLibSse2/Ia32/SetMem64.nasm | 20 | ||||
-rw-r--r-- | MdePkg/Library/BaseMemoryLibSse2/Ia32/ZeroMem.nasm | 11 | ||||
-rw-r--r-- | MdePkg/Library/BaseMemoryLibSse2/X64/SetMem.nasm | 9 | ||||
-rw-r--r-- | MdePkg/Library/BaseMemoryLibSse2/X64/SetMem16.nasm | 11 | ||||
-rw-r--r-- | MdePkg/Library/BaseMemoryLibSse2/X64/SetMem32.nasm | 9 | ||||
-rw-r--r-- | MdePkg/Library/BaseMemoryLibSse2/X64/SetMem64.nasm | 19 | ||||
-rw-r--r-- | MdePkg/Library/BaseMemoryLibSse2/X64/ZeroMem.nasm | 13 |
10 files changed, 85 insertions, 38 deletions
diff --git a/MdePkg/Library/BaseMemoryLibSse2/Ia32/SetMem.nasm b/MdePkg/Library/BaseMemoryLibSse2/Ia32/SetMem.nasm index 24313cb..a874430 100644 --- a/MdePkg/Library/BaseMemoryLibSse2/Ia32/SetMem.nasm +++ b/MdePkg/Library/BaseMemoryLibSse2/Ia32/SetMem.nasm @@ -34,7 +34,7 @@ ASM_PFX(InternalMemSetMem): mov al, [esp + 16] ; al <- Value
xor ecx, ecx
sub ecx, edi
- and ecx, 15 ; ecx + edi aligns on 16-byte boundary
+ and ecx, 63 ; ecx + edi aligns on 64-byte boundary
jz .0
cmp ecx, edx
cmova ecx, edx
@@ -42,8 +42,8 @@ ASM_PFX(InternalMemSetMem): rep stosb
.0:
mov ecx, edx
- and edx, 15
- shr ecx, 4 ; ecx <- # of DQwords to set
+ and edx, 63
+ shr ecx, 6 ; ecx <- # of 64-byte blocks to set
jz @SetBytes
mov ah, al ; ax <- Value | (Value << 8)
add esp, -16
@@ -53,7 +53,10 @@ ASM_PFX(InternalMemSetMem): movlhps xmm0, xmm0 ; xmm0 <- Value repeats 16 times
.1:
movntdq [edi], xmm0 ; edi should be 16-byte aligned
- add edi, 16
+ movntdq [edi + 16], xmm0
+ movntdq [edi + 32], xmm0
+ movntdq [edi + 48], xmm0
+ add edi, 64
loop .1
mfence
movdqu xmm0, [esp] ; restore xmm0
diff --git a/MdePkg/Library/BaseMemoryLibSse2/Ia32/SetMem16.nasm b/MdePkg/Library/BaseMemoryLibSse2/Ia32/SetMem16.nasm index 6e308b5..d461ee0 100644 --- a/MdePkg/Library/BaseMemoryLibSse2/Ia32/SetMem16.nasm +++ b/MdePkg/Library/BaseMemoryLibSse2/Ia32/SetMem16.nasm @@ -33,7 +33,7 @@ ASM_PFX(InternalMemSetMem16): mov edi, [esp + 8]
xor ecx, ecx
sub ecx, edi
- and ecx, 15 ; ecx + edi aligns on 16-byte boundary
+ and ecx, 63 ; ecx + edi aligns on 64-byte boundary
mov eax, [esp + 16]
jz .0
shr ecx, 1
@@ -43,15 +43,18 @@ ASM_PFX(InternalMemSetMem16): rep stosw
.0:
mov ecx, edx
- and edx, 7
- shr ecx, 3
+ and edx, 31
+ shr ecx, 5
jz @SetWords
movd xmm0, eax
pshuflw xmm0, xmm0, 0
movlhps xmm0, xmm0
.1:
movntdq [edi], xmm0 ; edi should be 16-byte aligned
- add edi, 16
+ movntdq [edi + 16], xmm0
+ movntdq [edi + 32], xmm0
+ movntdq [edi + 48], xmm0
+ add edi, 64
loop .1
mfence
@SetWords:
diff --git a/MdePkg/Library/BaseMemoryLibSse2/Ia32/SetMem32.nasm b/MdePkg/Library/BaseMemoryLibSse2/Ia32/SetMem32.nasm index 2cfc8cb..3ffdcd0 100644 --- a/MdePkg/Library/BaseMemoryLibSse2/Ia32/SetMem32.nasm +++ b/MdePkg/Library/BaseMemoryLibSse2/Ia32/SetMem32.nasm @@ -43,14 +43,17 @@ ASM_PFX(InternalMemSetMem32): rep stosd
.0:
mov ecx, edx
- and edx, 3
- shr ecx, 2
+ and edx, 15
+ shr ecx, 4
jz @SetDwords
movd xmm0, eax
pshufd xmm0, xmm0, 0
.1:
movntdq [edi], xmm0
- add edi, 16
+ movntdq [edi + 16], xmm0
+ movntdq [edi + 32], xmm0
+ movntdq [edi + 48], xmm0
+ add edi, 64
loop .1
mfence
@SetDwords:
diff --git a/MdePkg/Library/BaseMemoryLibSse2/Ia32/SetMem64.nasm b/MdePkg/Library/BaseMemoryLibSse2/Ia32/SetMem64.nasm index e153495..cd00064 100644 --- a/MdePkg/Library/BaseMemoryLibSse2/Ia32/SetMem64.nasm +++ b/MdePkg/Library/BaseMemoryLibSse2/Ia32/SetMem64.nasm @@ -38,17 +38,29 @@ ASM_PFX(InternalMemSetMem64): add edx, 8
dec ecx
.0:
- shr ecx, 1
+ push ebx
+ mov ebx, ecx
+ and ebx, 7
+ shr ecx, 3
jz @SetQwords
movlhps xmm0, xmm0
.1:
movntdq [edx], xmm0
- lea edx, [edx + 16]
+ movntdq [edx + 16], xmm0
+ movntdq [edx + 32], xmm0
+ movntdq [edx + 48], xmm0
+ lea edx, [edx + 64]
loop .1
mfence
@SetQwords:
- jnc .2
+ test ebx, ebx
+ jz .3
+ mov ecx, ebx
+.2
movq qword [edx], xmm0
-.2:
+ lea edx, [edx + 8]
+ loop .2
+.3:
+ pop ebx
ret
diff --git a/MdePkg/Library/BaseMemoryLibSse2/Ia32/ZeroMem.nasm b/MdePkg/Library/BaseMemoryLibSse2/Ia32/ZeroMem.nasm index cd34006..0e08285 100644 --- a/MdePkg/Library/BaseMemoryLibSse2/Ia32/ZeroMem.nasm +++ b/MdePkg/Library/BaseMemoryLibSse2/Ia32/ZeroMem.nasm @@ -33,7 +33,7 @@ ASM_PFX(InternalMemZeroMem): xor ecx, ecx
sub ecx, edi
xor eax, eax
- and ecx, 15
+ and ecx, 63
jz .0
cmp ecx, edx
cmova ecx, edx
@@ -41,13 +41,16 @@ ASM_PFX(InternalMemZeroMem): rep stosb
.0:
mov ecx, edx
- and edx, 15
- shr ecx, 4
+ and edx, 63
+ shr ecx, 6
jz @ZeroBytes
pxor xmm0, xmm0
.1:
movntdq [edi], xmm0
- add edi, 16
+ movntdq [edi + 16], xmm0
+ movntdq [edi + 32], xmm0
+ movntdq [edi + 48], xmm0
+ add edi, 64
loop .1
mfence
@ZeroBytes:
diff --git a/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem.nasm b/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem.nasm index 5bd1c22..28b11ee 100644 --- a/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem.nasm +++ b/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem.nasm @@ -42,8 +42,8 @@ ASM_PFX(InternalMemSetMem): rep stosb
.0:
mov rcx, rdx
- and rdx, 15
- shr rcx, 4
+ and rdx, 63
+ shr rcx, 6
jz @SetBytes
mov ah, al ; ax <- Value repeats twice
movdqa [rsp + 0x10], xmm0 ; save xmm0
@@ -52,7 +52,10 @@ ASM_PFX(InternalMemSetMem): movlhps xmm0, xmm0 ; xmm0 <- Value repeats 16 times
.1:
movntdq [rdi], xmm0 ; rdi should be 16-byte aligned
- add rdi, 16
+ movntdq [rdi + 16], xmm0
+ movntdq [rdi + 32], xmm0
+ movntdq [rdi + 48], xmm0
+ add rdi, 64
loop .1
mfence
movdqa xmm0, [rsp + 0x10] ; restore xmm0
diff --git a/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem16.nasm b/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem16.nasm index 90d1598..375be19 100644 --- a/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem16.nasm +++ b/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem16.nasm @@ -33,7 +33,7 @@ ASM_PFX(InternalMemSetMem16): mov r9, rdi
xor rcx, rcx
sub rcx, rdi
- and rcx, 15
+ and rcx, 63
mov rax, r8
jz .0
shr rcx, 1
@@ -43,15 +43,18 @@ ASM_PFX(InternalMemSetMem16): rep stosw
.0:
mov rcx, rdx
- and edx, 7
- shr rcx, 3
+ and edx, 31
+ shr rcx, 5
jz @SetWords
movd xmm0, eax
pshuflw xmm0, xmm0, 0
movlhps xmm0, xmm0
.1:
movntdq [rdi], xmm0
- add rdi, 16
+ movntdq [rdi + 16], xmm0
+ movntdq [rdi + 32], xmm0
+ movntdq [rdi + 48], xmm0
+ add rdi, 64
loop .1
mfence
@SetWords:
diff --git a/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem32.nasm b/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem32.nasm index 928e086..5d12bea 100644 --- a/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem32.nasm +++ b/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem32.nasm @@ -43,14 +43,17 @@ ASM_PFX(InternalMemSetMem32): rep stosd
.0:
mov rcx, rdx
- and edx, 3
- shr rcx, 2
+ and edx, 15
+ shr rcx, 4
jz @SetDwords
movd xmm0, eax
pshufd xmm0, xmm0, 0
.1:
movntdq [rdi], xmm0
- add rdi, 16
+ movntdq [rdi + 16], xmm0
+ movntdq [rdi + 32], xmm0
+ movntdq [rdi + 48], xmm0
+ add rdi, 64
loop .1
mfence
@SetDwords:
diff --git a/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem64.nasm b/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem64.nasm index d771810..485f74d 100644 --- a/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem64.nasm +++ b/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem64.nasm @@ -37,17 +37,28 @@ ASM_PFX(InternalMemSetMem64): add rdx, 8
dec rcx
.0:
- shr rcx, 1
+ push rbx
+ mov rbx, rcx
+ and rbx, 7
+ shr rcx, 3
jz @SetQwords
movlhps xmm0, xmm0
.1:
movntdq [rdx], xmm0
- lea rdx, [rdx + 16]
+ movntdq [rdx + 16], xmm0
+ movntdq [rdx + 32], xmm0
+ movntdq [rdx + 48], xmm0
+ lea rdx, [rdx + 64]
loop .1
mfence
@SetQwords:
- jnc .2
- mov [rdx], r8
+ push rdi
+ mov rcx, rbx
+ mov rax, r8
+ mov rdi, rdx
+ rep stosq
+ pop rdi
.2:
+ pop rbx
ret
diff --git a/MdePkg/Library/BaseMemoryLibSse2/X64/ZeroMem.nasm b/MdePkg/Library/BaseMemoryLibSse2/X64/ZeroMem.nasm index 5ddcae9..21f504e 100644 --- a/MdePkg/Library/BaseMemoryLibSse2/X64/ZeroMem.nasm +++ b/MdePkg/Library/BaseMemoryLibSse2/X64/ZeroMem.nasm @@ -32,7 +32,7 @@ ASM_PFX(InternalMemZeroMem): xor rcx, rcx
xor eax, eax
sub rcx, rdi
- and rcx, 15
+ and rcx, 63
mov r8, rdi
jz .0
cmp rcx, rdx
@@ -41,13 +41,16 @@ ASM_PFX(InternalMemZeroMem): rep stosb
.0:
mov rcx, rdx
- and edx, 15
- shr rcx, 4
+ and edx, 63
+ shr rcx, 6
jz @ZeroBytes
pxor xmm0, xmm0
.1:
- movntdq [rdi], xmm0 ; rdi should be 16-byte aligned
- add rdi, 16
+ movntdq [rdi], xmm0
+ movntdq [rdi + 16], xmm0
+ movntdq [rdi + 32], xmm0
+ movntdq [rdi + 48], xmm0
+ add rdi, 64
loop .1
mfence
@ZeroBytes:
|