diff options
author | Takashi Yano <takashi.yano@nifty.ne.jp> | 2024-10-13 07:41:40 +0900 |
---|---|---|
committer | Takashi Yano <takashi.yano@nifty.ne.jp> | 2024-11-01 04:56:27 +0900 |
commit | c607889824395f0eac7e85d5b81c906bf8025321 (patch) | |
tree | f055ae62fce835ab7a59215e4bc11c17bafc73dc /winsup/cygwin | |
parent | 7ed9adb356df61114bb3fa3d0efc098c0501921c (diff) | |
download | newlib-c607889824395f0eac7e85d5b81c906bf8025321.zip newlib-c607889824395f0eac7e85d5b81c906bf8025321.tar.gz newlib-c607889824395f0eac7e85d5b81c906bf8025321.tar.bz2 |
Cygwin: sigfe: Fix a bug that signal handler destroys fpu states
Previously, sigfe had a bug that the signal handler destroys fpu state.
This is caused by fninit instruction in sigdelayed. With this patch,
saving/restoring the FPU/SIMD state is done using fxsave/fxrstor or
xsave/xrstor rather than fnstcw/fldcw, stmxcsr/ldmxcsr and push/pop
xmm0-xmm15. Since xsave/xrstor is used, not only x87/MMX/SSE states
but also AVX/AVX2/AVX-512 states can be maintained unlike before.
Addresses: https://cygwin.com/pipermail/cygwin/2024-October/256503.html
Fixes: ed89fbc3ff11 ("* gendef (sigdelayed (x86_64)): Save and restore FPU control word.")
Reported-by: Christian Franke <Christian.Franke@t-online.de>
Suggested-by: Brian Inglis <Brian.Inglis@SystematicSW.ab.ca>
Reviewed-by: Corinna Vinschen <corinna@vinschen.de>
Signed-off-by: Takashi Yano <takashi.yano@nifty.ne.jp>
Diffstat (limited to 'winsup/cygwin')
-rw-r--r-- | winsup/cygwin/release/3.6.0 | 3 | ||||
-rwxr-xr-x | winsup/cygwin/scripts/gendef | 91 |
2 files changed, 53 insertions, 41 deletions
diff --git a/winsup/cygwin/release/3.6.0 b/winsup/cygwin/release/3.6.0 index 40e0a86..ddb303b 100644 --- a/winsup/cygwin/release/3.6.0 +++ b/winsup/cygwin/release/3.6.0 @@ -37,3 +37,6 @@ What changed: - Redesign pipe handling to minimize toggling blocking mode. The query_hdl stuff is no longer needed in new implementation. + +- Now using AVX/AVX2/AVX-512 instructions in signal handler does not + break their context. diff --git a/winsup/cygwin/scripts/gendef b/winsup/cygwin/scripts/gendef index 3b1f8b9..a80b215 100755 --- a/winsup/cygwin/scripts/gendef +++ b/winsup/cygwin/scripts/gendef @@ -185,7 +185,7 @@ sigdelayed: # make sure it is aligned from here on # We could be called from an interrupted thread which doesn't know # about his fate, so save and restore everything and the kitchen sink. - andq \$0xfffffffffffffff0,%rsp + andq \$0xffffffffffffffc0,%rsp .seh_setframe %rbp,0 pushq %r15 .seh_pushreg %r15 @@ -213,26 +213,41 @@ sigdelayed: .seh_pushreg %rbx pushq %rax .seh_pushreg %rax - subq \$0x128,%rsp - .seh_stackalloc 0x128 - stmxcsr 0x124(%rsp) - fnstcw 0x120(%rsp) - movdqa %xmm15,0x110(%rsp) - movdqa %xmm14,0x100(%rsp) - movdqa %xmm13,0xf0(%rsp) - movdqa %xmm12,0xe0(%rsp) - movdqa %xmm11,0xd0(%rsp) - movdqa %xmm10,0xc0(%rsp) - movdqa %xmm9,0xb0(%rsp) - movdqa %xmm8,0xa0(%rsp) - movdqa %xmm7,0x90(%rsp) - movdqa %xmm6,0x80(%rsp) - movdqa %xmm5,0x70(%rsp) - movdqa %xmm4,0x60(%rsp) - movdqa %xmm3,0x50(%rsp) - movdqa %xmm2,0x40(%rsp) - movdqa %xmm1,0x30(%rsp) - movdqa %xmm0,0x20(%rsp) + + # +0x20: indicates if xsave is available + # +0x24: decrement of the stack to allocate space + # +0x28: %eax returnd by cpuid (0x0d, 0x00) + # +0x2c: %edx returnd by cpuid (0x0d, 0x00) + # +0x30: state save area + movl \$1,%eax + cpuid + andl \$0x04000000,%ecx # xsave available? + jnz 1f + movl \$0x248,%ebx # 0x18 for alignment, 0x30 for additional space + subq %rbx,%rsp + movl %ecx,0x20(%rsp) + movl %ebx,0x24(%rsp) + fxsave64 0x30(%rsp) # x86 CPU with 64-bit mode has fxsave64/fxrstor64 + jmp 2f +1: + movl \$0x0d,%eax + xorl %ecx,%ecx + cpuid # get necessary space for xsave + movq %rbx,%rcx + addq \$0x48,%rbx # 0x18 for alignment, 0x30 for additional space + subq %rbx,%rsp + movl %ebx,0x24(%rsp) + xorq %rax,%rax + shrq \$3,%rcx + leaq 0x30(%rsp),%rdi + rep stosq + xgetbv # get XCR0 (ecx is 0 after rep) + movl %eax,0x28(%rsp) + movl %edx,0x2c(%rsp) + notl %ecx # set ecx non-zero + movl %ecx,0x20(%rsp) + xsave64 0x30(%rsp) +2: .seh_endprologue movq %gs:8,%r12 # get tls @@ -259,26 +274,20 @@ sigdelayed: xorl %r11d,%r11d movl %r11d,_cygtls.incyg(%r12) movl %r11d,_cygtls.stacklock(%r12) # unlock - movdqa 0x20(%rsp),%xmm0 - movdqa 0x30(%rsp),%xmm1 - movdqa 0x40(%rsp),%xmm2 - movdqa 0x50(%rsp),%xmm3 - movdqa 0x60(%rsp),%xmm4 - movdqa 0x70(%rsp),%xmm5 - movdqa 0x80(%rsp),%xmm6 - movdqa 0x90(%rsp),%xmm7 - movdqa 0xa0(%rsp),%xmm8 - movdqa 0xb0(%rsp),%xmm9 - movdqa 0xc0(%rsp),%xmm10 - movdqa 0xd0(%rsp),%xmm11 - movdqa 0xe0(%rsp),%xmm12 - movdqa 0xf0(%rsp),%xmm13 - movdqa 0x100(%rsp),%xmm14 - movdqa 0x110(%rsp),%xmm15 - fninit - fldcw 0x120(%rsp) - ldmxcsr 0x124(%rsp) - addq \$0x128,%rsp + + movl 0x20(%rsp),%ecx + testl %ecx,%ecx # xsave available? + jnz 1f + fxrstor64 0x30(%rsp) + jmp 2f +1: + movl 0x28(%rsp),%eax + movl 0x2c(%rsp),%edx + xrstor64 0x30(%rsp) +2: + movl 0x24(%rsp),%ebx + addq %rbx,%rsp + popq %rax popq %rbx popq %rcx |