From efa0569d2bfdbb7367fce42b1c99821b85d2d3ba Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Sat, 8 Aug 2009 17:48:09 -0700 Subject: Optimize x86-64 version of sem_timedwait. --- nptl/ChangeLog | 3 + .../sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S | 210 ++++++++++++++------- 2 files changed, 148 insertions(+), 65 deletions(-) (limited to 'nptl') diff --git a/nptl/ChangeLog b/nptl/ChangeLog index 48fcc0f..5be464e 100644 --- a/nptl/ChangeLog +++ b/nptl/ChangeLog @@ -1,5 +1,8 @@ 2009-08-08 Ulrich Drepper + * sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S (sem_timedwait): + Optimize code path used when FUTEX_CLOCK_REALTIME is supported. + * sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S (__pthread_cond_wait): Optimize by avoiding use of callee-safe register. diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S index 9576283..0291beb 100644 --- a/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S +++ b/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S @@ -65,34 +65,9 @@ sem_timedwait: retq /* Check whether the timeout value is valid. */ -1: pushq %r12 - cfi_adjust_cfa_offset(8) - cfi_rel_offset(%r12, 0) - pushq %r13 - cfi_adjust_cfa_offset(8) - cfi_rel_offset(%r13, 0) - pushq %r14 - cfi_adjust_cfa_offset(8) - cfi_rel_offset(%r14, 0) -#ifdef __ASSUME_FUTEX_CLOCK_REALTIME -# define STACKFRAME 8 -#else -# define STACKFRAME 24 -#endif - subq $STACKFRAME, %rsp - cfi_adjust_cfa_offset(STACKFRAME) - - movq %rdi, %r12 - movq %rsi, %r13 - - /* Check for invalid nanosecond field. */ - cmpq $1000000000, 8(%r13) - movl $EINVAL, %r14d +1: cmpq $1000000000, 8(%rsi) jae 6f - LOCK - addq $1, NWAITERS(%r12) - #ifndef __ASSUME_FUTEX_CLOCK_REALTIME # ifdef PIC cmpl $0, __have_futex_clock_realtime(%rip) @@ -102,15 +77,22 @@ sem_timedwait: je .Lreltmo #endif + /* This push is only needed to store the sem_t pointer for the + exception handler. */ + pushq %rdi + cfi_adjust_cfa_offset(8) + + movq %rsi, %r10 + + LOCK + addq $1, NWAITERS(%rdi) + .LcleanupSTART: 13: call __pthread_enable_asynccancel - movl %eax, (%rsp) + movl %eax, %r8d - movq %r13, %r10 -#if VALUE == 0 - movq %r12, %rdi -#else - leaq VALUE(%r12), %rdi +#if VALUE != 0 + leaq VALUE(%rdi), %rdi #endif movl $0xffffffff, %r9d movl $FUTEX_WAIT_BITSET|FUTEX_CLOCK_REALTIME, %esi @@ -118,22 +100,26 @@ sem_timedwait: movl $SYS_futex, %eax xorl %edx, %edx syscall - movq %rax, %r14 + movq %rax, %r9 +#if VALUE != 0 + leaq -VALUE(%rdi), %rdi +#endif - movl (%rsp), %edi + xchgq %r8, %rdi call __pthread_disable_asynccancel .LcleanupEND: + movq %r8, %rdi - testq %r14, %r14 + testq %r9, %r9 je 11f - cmpq $-EWOULDBLOCK, %r14 + cmpq $-EWOULDBLOCK, %r9 jne 3f 11: #if VALUE == 0 - movl (%r12), %eax + movl (%rdi), %eax #else - movl VALUE(%r12), %eax + movl VALUE(%rdi), %eax #endif 14: testl %eax, %eax je 13b @@ -141,49 +127,74 @@ sem_timedwait: leaq -1(%rax), %rcx LOCK #if VALUE == 0 - cmpxchgl %ecx, (%r12) + cmpxchgl %ecx, (%rdi) #else - cmpxchgl %ecx, VALUE(%r12) + cmpxchgl %ecx, VALUE(%rdi) #endif jne 14b -10: xorl %eax, %eax + xorl %eax, %eax 15: LOCK - subq $1, NWAITERS(%r12) + subq $1, NWAITERS(%rdi) - addq $STACKFRAME, %rsp - cfi_adjust_cfa_offset(-STACKFRAME) - popq %r14 - cfi_adjust_cfa_offset(-8) - cfi_restore(%r14) - popq %r13 + leaq 8(%rsp), %rsp cfi_adjust_cfa_offset(-8) - cfi_restore(%r13) - popq %r12 - cfi_adjust_cfa_offset(-8) - cfi_restore(%r12) retq - cfi_adjust_cfa_offset(STACKFRAME + 3 * 8) - cfi_rel_offset(%r12, STACKFRAME + 2 * 8) - cfi_rel_offset(%r13, STACKFRAME + 1 * 8) - cfi_rel_offset(%r14, STACKFRAME) -3: negq %r14 -6: + cfi_adjust_cfa_offset(8) +3: negq %r9 #if USE___THREAD movq errno@gottpoff(%rip), %rdx - movl %r14d, %fs:(%rdx) + movl %r9d, %fs:(%rdx) #else callq __errno_location@plt - movl %r14d, (%rax) + movl %r9d, (%rax) #endif orl $-1, %eax jmp 15b + cfi_adjust_cfa_offset(-8) +6: +#if USE___THREAD + movq errno@gottpoff(%rip), %rdx + movl $EINVAL, %fs:(%rdx) +#else + callq __errno_location@plt + movl $EINVAL, (%rax) +#endif + + orl $-1, %eax + + retq + #ifndef __ASSUME_FUTEX_CLOCK_REALTIME .Lreltmo: + pushq %r12 + cfi_adjust_cfa_offset(8) + cfi_rel_offset(%r12, 0) + pushq %r13 + cfi_adjust_cfa_offset(8) + cfi_rel_offset(%r13, 0) + pushq %r14 + cfi_adjust_cfa_offset(8) + cfi_rel_offset(%r14, 0) + +#ifdef __ASSUME_FUTEX_CLOCK_REALTIME +# define STACKFRAME 8 +#else +# define STACKFRAME 24 +#endif + subq $STACKFRAME, %rsp + cfi_adjust_cfa_offset(STACKFRAME) + + movq %rdi, %r12 + movq %rsi, %r13 + + LOCK + addq $1, NWAITERS(%r12) + 7: xorl %esi, %esi movq %rsp, %rdi movq $VSYSCALL_ADDR_vgettimeofday, %rax @@ -202,7 +213,7 @@ sem_timedwait: decq %rdi 5: testq %rdi, %rdi movl $ETIMEDOUT, %r14d - js 6b /* Time is already up. */ + js 36f /* Time is already up. */ movq %rdi, (%rsp) /* Store relative timeout. */ movq %rsi, 8(%rsp) @@ -235,7 +246,7 @@ sem_timedwait: testq %r14, %r14 je 9f cmpq $-EWOULDBLOCK, %r14 - jne 3b + jne 33f 9: # if VALUE == 0 @@ -254,15 +265,54 @@ sem_timedwait: cmpxchgl %ecx, VALUE(%r12) # endif jne 8b - jmp 10b + + xorl %eax, %eax + +45: LOCK + subq $1, NWAITERS(%r12) + + addq $STACKFRAME, %rsp + cfi_adjust_cfa_offset(-STACKFRAME) + popq %r14 + cfi_adjust_cfa_offset(-8) + cfi_restore(%r14) + popq %r13 + cfi_adjust_cfa_offset(-8) + cfi_restore(%r13) + popq %r12 + cfi_adjust_cfa_offset(-8) + cfi_restore(%r12) + retq + + cfi_adjust_cfa_offset(STACKFRAME + 3 * 8) + cfi_rel_offset(%r12, STACKFRAME + 2 * 8) + cfi_rel_offset(%r13, STACKFRAME + 1 * 8) + cfi_rel_offset(%r14, STACKFRAME) +33: negq %r14 +36: +#if USE___THREAD + movq errno@gottpoff(%rip), %rdx + movl %r14d, %fs:(%rdx) +#else + callq __errno_location@plt + movl %r14d, (%rax) #endif + + orl $-1, %eax + jmp 45b +#endif + cfi_endproc .size sem_timedwait,.-sem_timedwait .type sem_timedwait_cleanup,@function sem_timedwait_cleanup: + cfi_startproc + cfi_adjust_cfa_offset(8) + + movq (%rsp), %rdi LOCK - subq $1, NWAITERS(%r12) + subq $1, NWAITERS(%rdi) movq %rax, %rdi .LcallUR: call _Unwind_Resume@PLT @@ -272,6 +322,30 @@ sem_timedwait_cleanup: .size sem_timedwait_cleanup,.-sem_timedwait_cleanup +#ifndef __ASSUME_FUTEX_CLOCK_REALTIME + .type sem_timedwait_cleanup2,@function +sem_timedwait_cleanup2: + cfi_startproc + cfi_adjust_cfa_offset(STACKFRAME + 3 * 8) + cfi_rel_offset(%r12, STACKFRAME + 2 * 8) + cfi_rel_offset(%r13, STACKFRAME + 1 * 8) + cfi_rel_offset(%r14, STACKFRAME) + + LOCK + subq $1, NWAITERS(%r12) + movq %rax, %rdi + movq STACKFRAME(%rsp), %r14 + movq STACKFRAME+8(%rsp), %r13 + movq STACKFRAME+16(%rsp), %r12 +.LcallUR2: + call _Unwind_Resume@PLT + hlt +.LENDCODE2: + cfi_endproc + .size sem_timedwait_cleanup2,.-sem_timedwait_cleanup2 +#endif + + .section .gcc_except_table,"a",@progbits .LexceptSTART: .byte DW_EH_PE_omit # @LPStart format @@ -286,13 +360,19 @@ sem_timedwait_cleanup: #ifndef __ASSUME_FUTEX_CLOCK_REALTIME .uleb128 .LcleanupSTART2-.LSTARTCODE .uleb128 .LcleanupEND2-.LcleanupSTART2 - .uleb128 sem_timedwait_cleanup-.LSTARTCODE + .uleb128 sem_timedwait_cleanup2-.LSTARTCODE .uleb128 0 #endif .uleb128 .LcallUR-.LSTARTCODE .uleb128 .LENDCODE-.LcallUR .uleb128 0 .uleb128 0 +#ifndef __ASSUME_FUTEX_CLOCK_REALTIME + .uleb128 .LcallUR2-.LSTARTCODE + .uleb128 .LENDCODE2-.LcallUR2 + .uleb128 0 + .uleb128 0 +#endif .Lcstend: -- cgit v1.1