From 7bb672b9de470892efd5699d575fb057b4d9d4ee Mon Sep 17 00:00:00 2001 From: "Ryan S. Arnold" Date: Fri, 24 Jul 2009 10:30:00 -0500 Subject: Enable 64-bit FPSCR for POWER6 and include some dynamic feature checks for DFP facility availability. commit edba7a54eb83c37610b15454a21d54f47ec9dee7 Author: Ulrich Drepper Date: Mon Nov 17 02:49:45 2008 +0000 2008-11-13 Ryan S. Arnold [BZ #6411] * sysdeps/powerpc/fpu/Makefile: Added test case tst-setcontext-fpscr. * sysdeps/powerpc/fpu/feholdexcpt.c (_FPU_MASK_ALL): Define to replace magic numbers. * sysdeps/powerpc/fpu/fenv_libc.h (fesetenv_register): Dynamically choose mtfsf insn based on PPC_FEATURE_HAS_DFP. (relax_fenv_state): Same as above. (FPSCR_29): Reserve bit in ISA 2.05. (FPSCR_NI): Provide define for compat. * sysdeps/powerpc/fpu/fesetenv.c (_FPU_MASK_ALL): Define to replace magic numbers. * sysdeps/powerpc/fpu/feupdateenv.c (_FPU_MASK_ALL): Define to replace magic numbers. * sysdeps/powerpc/fpu/tst-setcontext-fpscr.c: New file. Test case to test setcontext and swapcontext with dynamic 64-bit FPSCR detection. * sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S (__longjmp): Adjust access to hwcap to account for hwcap size increase to uint64_t. * sysdeps/powerpc/powerpc32/fpu/setjmp-common.S (__sigsetjmp ): Likewise. * sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S (*setcontext): Likewise. * sysdeps/unix/sysv/linux/powerpc/powerpc32/power6/fpu/setcontext.S: New file. * sysdeps/unix/sysv/linux/powerpc/powerpc32/power6/fpu/swapcontext.S: New file. * sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S (*setcontext): dynamically select mtfsf insn based on PPC_FEATURE_HAS_DFP. Adjust access to hwcap to account for hwcap size increase to uint64_t. * sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S (*swapcontext): dynamically select mtfsf insn based on PPC_FEATURE_HAS_DFP. Adjust access to hwcap to account for hwcap size increase to uint64_t. * sysdeps/unix/sysv/linux/powerpc/powerpc64/power6/fpu/setcontext.S: New file. * sysdeps/unix/sysv/linux/powerpc/powerpc64/power6/fpu/swapcontext.S: New file. * sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S (*setcontext): dynamically select mtfsf insn based on PPC_FEATURE_HAS_DFP. * sysdeps/unix/sysv/linux/powerpc/powerpc64/swapcontext.S (*swapcontext): dynamically select mtfsf insn based on PPC_FEATURE_HAS_DFP. --- sysdeps/powerpc/fpu/Makefile | 4 + sysdeps/powerpc/fpu/feholdexcpt.c | 6 +- sysdeps/powerpc/fpu/fenv_libc.h | 26 +- sysdeps/powerpc/fpu/fesetenv.c | 20 +- sysdeps/powerpc/fpu/feupdateenv.c | 10 +- sysdeps/powerpc/fpu/tst-setcontext-fpscr.c | 358 +++++++++++++++++++++ sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S | 8 +- sysdeps/powerpc/powerpc32/fpu/setjmp-common.S | 8 +- .../linux/powerpc/powerpc32/getcontext-common.S | 8 +- .../powerpc/powerpc32/power6/fpu/setcontext.S | 2 + .../powerpc/powerpc32/power6/fpu/swapcontext.S | 2 + .../linux/powerpc/powerpc32/setcontext-common.S | 50 +-- .../linux/powerpc/powerpc32/swapcontext-common.S | 60 ++-- .../powerpc/powerpc64/power6/fpu/setcontext.S | 2 + .../powerpc/powerpc64/power6/fpu/swapcontext.S | 2 + .../unix/sysv/linux/powerpc/powerpc64/setcontext.S | 58 +++- .../sysv/linux/powerpc/powerpc64/swapcontext.S | 58 +++- 17 files changed, 593 insertions(+), 89 deletions(-) create mode 100644 sysdeps/powerpc/fpu/tst-setcontext-fpscr.c create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc32/power6/fpu/setcontext.S create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc32/power6/fpu/swapcontext.S create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc64/power6/fpu/setcontext.S create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc64/power6/fpu/swapcontext.S diff --git a/sysdeps/powerpc/fpu/Makefile b/sysdeps/powerpc/fpu/Makefile index ce67ff8..ffacf1a 100644 --- a/sysdeps/powerpc/fpu/Makefile +++ b/sysdeps/powerpc/fpu/Makefile @@ -5,3 +5,7 @@ libm-tests += test-powerpc-snan # libm needs ld.so to access dl_hwcap $(objpfx)libm.so: $(elfobjdir)/ld.so endif + +ifeq ($(subdir),stdlib) +tests += tst-setcontext-fpscr +endif diff --git a/sysdeps/powerpc/fpu/feholdexcpt.c b/sysdeps/powerpc/fpu/feholdexcpt.c index c943224..b74b18a 100644 --- a/sysdeps/powerpc/fpu/feholdexcpt.c +++ b/sysdeps/powerpc/fpu/feholdexcpt.c @@ -1,5 +1,5 @@ /* Store current floating-point environment and clear exceptions. - Copyright (C) 1997, 2005 Free Software Foundation, Inc. + Copyright (C) 1997, 2005, 2008 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -18,6 +18,8 @@ 02111-1307 USA. */ #include +#include +#define _FPU_MASK_ALL (_FPU_MASK_ZM | _FPU_MASK_OM | _FPU_MASK_UM | _FPU_MASK_XM | _FPU_MASK_IM) int feholdexcept (fenv_t *envp) @@ -35,7 +37,7 @@ feholdexcept (fenv_t *envp) /* If the old env had any eabled exceptions, then mask SIGFPE in the MSR FE0/FE1 bits. This may allow the FPU to run faster because it always takes the default action and can not generate SIGFPE. */ - if ((old.l[1] & 0x000000F8) != 0) + if ((old.l[1] & _FPU_MASK_ALL) != 0) (void)__fe_mask_env (); /* Put the new state in effect. */ diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h index 6f116b6..c70f851 100644 --- a/sysdeps/powerpc/fpu/fenv_libc.h +++ b/sysdeps/powerpc/fpu/fenv_libc.h @@ -21,6 +21,8 @@ #define _FENV_LIBC_H 1 #include +#include +#include libm_hidden_proto (__fe_nomask_env) @@ -34,7 +36,13 @@ libm_hidden_proto (__fe_nomask_env) /* Equivalent to fesetenv, but takes a fenv_t instead of a pointer. */ #define fesetenv_register(env) \ - ({ double d = (env); asm volatile ("mtfsf 0xff,%0" : : "f" (d)); }) + do { \ + double d = (env); \ + if(GLRO(dl_hwcap) & PPC_FEATURE_HAS_DFP) \ + asm volatile ("mtfsf 0xff,%0,1,0" : : "f" (d)); \ + else \ + asm volatile ("mtfsf 0xff,%0" : : "f" (d)); \ + } while(0) /* This very handy macro: - Sets the rounding mode to 'round to nearest'; @@ -42,7 +50,12 @@ libm_hidden_proto (__fe_nomask_env) - Prevents exceptions from being raised for inexact results. These things happen to be exactly what you need for typical elementary functions. */ -#define relax_fenv_state() asm ("mtfsfi 7,0") +#define relax_fenv_state() \ + do { \ + if(GLRO(dl_hwcap) & PPC_FEATURE_HAS_DFP) \ + asm ("mtfsfi 7,0,1"); \ + asm ("mtfsfi 7,0"); \ + } while(0) /* Set/clear a particular FPSCR bit (for instance, reset_fpscr_bit(FPSCR_VE); @@ -122,10 +135,19 @@ enum { FPSCR_UE, /* underflow exception enable */ FPSCR_ZE, /* zero divide exception enable */ FPSCR_XE, /* inexact exception enable */ +#ifdef _ARCH_PWR6 + FPSCR_29, /* Reserved in ISA 2.05 */ +#else FPSCR_NI /* non-IEEE mode (typically, no denormalised numbers) */ +#endif /* _ARCH_PWR6 */ /* the remaining two least-significant bits keep the rounding mode */ }; +#ifdef _ARCH_PWR6 + /* Not supported in ISA 2.05. Provided for source compat only. */ +# define FPSCR_NI 29 +#endif /* _ARCH_PWR6 */ + /* This operation (i) sets the appropriate FPSCR bits for its parameter, (ii) converts SNaN to the corresponding NaN, and (iii) otherwise passes its parameter through unchanged (in particular, -0 diff --git a/sysdeps/powerpc/fpu/fesetenv.c b/sysdeps/powerpc/fpu/fesetenv.c index 5a0c742..fa81d78 100644 --- a/sysdeps/powerpc/fpu/fesetenv.c +++ b/sysdeps/powerpc/fpu/fesetenv.c @@ -1,5 +1,6 @@ /* Install given floating-point environment. - Copyright (C) 1997,99,2000,01,02,07 Free Software Foundation, Inc. + Copyright (C) 1997, 1999, 2000, 2001, 2007, 2008 + Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -18,8 +19,11 @@ 02111-1307 USA. */ #include +#include #include +#define _FPU_MASK_ALL (_FPU_MASK_ZM | _FPU_MASK_OM | _FPU_MASK_UM | _FPU_MASK_XM | _FPU_MASK_IM) + int __fesetenv (const fenv_t *envp) { @@ -29,18 +33,18 @@ __fesetenv (const fenv_t *envp) new.fenv = *envp; old.fenv = fegetenv_register (); - /* If the old env has no eabled exceptions and the new env has any enabled - exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put - the hardware into "precise mode" and may cause the FPU to run slower on - some hardware. */ - if ((old.l[1] & 0x000000F8) == 0 && (new.l[1] & 0x000000F8) != 0) + /* If the old env has no enabled exceptions and the new env has any enabled + exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put the + hardware into "precise mode" and may cause the FPU to run slower on some + hardware. */ + if ((old.l[1] & _FPU_MASK_ALL) == 0 && (new.l[1] & _FPU_MASK_ALL) != 0) (void)__fe_nomask_env (); - /* If the old env had any eabled exceptions and the new env has no enabled + /* If the old env had any enabled exceptions and the new env has no enabled exceptions, then mask SIGFPE in the MSR FE0/FE1 bits. This may allow the FPU to run faster because it always takes the default action and can not generate SIGFPE. */ - if ((old.l[1] & 0x000000F8) != 0 && (new.l[1] & 0x000000F8) == 0) + if ((old.l[1] & _FPU_MASK_ALL) != 0 && (new.l[1] & _FPU_MASK_ALL) == 0) (void)__fe_mask_env (); fesetenv_register (*envp); diff --git a/sysdeps/powerpc/fpu/feupdateenv.c b/sysdeps/powerpc/fpu/feupdateenv.c index 5a4000f..5fca301 100644 --- a/sysdeps/powerpc/fpu/feupdateenv.c +++ b/sysdeps/powerpc/fpu/feupdateenv.c @@ -1,5 +1,6 @@ /* Install given floating-point environment and raise exceptions. - Copyright (C) 1997,99,2000,01,07 Free Software Foundation, Inc. + Copyright (C) 1997, 1999, 2000, 2001, 2007, 2008 + Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1997. @@ -19,8 +20,11 @@ 02111-1307 USA. */ #include +#include #include +#define _FPU_MASK_ALL (_FPU_MASK_ZM | _FPU_MASK_OM | _FPU_MASK_UM | _FPU_MASK_XM | _FPU_MASK_IM) + int __feupdateenv (const fenv_t *envp) { @@ -39,14 +43,14 @@ __feupdateenv (const fenv_t *envp) exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put the hardware into "precise mode" and may cause the FPU to run slower on some hardware. */ - if ((old.l[1] & 0x000000F8) == 0 && (new.l[1] & 0x000000F8) != 0) + if ((old.l[1] & _FPU_MASK_ALL) == 0 && (new.l[1] & _FPU_MASK_ALL) != 0) (void)__fe_nomask_env (); /* If the old env had any eabled exceptions and the new env has no enabled exceptions, then mask SIGFPE in the MSR FE0/FE1 bits. This may allow the FPU to run faster because it always takes the default action and can not generate SIGFPE. */ - if ((old.l[1] & 0x000000F8) != 0 && (new.l[1] & 0x000000F8) == 0) + if ((old.l[1] & _FPU_MASK_ALL) != 0 && (new.l[1] & _FPU_MASK_ALL) == 0) (void)__fe_mask_env (); /* Atomically enable and raise (if appropriate) exceptions set in `new'. */ diff --git a/sysdeps/powerpc/fpu/tst-setcontext-fpscr.c b/sysdeps/powerpc/fpu/tst-setcontext-fpscr.c new file mode 100644 index 0000000..973fb3f --- /dev/null +++ b/sysdeps/powerpc/fpu/tst-setcontext-fpscr.c @@ -0,0 +1,358 @@ +/* Copyright (C) 2001,2002,2004,2006,2007,2008 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ryan S. Arnold + Sean Curry + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static ucontext_t ctx[3]; + + +volatile int global; + + +static int back_in_main; + + +volatile static ElfW(auxv_t) *auxv = NULL; + +ElfW(Addr) query_auxv(int type) +{ + FILE *auxv_f; + ElfW(auxv_t) auxv_struct; + ElfW(auxv_t) *auxv_temp; + int i = 0; + + /* if the /proc/self/auxv file has not been manually copied into the heap + yet, then do it */ + + if(auxv == NULL) + { + auxv_f = fopen("/proc/self/auxv", "r"); + + if(auxv_f == 0) + { + perror("Error opening file for reading"); + return 0; + } + auxv = (ElfW(auxv_t) *)malloc(getpagesize()); + + do + { + fread(&auxv_struct, sizeof(ElfW(auxv_t)), 1, auxv_f); + auxv[i] = auxv_struct; + i++; + } while(auxv_struct.a_type != AT_NULL); + } + + auxv_temp = (ElfW(auxv_t) *)auxv; + i = 0; + do + { + if(auxv_temp[i].a_type == type) + { + return auxv_temp[i].a_un.a_val; + } + i++; + } while (auxv_temp[i].a_type != AT_NULL); + + return 0; +} + +typedef unsigned long long di_fpscr_t __attribute__ ((__mode__ (__DI__))); +typedef unsigned int si_fpscr_t __attribute__ ((__mode__ (__SI__))); + +#define _FPSCR_RESERVED 0xfffffff8ffffff04ULL + +#define _FPSCR_TEST0_DRN 0x0000000400000000ULL +#define _FPSCR_TEST0_RN 0x0000000000000003ULL + +#define _FPSCR_TEST1_DRN 0x0000000300000000ULL +#define _FPSCR_TEST1_RN 0x0000000000000002ULL + +/* Macros for accessing the hardware control word on Power6[x]. */ +# define _GET_DI_FPSCR(__fpscr) ({ \ + union { double d; \ + di_fpscr_t fpscr; } \ + tmp __attribute__ ((__aligned__(8))); \ + __asm__ ("mffs 0; stfd%U0 0,%0" : "=m" (tmp.d) : : "fr0"); \ + (__fpscr)=tmp.fpscr; \ + tmp.fpscr; }) + +# define _SET_DI_FPSCR(__fpscr) { \ + union { double d; di_fpscr_t fpscr; } \ + tmp __attribute__ ((__aligned__(8))); \ + tmp.fpscr = __fpscr; \ + /* Set the entire 64-bit FPSCR. */ \ + __asm__ ("lfd%U0 0,%0; mtfsf 255,0,1,0" : : "m" (tmp.d) : "fr0"); \ +} + +# define _GET_SI_FPSCR(__fpscr) ({ \ + union { double d; \ + si_fpscr_t cw[2]; } \ + tmp __attribute__ ((__aligned__(8))); \ + __asm__ ("mffs 0; stfd%U0 0,%0" : "=m" (tmp.d) : : "fr0"); \ + (__fpscr)=tmp.cw[1]; \ + tmp.cw[0]; }) + +# define _SET_SI_FPSCR(__fpscr) { \ + union { double d; si_fpscr_t fpscr[2]; } \ + tmp __attribute__ ((__aligned__(8))); \ + /* More-or-less arbitrary; this is a QNaN. */ \ + tmp.fpscr[0] = 0xFFF80000; \ + tmp.fpscr[1] = __fpscr; \ + __asm__ ("lfd%U0 0,%0; mtfsf 255,0" : : "m" (tmp.d) : "fr0"); \ +} + +void prime_special_regs(int which) +{ + ElfW(Addr) a_val; + + di_fpscr_t di_fpscr __attribute__ ((__aligned__(8))); + + a_val = query_auxv(AT_HWCAP); + if(a_val == -1) + { + puts ("querying the auxv for the hwcap failed"); + _exit (1); + } + + /* Indicates a 64-bit FPSCR. */ + if (a_val & PPC_FEATURE_HAS_DFP) + { + _GET_DI_FPSCR(di_fpscr); + + /* Overwrite the existing DRN and RN if there is one. */ + if (which == 0) + di_fpscr = ((di_fpscr & _FPSCR_RESERVED) | (_FPSCR_TEST0_DRN | _FPSCR_TEST0_RN)); + else + di_fpscr = ((di_fpscr & _FPSCR_RESERVED) | (_FPSCR_TEST1_DRN | _FPSCR_TEST1_RN)); + puts ("Priming 64-bit FPSCR with:"); + printf("0x%.16llx\n",(unsigned long long int)di_fpscr); + + _SET_DI_FPSCR(di_fpscr); + } + else + { + puts ("32-bit FPSCR found and will be tested."); + _GET_SI_FPSCR(di_fpscr); + + /* Overwrite the existing RN if there is one. */ + if (which == 0) + di_fpscr = ((di_fpscr & _FPSCR_RESERVED) | (_FPSCR_TEST0_RN)); + else + di_fpscr = ((di_fpscr & _FPSCR_RESERVED) | (_FPSCR_TEST1_RN)); + puts ("Priming 32-bit FPSCR with:"); + printf("0x%.8lx\n",(unsigned long int) di_fpscr); + + _SET_SI_FPSCR(di_fpscr); + } +} + +void clear_special_regs(void) +{ + ElfW(Addr) a_val; + + di_fpscr_t di_fpscr __attribute__ ((__aligned__(8))); + + union { + double d; + unsigned long long int lli; + unsigned int li[2]; + } dlli; + + a_val = query_auxv(AT_HWCAP); + if(a_val == -1) + { + puts ("querying the auxv for the hwcap failed"); + _exit (1); + } + +#if __WORDSIZE == 32 + dlli.d = ctx[0].uc_mcontext.uc_regs->fpregs.fpscr; +#else + dlli.d = ctx[0].uc_mcontext.fp_regs[32]; +#endif + + puts("The FPSCR value saved in the ucontext_t is:"); + + /* Indicates a 64-bit FPSCR. */ + if (a_val & PPC_FEATURE_HAS_DFP) + { + printf("0x%.16llx\n",dlli.lli); + di_fpscr = 0x0; + puts ("Clearing the 64-bit FPSCR to:"); + printf("0x%.16llx\n",(unsigned long long int) di_fpscr); + + _SET_DI_FPSCR(di_fpscr); + } + else + { + printf("0x%.8x\n",(unsigned int) dlli.li[1]); + di_fpscr = 0x0; + puts ("Clearing the 32-bit FPSCR to:"); + printf("0x%.8lx\n",(unsigned long int) di_fpscr); + + _SET_SI_FPSCR(di_fpscr); + } +} + +void test_special_regs(int which) +{ + ElfW(Addr) a_val; + unsigned long long int test; + + di_fpscr_t di_fpscr __attribute__ ((__aligned__(8))); + + a_val = query_auxv(AT_HWCAP); + if(a_val == -1) + { + puts ("querying the auxv for the hwcap failed"); + _exit (2); + } + + /* Indicates a 64-bit FPSCR. */ + if (a_val & PPC_FEATURE_HAS_DFP) + { + _GET_DI_FPSCR(di_fpscr); + + if (which == 0) + puts ("After setcontext the 64-bit FPSCR contains:"); + else + puts ("After swapcontext the 64-bit FPSCR contains:"); + + printf("0x%.16llx\n",(unsigned long long int) di_fpscr); + test = (_FPSCR_TEST0_DRN | _FPSCR_TEST0_RN); + if((di_fpscr & (test)) != (test)) + { + printf ("%s: DRN and RN bits set before getcontext were not preserved across [set|swap]context call: %m",__FUNCTION__); + _exit (3); + } + } + else + { + _GET_SI_FPSCR(di_fpscr); + if (which == 0) + puts ("After setcontext the 32-bit FPSCR contains:"); + else + puts ("After swapcontext the 32-bit FPSCR contains:"); + + printf("0x%.8lx\n",(unsigned long int) di_fpscr); + test = _FPSCR_TEST0_RN; + if((di_fpscr & test) != test) + { + printf ("%s: RN bit set before getcontext was not preserved across [set|swap]context call: %m",__FUNCTION__); + _exit (4); + } + } +} + + +static void +check_called (void) +{ + if (back_in_main == 0) + { + puts ("program did not reach main again"); + _exit (5); + } +} + + +int +main (void) +{ + atexit (check_called); + + puts ("priming the FPSCR with a marker"); + prime_special_regs (0); + + puts ("making contexts"); + if (getcontext (&ctx[0]) != 0) + { + if (errno == ENOSYS) + { + back_in_main = 1; + exit (0); + } + + printf ("%s: getcontext: %m\n", __FUNCTION__); + exit (6); + } + + /* Play some tricks with this context. */ + if (++global == 1) + { + clear_special_regs ( ); + if (setcontext (&ctx[0]) != 0) + { + printf ("%s: setcontext: %m\n", __FUNCTION__); + exit (7); + } + } + if (global != 2) + { + printf ("%s: 'global' not incremented twice\n", __FUNCTION__); + exit (8); + } + + test_special_regs (0); + + global = 0; + if (getcontext (&ctx[0]) != 0) + { + printf ("%s: getcontext: %m\n", __FUNCTION__); + exit (9); + } + + if (++global == 1) + { + puts ("priming the FPSCR with a marker"); + prime_special_regs (1); + + puts ("swapping contexts"); + if (swapcontext (&ctx[1], &ctx[0]) != 0) + { + printf ("%s: swapcontext: %m\n", __FUNCTION__); + exit (9); + } + } + if (global != 2) + { + printf ("%s: 'global' not incremented twice\n", __FUNCTION__); + exit (10); + } + + test_special_regs (1); + + puts ("back at main program"); + back_in_main = 1; + + puts ("test succeeded"); + return 0; +} diff --git a/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S b/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S index 404f403..7c6e27c 100644 --- a/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S +++ b/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S @@ -47,16 +47,16 @@ ENTRY (BP_SYM (__longjmp)) lwz r5,_rtld_global_ro@got(r5) mtlr r6 cfi_same_value (lr) - lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r5) + lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r5) # else lwz r5,_dl_hwcap@got(r5) mtlr r6 cfi_same_value (lr) - lwz r5,0(r5) + lwz r5,4(r5) # endif # else - lis r5,_dl_hwcap@ha - lwz r5,_dl_hwcap@l(r5) + lis r5,(_dl_hwcap+4)@ha + lwz r5,(_dl_hwcap+4)@l(r5) # endif andis. r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16) beq L(no_vmx) diff --git a/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S b/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S index 851480d..b7d1abc 100644 --- a/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S +++ b/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S @@ -98,14 +98,14 @@ ENTRY (BP_SYM (__sigsetjmp)) cfi_same_value (lr) # ifdef SHARED lwz r5,_rtld_global_ro@got(r5) - lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r5) + lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r5) # else lwz r5,_dl_hwcap@got(r5) - lwz r5,0(r5) + lwz r5,4(r5) # endif # else - lis r6,_dl_hwcap@ha - lwz r5,_dl_hwcap@l(r6) + lis r6,(_dl_hwcap+4)@ha + lwz r5,(_dl_hwcap+4)@l(r6) # endif andis. r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16) beq L(no_vmx) diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S b/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S index c28c346..63e1773 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S @@ -157,15 +157,15 @@ ENTRY(__CONTEXT_FUNC_NAME) # ifdef SHARED lwz r7,_rtld_global_ro@got(r7) mtlr r8 - lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r7) + lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r7) # else lwz r7,_dl_hwcap@got(r7) mtlr r8 - lwz r7,0(r7) + lwz r7,4(r7) # endif # else - lis r7,_dl_hwcap@ha - lwz r7,_dl_hwcap@l(r7) + lis r7,(_dl_hwcap+4)@ha + lwz r7,(_dl_hwcap+4)@l(r7) # endif andis. r7,r7,(PPC_FEATURE_HAS_ALTIVEC >> 16) diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/power6/fpu/setcontext.S b/sysdeps/unix/sysv/linux/powerpc/powerpc32/power6/fpu/setcontext.S new file mode 100644 index 0000000..7e44cdf --- /dev/null +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/power6/fpu/setcontext.S @@ -0,0 +1,2 @@ +#define _ARCH_PWR6 +#include_next diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/power6/fpu/swapcontext.S b/sysdeps/unix/sysv/linux/powerpc/powerpc32/power6/fpu/swapcontext.S new file mode 100644 index 0000000..616e3db --- /dev/null +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/power6/fpu/swapcontext.S @@ -0,0 +1,2 @@ +#define _ARCH_PWR6 +#include_next diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S b/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S index 40a7a24..91ac436 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S @@ -1,5 +1,5 @@ /* Jump to a new context powerpc32 common. - Copyright (C) 2005, 2006 Free Software Foundation, Inc. + Copyright (C) 2005, 2006, 2008 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -71,33 +71,34 @@ ENTRY(__CONTEXT_FUNC_NAME) cmpwi r3,0 bne 3f /* L(error_exit) */ -#ifdef __CONTEXT_ENABLE_FPRS -# ifdef __CONTEXT_ENABLE_VRS -# ifdef PIC +#ifdef PIC mflr r8 -# ifdef HAVE_ASM_PPC_REL16 +# ifdef HAVE_ASM_PPC_REL16 bcl 20,31,1f 1: mflr r7 addis r7,r7,_GLOBAL_OFFSET_TABLE_-1b@ha addi r7,r7,_GLOBAL_OFFSET_TABLE_-1b@l -# else +# else bl _GLOBAL_OFFSET_TABLE_@local-4 mflr r7 -# endif -# ifdef SHARED +# endif +# ifdef SHARED lwz r7,_rtld_global_ro@got(r7) mtlr r8 - lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r7) -# else + lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r7) +# else lwz r7,_dl_hwcap@got(r7) mtlr r8 - lwz r7,0(r7) -# endif -# else - lis r7,_dl_hwcap@ha - lwz r7,_dl_hwcap@l(r7) -# endif - andis. r7,r7,(PPC_FEATURE_HAS_ALTIVEC >> 16) + lwz r7,4(r7) +# endif +#else + lis r7,(_dl_hwcap+4)@ha + lwz r7,(_dl_hwcap+4)@l(r7) +#endif + +#ifdef __CONTEXT_ENABLE_FPRS +# ifdef __CONTEXT_ENABLE_VRS + andis. r6,r7,(PPC_FEATURE_HAS_ALTIVEC >> 16) la r10,(_UC_VREGS)(r31) beq 2f /* L(has_no_vec) */ @@ -199,7 +200,20 @@ ENTRY(__CONTEXT_FUNC_NAME) /* Restore the floating-point registers */ lfd fp31,_UC_FREGS+(32*8)(r31) lfd fp0,_UC_FREGS+(0*8)(r31) - mtfsf 0xff,fp31 +# ifdef _ARCH_PWR6 + /* Use the extended four-operand version of the mtfsf insn. */ + mtfsf 0xff,fp0,1,0 +# else + /* Availability of DFP indicates a 64-bit FPSCR. */ + andi. r6,r7,PPC_FEATURE_HAS_DFP + beq 7f + /* Use the extended four-operand version of the mtfsf insn. */ + mtfsf 0xff,fp31,1,0 + b 8f + /* Continue to operate on the FPSCR as if it were 32-bits. */ +7: mtfsf 0xff,fp31 +8: +# endif /* _ARCH_PWR6 */ lfd fp1,_UC_FREGS+(1*8)(r31) lfd fp2,_UC_FREGS+(2*8)(r31) lfd fp3,_UC_FREGS+(3*8)(r31) diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S b/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S index 0c7b945..77c9822 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S @@ -1,5 +1,5 @@ /* Save current context and jump to a new context. - Copyright (C) 2005, 2006 Free Software Foundation, Inc. + Copyright (C) 2005, 2006, 2008 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -143,33 +143,34 @@ ENTRY(__CONTEXT_FUNC_NAME) stfd fp30,_UC_FREGS+(30*8)(r3) stfd fp31,_UC_FREGS+(31*8)(r3) stfd fp0,_UC_FREGS+(32*8)(r3) - -# ifdef __CONTEXT_ENABLE_VRS -# ifdef PIC + +# ifdef PIC mflr r8 -# ifdef HAVE_ASM_PPC_REL16 +# ifdef HAVE_ASM_PPC_REL16 bcl 20,31,1f 1: mflr r7 addis r7,r7,_GLOBAL_OFFSET_TABLE_-1b@ha addi r7,r7,_GLOBAL_OFFSET_TABLE_-1b@l -# else +# else bl _GLOBAL_OFFSET_TABLE_@local-4 mflr r7 -# endif -# ifdef SHARED +# endif +# ifdef SHARED lwz r7,_rtld_global_ro@got(r7) mtlr r8 - lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r7) -# else + lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r7) +# else lwz r7,_dl_hwcap@got(r7) mtlr r8 - lwz r7,0(r7) -# endif -# else - lis r7,_dl_hwcap@ha - lwz r7,_dl_hwcap@l(r7) + lwz r7,4(r7) # endif - andis. r7,r7,(PPC_FEATURE_HAS_ALTIVEC >> 16) +# else + lis r7,(_dl_hwcap+4)@ha + lwz r7,(_dl_hwcap+4)@l(r7) +# endif + +# ifdef __CONTEXT_ENABLE_VRS + andis. r6,r7,(PPC_FEATURE_HAS_ALTIVEC >> 16) la r10,(_UC_VREGS)(r3) la r9,(_UC_VREGS+16)(r3) @@ -305,8 +306,8 @@ ENTRY(__CONTEXT_FUNC_NAME) # ifdef HAVE_ASM_PPC_REL16 bcl 20,31,5f 5: mflr r7 - addis r7,r7,_GLOBAL_OFFSET_TABLE_-1b@ha - addi r7,r7,_GLOBAL_OFFSET_TABLE_-1b@l + addis r7,r7,_GLOBAL_OFFSET_TABLE_-5b@ha + addi r7,r7,_GLOBAL_OFFSET_TABLE_-5b@l # else bl _GLOBAL_OFFSET_TABLE_@local-4 mflr r7 @@ -314,14 +315,14 @@ ENTRY(__CONTEXT_FUNC_NAME) mtlr r8 # ifdef SHARED lwz r7,_rtld_global_ro@got(r7) - lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r7) + lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r7) # else lwz r7,_dl_hwcap@got(r7) - lwz r7,0(r7) + lwz r7,4(r7) # endif # else - lis r7,_dl_hwcap@ha - lwz r7,_dl_hwcap@l(r7) + lis r7,(_dl_hwcap+4)@ha + lwz r7,(_dl_hwcap+4)@l(r7) # endif andis. r7,r7,(PPC_FEATURE_HAS_ALTIVEC >> 16) la r10,(_UC_VREGS)(r31) @@ -425,7 +426,20 @@ ENTRY(__CONTEXT_FUNC_NAME) /* Restore the floating-point registers */ lfd fp31,_UC_FREGS+(32*8)(r31) lfd fp0,_UC_FREGS+(0*8)(r31) - mtfsf 0xff,fp31 +# ifdef _ARCH_PWR6 + /* Use the extended four-operand version of the mtfsf insn. */ + mtfsf 0xff,fp0,1,0 +# else + /* Availability of DFP indicates a 64-bit FPSCR. */ + andi. r6,r7,PPC_FEATURE_HAS_DFP + beq 7f + /* Use the extended four-operand version of the mtfsf insn. */ + mtfsf 0xff,fp31,1,0 + b 8f + /* Continue to operate on the FPSCR as if it were 32-bits. */ +7: mtfsf 0xff,fp31 +8: +#endif /* _ARCH_PWR6 */ lfd fp1,_UC_FREGS+(1*8)(r31) lfd fp2,_UC_FREGS+(2*8)(r31) lfd fp3,_UC_FREGS+(3*8)(r31) diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/power6/fpu/setcontext.S b/sysdeps/unix/sysv/linux/powerpc/powerpc64/power6/fpu/setcontext.S new file mode 100644 index 0000000..7e44cdf --- /dev/null +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/power6/fpu/setcontext.S @@ -0,0 +1,2 @@ +#define _ARCH_PWR6 +#include_next diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/power6/fpu/swapcontext.S b/sysdeps/unix/sysv/linux/powerpc/powerpc64/power6/fpu/swapcontext.S new file mode 100644 index 0000000..616e3db --- /dev/null +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/power6/fpu/swapcontext.S @@ -0,0 +1,2 @@ +#define _ARCH_PWR6 +#include_next diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S b/sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S index 48e9af3..b4e28b4 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S @@ -1,5 +1,5 @@ /* Switch to context. - Copyright (C) 2002, 2004, 2005, 2006 Free Software Foundation, Inc. + Copyright (C) 2002, 2004, 2005, 2006, 2008 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -27,6 +27,15 @@ #include "ucontext_i.h" #include + .section ".toc","aw" +.LC__dl_hwcap: +#ifdef SHARED + .tc _rtld_global_ro[TC],_rtld_global_ro +#else + .tc _dl_hwcap[TC],_dl_hwcap +#endif + .section ".text" + #if SHLIB_COMPAT (libc, GLIBC_2_3, GLIBC_2_3_4) ENTRY(__novec_setcontext) CALL_MCOUNT 1 @@ -62,10 +71,32 @@ ENTRY(__novec_setcontext) cmpdi r3,0 bne L(nv_error_exit) +# ifdef SHARED +/* Load _rtld-global._dl_hwcap. */ + ld r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r5) +# else + ld r5,0(r5) /* Load extern _dl_hwcap. */ +# endif + lfd fp0,(SIGCONTEXT_FP_REGS+(32*8))(r31) lfd fp31,(SIGCONTEXT_FP_REGS+(PT_R31*8))(r31) lfd fp30,(SIGCONTEXT_FP_REGS+(PT_R30*8))(r31) + +# ifdef _ARCH_PWR6 + /* Use the extended four-operand version of the mtfsf insn. */ + mtfsf 0xff,fp0,1,0 +# else + /* Availability of DFP indicates a 64-bit FPSCR. */ + andi. r6,r5,PPC_FEATURE_HAS_DFP + beq 5f + /* Use the extended four-operand version of the mtfsf insn. */ + mtfsf 0xff,fp0,1,0 + b 6f + /* Continue to operate on the FPSCR as if it were 32-bits. */ +5: mtfsf 0xff,fp0 +6: +# endif /* _ARCH_PWR6 */ lfd fp29,(SIGCONTEXT_FP_REGS+(PT_R29*8))(r31) lfd fp28,(SIGCONTEXT_FP_REGS+(PT_R28*8))(r31) lfd fp27,(SIGCONTEXT_FP_REGS+(PT_R27*8))(r31) @@ -189,15 +220,7 @@ compat_symbol (libc, __novec_setcontext, setcontext, GLIBC_2_3) #endif - .section ".toc","aw" -.LC__dl_hwcap: -#ifdef SHARED - .tc _rtld_global_ro[TC],_rtld_global_ro -#else - .tc _dl_hwcap[TC],_dl_hwcap -#endif .section ".text" - .machine "altivec" ENTRY(__setcontext) CALL_MCOUNT 1 @@ -241,7 +264,7 @@ ENTRY(__setcontext) # else ld r5,0(r5) /* Load extern _dl_hwcap. */ # endif - andis. r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16) + andis. r6,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16) beq L(has_no_vec) cmpdi r10,0 @@ -346,7 +369,22 @@ L(has_no_vec): lfd fp0,(SIGCONTEXT_FP_REGS+(32*8))(r31) lfd fp31,(SIGCONTEXT_FP_REGS+(PT_R31*8))(r31) lfd fp30,(SIGCONTEXT_FP_REGS+(PT_R30*8))(r31) + +# ifdef _ARCH_PWR6 + /* Use the extended four-operand version of the mtfsf insn. */ + mtfsf 0xff,fp0,1,0 +# else + /* Availability of DFP indicates a 64-bit FPSCR. */ + andi. r6,r5,PPC_FEATURE_HAS_DFP + beq 7f + /* Use the extended four-operand version of the mtfsf insn. */ + mtfsf 0xff,fp0,1,0 + b 8f + /* Continue to operate on the FPSCR as if it were 32-bits. */ +7: mtfsf 0xff,fp0 +8: +# endif /* _ARCH_PWR6 */ lfd fp29,(SIGCONTEXT_FP_REGS+(PT_R29*8))(r31) lfd fp28,(SIGCONTEXT_FP_REGS+(PT_R28*8))(r31) lfd fp27,(SIGCONTEXT_FP_REGS+(PT_R27*8))(r31) diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/swapcontext.S b/sysdeps/unix/sysv/linux/powerpc/powerpc64/swapcontext.S index 936d641..c42ccfb 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/swapcontext.S +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/swapcontext.S @@ -1,5 +1,5 @@ /* Save current context and install the given one. - Copyright (C) 2002, 2004, 2005, 2006 Free Software Foundation, Inc. + Copyright (C) 2002, 2004, 2005, 2006, 2008 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -27,7 +27,16 @@ #include "ucontext_i.h" #include + .section ".toc","aw" +.LC__dl_hwcap: +#ifdef SHARED + .tc _rtld_global_ro[TC],_rtld_global_ro +#else + .tc _dl_hwcap[TC],_dl_hwcap +#endif + #if SHLIB_COMPAT (libc, GLIBC_2_3, GLIBC_2_3_4) + .section ".text" ENTRY(__novec_swapcontext) CALL_MCOUNT 2 #ifdef __ASSUME_NEW_RT_SIGRETURN_SYSCALL @@ -157,10 +166,31 @@ ENTRY(__novec_swapcontext) cmpdi r0,0 bne L(nv_do_sigret) +# ifdef SHARED +/* Load _rtld-global._dl_hwcap. */ + ld r8,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r8) +# else + ld r8,0(r8) /* Load extern _dl_hwcap. */ +# endif + lfd fp0,(SIGCONTEXT_FP_REGS+(32*8))(r31) lfd fp31,(SIGCONTEXT_FP_REGS+(PT_R31*8))(r31) lfd fp30,(SIGCONTEXT_FP_REGS+(PT_R30*8))(r31) +# ifdef _ARCH_PWR6 + /* Use the extended four-operand version of the mtfsf insn. */ + mtfsf 0xff,fp0,1,0 +# else + /* Availability of DFP indicates a 64-bit FPSCR. */ + andi. r6,r8,PPC_FEATURE_HAS_DFP + beq 5f + /* Use the extended four-operand version of the mtfsf insn. */ + mtfsf 0xff,fp0,1,0 + b 6f + /* Continue to operate on the FPSCR as if it were 32-bits. */ +5: mtfsf 0xff,fp0 +6: +#endif /* _ARCH_PWR6 */ lfd fp29,(SIGCONTEXT_FP_REGS+(PT_R29*8))(r31) lfd fp28,(SIGCONTEXT_FP_REGS+(PT_R28*8))(r31) lfd fp27,(SIGCONTEXT_FP_REGS+(PT_R27*8))(r31) @@ -283,15 +313,7 @@ compat_symbol (libc, __novec_swapcontext, swapcontext, GLIBC_2_3) #endif - .section ".toc","aw" -.LC__dl_hwcap: -#ifdef SHARED - .tc _rtld_global_ro[TC],_rtld_global_ro -#else - .tc _dl_hwcap[TC],_dl_hwcap -#endif .section ".text" - .machine "altivec" ENTRY(__swapcontext) CALL_MCOUNT 2 @@ -409,7 +431,7 @@ ENTRY(__swapcontext) la r10,(SIGCONTEXT_V_RESERVE+8)(r3) la r9,(SIGCONTEXT_V_RESERVE+24)(r3) - andis. r8,r8,(PPC_FEATURE_HAS_ALTIVEC >> 16) + andis. r6,r8,(PPC_FEATURE_HAS_ALTIVEC >> 16) clrrdi r10,r10,4 beq L(has_no_vec) @@ -540,7 +562,7 @@ L(has_no_vec): # else ld r8,0(r8) /* Load extern _dl_hwcap. */ # endif - andis. r8,r8,(PPC_FEATURE_HAS_ALTIVEC >> 16) + andis. r6,r8,(PPC_FEATURE_HAS_ALTIVEC >> 16) beq L(has_no_vec2) cmpdi r10,0 @@ -646,7 +668,21 @@ L(has_no_vec2): lfd fp0,(SIGCONTEXT_FP_REGS+(32*8))(r31) lfd fp31,(SIGCONTEXT_FP_REGS+(PT_R31*8))(r31) lfd fp30,(SIGCONTEXT_FP_REGS+(PT_R30*8))(r31) +# ifdef _ARCH_PWR6 + /* Use the extended four-operand version of the mtfsf insn. */ + mtfsf 0xff,fp0,1,0 +# else + /* Availability of DFP indicates a 64-bit FPSCR. */ + andi. r6,r8,PPC_FEATURE_HAS_DFP + beq 7f + /* Use the extended four-operand version of the mtfsf insn. */ + mtfsf 0xff,fp0,1,0 + b 8f + /* Continue to operate on the FPSCR as if it were 32-bits. */ +7: mtfsf 0xff,fp0 +8: +#endif /* _ARCH_PWR6 */ lfd fp29,(SIGCONTEXT_FP_REGS+(PT_R29*8))(r31) lfd fp28,(SIGCONTEXT_FP_REGS+(PT_R28*8))(r31) lfd fp27,(SIGCONTEXT_FP_REGS+(PT_R27*8))(r31) -- cgit v1.1