diff options
Diffstat (limited to 'gcc/config/sh')
-rw-r--r-- | gcc/config/sh/lib1funcs.asm | 3933 | ||||
-rw-r--r-- | gcc/config/sh/lib1funcs.h | 76 | ||||
-rw-r--r-- | gcc/config/sh/sh.h | 2 | ||||
-rw-r--r-- | gcc/config/sh/t-linux | 2 | ||||
-rw-r--r-- | gcc/config/sh/t-netbsd | 1 | ||||
-rw-r--r-- | gcc/config/sh/t-sh | 7 | ||||
-rw-r--r-- | gcc/config/sh/t-sh64 | 9 |
7 files changed, 2 insertions, 4028 deletions
diff --git a/gcc/config/sh/lib1funcs.asm b/gcc/config/sh/lib1funcs.asm deleted file mode 100644 index 2f0ca16..0000000 --- a/gcc/config/sh/lib1funcs.asm +++ /dev/null @@ -1,3933 +0,0 @@ -/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, - 2004, 2005, 2006, 2009 - Free Software Foundation, Inc. - -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -<http://www.gnu.org/licenses/>. */ - - -!! libgcc routines for the Renesas / SuperH SH CPUs. -!! Contributed by Steve Chamberlain. -!! sac@cygnus.com - -!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines -!! recoded in assembly by Toshiyasu Morita -!! tm@netcom.com - -#if defined(__ELF__) && defined(__linux__) -.section .note.GNU-stack,"",%progbits -.previous -#endif - -/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and - ELF local label prefixes by J"orn Rennecke - amylaar@cygnus.com */ - -#include "lib1funcs.h" - -/* t-vxworks needs to build both PIC and non-PIC versions of libgcc, - so it is more convenient to define NO_FPSCR_VALUES here than to - define it on the command line. */ -#if defined __vxworks && defined __PIC__ -#define NO_FPSCR_VALUES -#endif - -#if ! __SH5__ -#ifdef L_ashiftrt - .global GLOBAL(ashiftrt_r4_0) - .global GLOBAL(ashiftrt_r4_1) - .global GLOBAL(ashiftrt_r4_2) - .global GLOBAL(ashiftrt_r4_3) - .global GLOBAL(ashiftrt_r4_4) - .global GLOBAL(ashiftrt_r4_5) - .global GLOBAL(ashiftrt_r4_6) - .global GLOBAL(ashiftrt_r4_7) - .global GLOBAL(ashiftrt_r4_8) - .global GLOBAL(ashiftrt_r4_9) - .global GLOBAL(ashiftrt_r4_10) - .global GLOBAL(ashiftrt_r4_11) - .global GLOBAL(ashiftrt_r4_12) - .global GLOBAL(ashiftrt_r4_13) - .global GLOBAL(ashiftrt_r4_14) - .global GLOBAL(ashiftrt_r4_15) - .global GLOBAL(ashiftrt_r4_16) - .global GLOBAL(ashiftrt_r4_17) - .global GLOBAL(ashiftrt_r4_18) - .global GLOBAL(ashiftrt_r4_19) - .global GLOBAL(ashiftrt_r4_20) - .global GLOBAL(ashiftrt_r4_21) - .global GLOBAL(ashiftrt_r4_22) - .global GLOBAL(ashiftrt_r4_23) - .global GLOBAL(ashiftrt_r4_24) - .global GLOBAL(ashiftrt_r4_25) - .global GLOBAL(ashiftrt_r4_26) - .global GLOBAL(ashiftrt_r4_27) - .global GLOBAL(ashiftrt_r4_28) - .global GLOBAL(ashiftrt_r4_29) - .global GLOBAL(ashiftrt_r4_30) - .global GLOBAL(ashiftrt_r4_31) - .global GLOBAL(ashiftrt_r4_32) - - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32)) - - .align 1 -GLOBAL(ashiftrt_r4_32): -GLOBAL(ashiftrt_r4_31): - rotcl r4 - rts - subc r4,r4 - -GLOBAL(ashiftrt_r4_30): - shar r4 -GLOBAL(ashiftrt_r4_29): - shar r4 -GLOBAL(ashiftrt_r4_28): - shar r4 -GLOBAL(ashiftrt_r4_27): - shar r4 -GLOBAL(ashiftrt_r4_26): - shar r4 -GLOBAL(ashiftrt_r4_25): - shar r4 -GLOBAL(ashiftrt_r4_24): - shlr16 r4 - shlr8 r4 - rts - exts.b r4,r4 - -GLOBAL(ashiftrt_r4_23): - shar r4 -GLOBAL(ashiftrt_r4_22): - shar r4 -GLOBAL(ashiftrt_r4_21): - shar r4 -GLOBAL(ashiftrt_r4_20): - shar r4 -GLOBAL(ashiftrt_r4_19): - shar r4 -GLOBAL(ashiftrt_r4_18): - shar r4 -GLOBAL(ashiftrt_r4_17): - shar r4 -GLOBAL(ashiftrt_r4_16): - shlr16 r4 - rts - exts.w r4,r4 - -GLOBAL(ashiftrt_r4_15): - shar r4 -GLOBAL(ashiftrt_r4_14): - shar r4 -GLOBAL(ashiftrt_r4_13): - shar r4 -GLOBAL(ashiftrt_r4_12): - shar r4 -GLOBAL(ashiftrt_r4_11): - shar r4 -GLOBAL(ashiftrt_r4_10): - shar r4 -GLOBAL(ashiftrt_r4_9): - shar r4 -GLOBAL(ashiftrt_r4_8): - shar r4 -GLOBAL(ashiftrt_r4_7): - shar r4 -GLOBAL(ashiftrt_r4_6): - shar r4 -GLOBAL(ashiftrt_r4_5): - shar r4 -GLOBAL(ashiftrt_r4_4): - shar r4 -GLOBAL(ashiftrt_r4_3): - shar r4 -GLOBAL(ashiftrt_r4_2): - shar r4 -GLOBAL(ashiftrt_r4_1): - rts - shar r4 - -GLOBAL(ashiftrt_r4_0): - rts - nop - - ENDFUNC(GLOBAL(ashiftrt_r4_0)) - ENDFUNC(GLOBAL(ashiftrt_r4_1)) - ENDFUNC(GLOBAL(ashiftrt_r4_2)) - ENDFUNC(GLOBAL(ashiftrt_r4_3)) - ENDFUNC(GLOBAL(ashiftrt_r4_4)) - ENDFUNC(GLOBAL(ashiftrt_r4_5)) - ENDFUNC(GLOBAL(ashiftrt_r4_6)) - ENDFUNC(GLOBAL(ashiftrt_r4_7)) - ENDFUNC(GLOBAL(ashiftrt_r4_8)) - ENDFUNC(GLOBAL(ashiftrt_r4_9)) - ENDFUNC(GLOBAL(ashiftrt_r4_10)) - ENDFUNC(GLOBAL(ashiftrt_r4_11)) - ENDFUNC(GLOBAL(ashiftrt_r4_12)) - ENDFUNC(GLOBAL(ashiftrt_r4_13)) - ENDFUNC(GLOBAL(ashiftrt_r4_14)) - ENDFUNC(GLOBAL(ashiftrt_r4_15)) - ENDFUNC(GLOBAL(ashiftrt_r4_16)) - ENDFUNC(GLOBAL(ashiftrt_r4_17)) - ENDFUNC(GLOBAL(ashiftrt_r4_18)) - ENDFUNC(GLOBAL(ashiftrt_r4_19)) - ENDFUNC(GLOBAL(ashiftrt_r4_20)) - ENDFUNC(GLOBAL(ashiftrt_r4_21)) - ENDFUNC(GLOBAL(ashiftrt_r4_22)) - ENDFUNC(GLOBAL(ashiftrt_r4_23)) - ENDFUNC(GLOBAL(ashiftrt_r4_24)) - ENDFUNC(GLOBAL(ashiftrt_r4_25)) - ENDFUNC(GLOBAL(ashiftrt_r4_26)) - ENDFUNC(GLOBAL(ashiftrt_r4_27)) - ENDFUNC(GLOBAL(ashiftrt_r4_28)) - ENDFUNC(GLOBAL(ashiftrt_r4_29)) - ENDFUNC(GLOBAL(ashiftrt_r4_30)) - ENDFUNC(GLOBAL(ashiftrt_r4_31)) - ENDFUNC(GLOBAL(ashiftrt_r4_32)) -#endif - -#ifdef L_ashiftrt_n - -! -! GLOBAL(ashrsi3) -! -! Entry: -! -! r4: Value to shift -! r5: Shifts -! -! Exit: -! -! r0: Result -! -! Destroys: -! -! (none) -! - - .global GLOBAL(ashrsi3) - HIDDEN_FUNC(GLOBAL(ashrsi3)) - .align 2 -GLOBAL(ashrsi3): - mov #31,r0 - and r0,r5 - mova LOCAL(ashrsi3_table),r0 - mov.b @(r0,r5),r5 -#ifdef __sh1__ - add r5,r0 - jmp @r0 -#else - braf r5 -#endif - mov r4,r0 - - .align 2 -LOCAL(ashrsi3_table): - .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table) - -LOCAL(ashrsi3_31): - rotcl r0 - rts - subc r0,r0 - -LOCAL(ashrsi3_30): - shar r0 -LOCAL(ashrsi3_29): - shar r0 -LOCAL(ashrsi3_28): - shar r0 -LOCAL(ashrsi3_27): - shar r0 -LOCAL(ashrsi3_26): - shar r0 -LOCAL(ashrsi3_25): - shar r0 -LOCAL(ashrsi3_24): - shlr16 r0 - shlr8 r0 - rts - exts.b r0,r0 - -LOCAL(ashrsi3_23): - shar r0 -LOCAL(ashrsi3_22): - shar r0 -LOCAL(ashrsi3_21): - shar r0 -LOCAL(ashrsi3_20): - shar r0 -LOCAL(ashrsi3_19): - shar r0 -LOCAL(ashrsi3_18): - shar r0 -LOCAL(ashrsi3_17): - shar r0 -LOCAL(ashrsi3_16): - shlr16 r0 - rts - exts.w r0,r0 - -LOCAL(ashrsi3_15): - shar r0 -LOCAL(ashrsi3_14): - shar r0 -LOCAL(ashrsi3_13): - shar r0 -LOCAL(ashrsi3_12): - shar r0 -LOCAL(ashrsi3_11): - shar r0 -LOCAL(ashrsi3_10): - shar r0 -LOCAL(ashrsi3_9): - shar r0 -LOCAL(ashrsi3_8): - shar r0 -LOCAL(ashrsi3_7): - shar r0 -LOCAL(ashrsi3_6): - shar r0 -LOCAL(ashrsi3_5): - shar r0 -LOCAL(ashrsi3_4): - shar r0 -LOCAL(ashrsi3_3): - shar r0 -LOCAL(ashrsi3_2): - shar r0 -LOCAL(ashrsi3_1): - rts - shar r0 - -LOCAL(ashrsi3_0): - rts - nop - - ENDFUNC(GLOBAL(ashrsi3)) -#endif - -#ifdef L_ashiftlt - -! -! GLOBAL(ashlsi3) -! -! Entry: -! -! r4: Value to shift -! r5: Shifts -! -! Exit: -! -! r0: Result -! -! Destroys: -! -! (none) -! - .global GLOBAL(ashlsi3) - HIDDEN_FUNC(GLOBAL(ashlsi3)) - .align 2 -GLOBAL(ashlsi3): - mov #31,r0 - and r0,r5 - mova LOCAL(ashlsi3_table),r0 - mov.b @(r0,r5),r5 -#ifdef __sh1__ - add r5,r0 - jmp @r0 -#else - braf r5 -#endif - mov r4,r0 - - .align 2 -LOCAL(ashlsi3_table): - .byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table) - -LOCAL(ashlsi3_6): - shll2 r0 -LOCAL(ashlsi3_4): - shll2 r0 -LOCAL(ashlsi3_2): - rts - shll2 r0 - -LOCAL(ashlsi3_7): - shll2 r0 -LOCAL(ashlsi3_5): - shll2 r0 -LOCAL(ashlsi3_3): - shll2 r0 -LOCAL(ashlsi3_1): - rts - shll r0 - -LOCAL(ashlsi3_14): - shll2 r0 -LOCAL(ashlsi3_12): - shll2 r0 -LOCAL(ashlsi3_10): - shll2 r0 -LOCAL(ashlsi3_8): - rts - shll8 r0 - -LOCAL(ashlsi3_15): - shll2 r0 -LOCAL(ashlsi3_13): - shll2 r0 -LOCAL(ashlsi3_11): - shll2 r0 -LOCAL(ashlsi3_9): - shll8 r0 - rts - shll r0 - -LOCAL(ashlsi3_22): - shll2 r0 -LOCAL(ashlsi3_20): - shll2 r0 -LOCAL(ashlsi3_18): - shll2 r0 -LOCAL(ashlsi3_16): - rts - shll16 r0 - -LOCAL(ashlsi3_23): - shll2 r0 -LOCAL(ashlsi3_21): - shll2 r0 -LOCAL(ashlsi3_19): - shll2 r0 -LOCAL(ashlsi3_17): - shll16 r0 - rts - shll r0 - -LOCAL(ashlsi3_30): - shll2 r0 -LOCAL(ashlsi3_28): - shll2 r0 -LOCAL(ashlsi3_26): - shll2 r0 -LOCAL(ashlsi3_24): - shll16 r0 - rts - shll8 r0 - -LOCAL(ashlsi3_31): - shll2 r0 -LOCAL(ashlsi3_29): - shll2 r0 -LOCAL(ashlsi3_27): - shll2 r0 -LOCAL(ashlsi3_25): - shll16 r0 - shll8 r0 - rts - shll r0 - -LOCAL(ashlsi3_0): - rts - nop - - ENDFUNC(GLOBAL(ashlsi3)) -#endif - -#ifdef L_lshiftrt - -! -! GLOBAL(lshrsi3) -! -! Entry: -! -! r4: Value to shift -! r5: Shifts -! -! Exit: -! -! r0: Result -! -! Destroys: -! -! (none) -! - .global GLOBAL(lshrsi3) - HIDDEN_FUNC(GLOBAL(lshrsi3)) - .align 2 -GLOBAL(lshrsi3): - mov #31,r0 - and r0,r5 - mova LOCAL(lshrsi3_table),r0 - mov.b @(r0,r5),r5 -#ifdef __sh1__ - add r5,r0 - jmp @r0 -#else - braf r5 -#endif - mov r4,r0 - - .align 2 -LOCAL(lshrsi3_table): - .byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table) - -LOCAL(lshrsi3_6): - shlr2 r0 -LOCAL(lshrsi3_4): - shlr2 r0 -LOCAL(lshrsi3_2): - rts - shlr2 r0 - -LOCAL(lshrsi3_7): - shlr2 r0 -LOCAL(lshrsi3_5): - shlr2 r0 -LOCAL(lshrsi3_3): - shlr2 r0 -LOCAL(lshrsi3_1): - rts - shlr r0 - -LOCAL(lshrsi3_14): - shlr2 r0 -LOCAL(lshrsi3_12): - shlr2 r0 -LOCAL(lshrsi3_10): - shlr2 r0 -LOCAL(lshrsi3_8): - rts - shlr8 r0 - -LOCAL(lshrsi3_15): - shlr2 r0 -LOCAL(lshrsi3_13): - shlr2 r0 -LOCAL(lshrsi3_11): - shlr2 r0 -LOCAL(lshrsi3_9): - shlr8 r0 - rts - shlr r0 - -LOCAL(lshrsi3_22): - shlr2 r0 -LOCAL(lshrsi3_20): - shlr2 r0 -LOCAL(lshrsi3_18): - shlr2 r0 -LOCAL(lshrsi3_16): - rts - shlr16 r0 - -LOCAL(lshrsi3_23): - shlr2 r0 -LOCAL(lshrsi3_21): - shlr2 r0 -LOCAL(lshrsi3_19): - shlr2 r0 -LOCAL(lshrsi3_17): - shlr16 r0 - rts - shlr r0 - -LOCAL(lshrsi3_30): - shlr2 r0 -LOCAL(lshrsi3_28): - shlr2 r0 -LOCAL(lshrsi3_26): - shlr2 r0 -LOCAL(lshrsi3_24): - shlr16 r0 - rts - shlr8 r0 - -LOCAL(lshrsi3_31): - shlr2 r0 -LOCAL(lshrsi3_29): - shlr2 r0 -LOCAL(lshrsi3_27): - shlr2 r0 -LOCAL(lshrsi3_25): - shlr16 r0 - shlr8 r0 - rts - shlr r0 - -LOCAL(lshrsi3_0): - rts - nop - - ENDFUNC(GLOBAL(lshrsi3)) -#endif - -#ifdef L_movmem - .text - .balign 4 - .global GLOBAL(movmem) - HIDDEN_FUNC(GLOBAL(movmem)) - HIDDEN_ALIAS(movstr,movmem) - /* This would be a lot simpler if r6 contained the byte count - minus 64, and we wouldn't be called here for a byte count of 64. */ -GLOBAL(movmem): - sts.l pr,@-r15 - shll2 r6 - bsr GLOBAL(movmemSI52+2) - mov.l @(48,r5),r0 - .balign 4 -LOCAL(movmem_loop): /* Reached with rts */ - mov.l @(60,r5),r0 - add #-64,r6 - mov.l r0,@(60,r4) - tst r6,r6 - mov.l @(56,r5),r0 - bt LOCAL(movmem_done) - mov.l r0,@(56,r4) - cmp/pl r6 - mov.l @(52,r5),r0 - add #64,r5 - mov.l r0,@(52,r4) - add #64,r4 - bt GLOBAL(movmemSI52) -! done all the large groups, do the remainder -! jump to movmem+ - mova GLOBAL(movmemSI4)+4,r0 - add r6,r0 - jmp @r0 -LOCAL(movmem_done): ! share slot insn, works out aligned. - lds.l @r15+,pr - mov.l r0,@(56,r4) - mov.l @(52,r5),r0 - rts - mov.l r0,@(52,r4) - .balign 4 -! ??? We need aliases movstr* for movmem* for the older libraries. These -! aliases will be removed at the some point in the future. - .global GLOBAL(movmemSI64) - HIDDEN_FUNC(GLOBAL(movmemSI64)) - HIDDEN_ALIAS(movstrSI64,movmemSI64) -GLOBAL(movmemSI64): - mov.l @(60,r5),r0 - mov.l r0,@(60,r4) - .global GLOBAL(movmemSI60) - HIDDEN_FUNC(GLOBAL(movmemSI60)) - HIDDEN_ALIAS(movstrSI60,movmemSI60) -GLOBAL(movmemSI60): - mov.l @(56,r5),r0 - mov.l r0,@(56,r4) - .global GLOBAL(movmemSI56) - HIDDEN_FUNC(GLOBAL(movmemSI56)) - HIDDEN_ALIAS(movstrSI56,movmemSI56) -GLOBAL(movmemSI56): - mov.l @(52,r5),r0 - mov.l r0,@(52,r4) - .global GLOBAL(movmemSI52) - HIDDEN_FUNC(GLOBAL(movmemSI52)) - HIDDEN_ALIAS(movstrSI52,movmemSI52) -GLOBAL(movmemSI52): - mov.l @(48,r5),r0 - mov.l r0,@(48,r4) - .global GLOBAL(movmemSI48) - HIDDEN_FUNC(GLOBAL(movmemSI48)) - HIDDEN_ALIAS(movstrSI48,movmemSI48) -GLOBAL(movmemSI48): - mov.l @(44,r5),r0 - mov.l r0,@(44,r4) - .global GLOBAL(movmemSI44) - HIDDEN_FUNC(GLOBAL(movmemSI44)) - HIDDEN_ALIAS(movstrSI44,movmemSI44) -GLOBAL(movmemSI44): - mov.l @(40,r5),r0 - mov.l r0,@(40,r4) - .global GLOBAL(movmemSI40) - HIDDEN_FUNC(GLOBAL(movmemSI40)) - HIDDEN_ALIAS(movstrSI40,movmemSI40) -GLOBAL(movmemSI40): - mov.l @(36,r5),r0 - mov.l r0,@(36,r4) - .global GLOBAL(movmemSI36) - HIDDEN_FUNC(GLOBAL(movmemSI36)) - HIDDEN_ALIAS(movstrSI36,movmemSI36) -GLOBAL(movmemSI36): - mov.l @(32,r5),r0 - mov.l r0,@(32,r4) - .global GLOBAL(movmemSI32) - HIDDEN_FUNC(GLOBAL(movmemSI32)) - HIDDEN_ALIAS(movstrSI32,movmemSI32) -GLOBAL(movmemSI32): - mov.l @(28,r5),r0 - mov.l r0,@(28,r4) - .global GLOBAL(movmemSI28) - HIDDEN_FUNC(GLOBAL(movmemSI28)) - HIDDEN_ALIAS(movstrSI28,movmemSI28) -GLOBAL(movmemSI28): - mov.l @(24,r5),r0 - mov.l r0,@(24,r4) - .global GLOBAL(movmemSI24) - HIDDEN_FUNC(GLOBAL(movmemSI24)) - HIDDEN_ALIAS(movstrSI24,movmemSI24) -GLOBAL(movmemSI24): - mov.l @(20,r5),r0 - mov.l r0,@(20,r4) - .global GLOBAL(movmemSI20) - HIDDEN_FUNC(GLOBAL(movmemSI20)) - HIDDEN_ALIAS(movstrSI20,movmemSI20) -GLOBAL(movmemSI20): - mov.l @(16,r5),r0 - mov.l r0,@(16,r4) - .global GLOBAL(movmemSI16) - HIDDEN_FUNC(GLOBAL(movmemSI16)) - HIDDEN_ALIAS(movstrSI16,movmemSI16) -GLOBAL(movmemSI16): - mov.l @(12,r5),r0 - mov.l r0,@(12,r4) - .global GLOBAL(movmemSI12) - HIDDEN_FUNC(GLOBAL(movmemSI12)) - HIDDEN_ALIAS(movstrSI12,movmemSI12) -GLOBAL(movmemSI12): - mov.l @(8,r5),r0 - mov.l r0,@(8,r4) - .global GLOBAL(movmemSI8) - HIDDEN_FUNC(GLOBAL(movmemSI8)) - HIDDEN_ALIAS(movstrSI8,movmemSI8) -GLOBAL(movmemSI8): - mov.l @(4,r5),r0 - mov.l r0,@(4,r4) - .global GLOBAL(movmemSI4) - HIDDEN_FUNC(GLOBAL(movmemSI4)) - HIDDEN_ALIAS(movstrSI4,movmemSI4) -GLOBAL(movmemSI4): - mov.l @(0,r5),r0 - rts - mov.l r0,@(0,r4) - - ENDFUNC(GLOBAL(movmemSI64)) - ENDFUNC(GLOBAL(movmemSI60)) - ENDFUNC(GLOBAL(movmemSI56)) - ENDFUNC(GLOBAL(movmemSI52)) - ENDFUNC(GLOBAL(movmemSI48)) - ENDFUNC(GLOBAL(movmemSI44)) - ENDFUNC(GLOBAL(movmemSI40)) - ENDFUNC(GLOBAL(movmemSI36)) - ENDFUNC(GLOBAL(movmemSI32)) - ENDFUNC(GLOBAL(movmemSI28)) - ENDFUNC(GLOBAL(movmemSI24)) - ENDFUNC(GLOBAL(movmemSI20)) - ENDFUNC(GLOBAL(movmemSI16)) - ENDFUNC(GLOBAL(movmemSI12)) - ENDFUNC(GLOBAL(movmemSI8)) - ENDFUNC(GLOBAL(movmemSI4)) - ENDFUNC(GLOBAL(movmem)) -#endif - -#ifdef L_movmem_i4 - .text - .global GLOBAL(movmem_i4_even) - .global GLOBAL(movmem_i4_odd) - .global GLOBAL(movmemSI12_i4) - - HIDDEN_FUNC(GLOBAL(movmem_i4_even)) - HIDDEN_FUNC(GLOBAL(movmem_i4_odd)) - HIDDEN_FUNC(GLOBAL(movmemSI12_i4)) - - HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even) - HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd) - HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4) - - .p2align 5 -L_movmem_2mod4_end: - mov.l r0,@(16,r4) - rts - mov.l r1,@(20,r4) - - .p2align 2 - -GLOBAL(movmem_i4_even): - mov.l @r5+,r0 - bra L_movmem_start_even - mov.l @r5+,r1 - -GLOBAL(movmem_i4_odd): - mov.l @r5+,r1 - add #-4,r4 - mov.l @r5+,r2 - mov.l @r5+,r3 - mov.l r1,@(4,r4) - mov.l r2,@(8,r4) - -L_movmem_loop: - mov.l r3,@(12,r4) - dt r6 - mov.l @r5+,r0 - bt/s L_movmem_2mod4_end - mov.l @r5+,r1 - add #16,r4 -L_movmem_start_even: - mov.l @r5+,r2 - mov.l @r5+,r3 - mov.l r0,@r4 - dt r6 - mov.l r1,@(4,r4) - bf/s L_movmem_loop - mov.l r2,@(8,r4) - rts - mov.l r3,@(12,r4) - - ENDFUNC(GLOBAL(movmem_i4_even)) - ENDFUNC(GLOBAL(movmem_i4_odd)) - - .p2align 4 -GLOBAL(movmemSI12_i4): - mov.l @r5,r0 - mov.l @(4,r5),r1 - mov.l @(8,r5),r2 - mov.l r0,@r4 - mov.l r1,@(4,r4) - rts - mov.l r2,@(8,r4) - - ENDFUNC(GLOBAL(movmemSI12_i4)) -#endif - -#ifdef L_mulsi3 - - - .global GLOBAL(mulsi3) - HIDDEN_FUNC(GLOBAL(mulsi3)) - -! r4 = aabb -! r5 = ccdd -! r0 = aabb*ccdd via partial products -! -! if aa == 0 and cc = 0 -! r0 = bb*dd -! -! else -! aa = bb*dd + (aa*dd*65536) + (cc*bb*65536) -! - -GLOBAL(mulsi3): - mulu.w r4,r5 ! multiply the lsws macl=bb*dd - mov r5,r3 ! r3 = ccdd - swap.w r4,r2 ! r2 = bbaa - xtrct r2,r3 ! r3 = aacc - tst r3,r3 ! msws zero ? - bf hiset - rts ! yes - then we have the answer - sts macl,r0 - -hiset: sts macl,r0 ! r0 = bb*dd - mulu.w r2,r5 ! brewing macl = aa*dd - sts macl,r1 - mulu.w r3,r4 ! brewing macl = cc*bb - sts macl,r2 - add r1,r2 - shll16 r2 - rts - add r2,r0 - - ENDFUNC(GLOBAL(mulsi3)) -#endif -#endif /* ! __SH5__ */ -#ifdef L_sdivsi3_i4 - .title "SH DIVIDE" -!! 4 byte integer Divide code for the Renesas SH -#ifdef __SH4__ -!! args in r4 and r5, result in fpul, clobber dr0, dr2 - - .global GLOBAL(sdivsi3_i4) - HIDDEN_FUNC(GLOBAL(sdivsi3_i4)) -GLOBAL(sdivsi3_i4): - lds r4,fpul - float fpul,dr0 - lds r5,fpul - float fpul,dr2 - fdiv dr2,dr0 - rts - ftrc dr0,fpul - - ENDFUNC(GLOBAL(sdivsi3_i4)) -#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__) -!! args in r4 and r5, result in fpul, clobber r2, dr0, dr2 - -#if ! __SH5__ || __SH5__ == 32 -#if __SH5__ - .mode SHcompact -#endif - .global GLOBAL(sdivsi3_i4) - HIDDEN_FUNC(GLOBAL(sdivsi3_i4)) -GLOBAL(sdivsi3_i4): - sts.l fpscr,@-r15 - mov #8,r2 - swap.w r2,r2 - lds r2,fpscr - lds r4,fpul - float fpul,dr0 - lds r5,fpul - float fpul,dr2 - fdiv dr2,dr0 - ftrc dr0,fpul - rts - lds.l @r15+,fpscr - - ENDFUNC(GLOBAL(sdivsi3_i4)) -#endif /* ! __SH5__ || __SH5__ == 32 */ -#endif /* ! __SH4__ */ -#endif - -#ifdef L_sdivsi3 -/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with - sh2e/sh3e code. */ -#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__) -!! -!! Steve Chamberlain -!! sac@cygnus.com -!! -!! - -!! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit - - .global GLOBAL(sdivsi3) -#if __SHMEDIA__ -#if __SH5__ == 32 - .section .text..SHmedia32,"ax" -#else - .text -#endif - .align 2 -#if 0 -/* The assembly code that follows is a hand-optimized version of the C - code that follows. Note that the registers that are modified are - exactly those listed as clobbered in the patterns divsi3_i1 and - divsi3_i1_media. - -int __sdivsi3 (i, j) - int i, j; -{ - register unsigned long long r18 asm ("r18"); - register unsigned long long r19 asm ("r19"); - register unsigned long long r0 asm ("r0") = 0; - register unsigned long long r1 asm ("r1") = 1; - register int r2 asm ("r2") = i >> 31; - register int r3 asm ("r3") = j >> 31; - - r2 = r2 ? r2 : r1; - r3 = r3 ? r3 : r1; - r18 = i * r2; - r19 = j * r3; - r2 *= r3; - - r19 <<= 31; - r1 <<= 31; - do - if (r18 >= r19) - r0 |= r1, r18 -= r19; - while (r19 >>= 1, r1 >>= 1); - - return r2 * (int)r0; -} -*/ -GLOBAL(sdivsi3): - pt/l LOCAL(sdivsi3_dontadd), tr2 - pt/l LOCAL(sdivsi3_loop), tr1 - ptabs/l r18, tr0 - movi 0, r0 - movi 1, r1 - shari.l r4, 31, r2 - shari.l r5, 31, r3 - cmveq r2, r1, r2 - cmveq r3, r1, r3 - muls.l r4, r2, r18 - muls.l r5, r3, r19 - muls.l r2, r3, r2 - shlli r19, 31, r19 - shlli r1, 31, r1 -LOCAL(sdivsi3_loop): - bgtu r19, r18, tr2 - or r0, r1, r0 - sub r18, r19, r18 -LOCAL(sdivsi3_dontadd): - shlri r1, 1, r1 - shlri r19, 1, r19 - bnei r1, 0, tr1 - muls.l r0, r2, r0 - add.l r0, r63, r0 - blink tr0, r63 -#elif 0 /* ! 0 */ - // inputs: r4,r5 - // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0 - // result in r0 -GLOBAL(sdivsi3): - // can create absolute value without extra latency, - // but dependent on proper sign extension of inputs: - // shari.l r5,31,r2 - // xor r5,r2,r20 - // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended. - shari.l r5,31,r2 - ori r2,1,r2 - muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended. - movi 0xffffffffffffbb0c,r19 // shift count eqiv 76 - shari.l r4,31,r3 - nsb r20,r0 - shlld r20,r0,r25 - shlri r25,48,r25 - sub r19,r25,r1 - mmulfx.w r1,r1,r2 - mshflo.w r1,r63,r1 - // If r4 was to be used in-place instead of r21, could use this sequence - // to compute absolute: - // sub r63,r4,r19 // compute absolute value of r4 - // shlri r4,32,r3 // into lower 32 bit of r4, keeping - // mcmv r19,r3,r4 // the sign in the upper 32 bits intact. - ori r3,1,r3 - mmulfx.w r25,r2,r2 - sub r19,r0,r0 - muls.l r4,r3,r21 - msub.w r1,r2,r2 - addi r2,-2,r1 - mulu.l r21,r1,r19 - mmulfx.w r2,r2,r2 - shlli r1,15,r1 - shlrd r19,r0,r19 - mulu.l r19,r20,r3 - mmacnfx.wl r25,r2,r1 - ptabs r18,tr0 - sub r21,r3,r25 - - mulu.l r25,r1,r2 - addi r0,14,r0 - xor r4,r5,r18 - shlrd r2,r0,r2 - mulu.l r2,r20,r3 - add r19,r2,r19 - shari.l r18,31,r18 - sub r25,r3,r25 - - mulu.l r25,r1,r2 - sub r25,r20,r25 - add r19,r18,r19 - shlrd r2,r0,r2 - mulu.l r2,r20,r3 - addi r25,1,r25 - add r19,r2,r19 - - cmpgt r25,r3,r25 - add.l r19,r25,r0 - xor r0,r18,r0 - blink tr0,r63 -#else /* ! 0 && ! 0 */ - - // inputs: r4,r5 - // clobbered: r1,r18,r19,r20,r21,r25,tr0 - // result in r0 - HIDDEN_FUNC(GLOBAL(sdivsi3_2)) -#ifndef __pic__ - FUNC(GLOBAL(sdivsi3)) -GLOBAL(sdivsi3): /* this is the shcompact entry point */ - // The special SHmedia entry point sdivsi3_1 prevents accidental linking - // with the SHcompact implementation, which clobbers tr1 / tr2. - .global GLOBAL(sdivsi3_1) -GLOBAL(sdivsi3_1): - .global GLOBAL(div_table_internal) - movi (GLOBAL(div_table_internal) >> 16) & 65535, r20 - shori GLOBAL(div_table_internal) & 65535, r20 -#endif - .global GLOBAL(sdivsi3_2) - // div_table in r20 - // clobbered: r1,r18,r19,r21,r25,tr0 -GLOBAL(sdivsi3_2): - nsb r5, r1 - shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62 - shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1) - ldx.ub r20, r21, r19 // u0.8 - shari r25, 32, r25 // normalize to s2.30 - shlli r21, 1, r21 - muls.l r25, r19, r19 // s2.38 - ldx.w r20, r21, r21 // s2.14 - ptabs r18, tr0 - shari r19, 24, r19 // truncate to s2.14 - sub r21, r19, r19 // some 11 bit inverse in s1.14 - muls.l r19, r19, r21 // u0.28 - sub r63, r1, r1 - addi r1, 92, r1 - muls.l r25, r21, r18 // s2.58 - shlli r19, 45, r19 // multiply by two and convert to s2.58 - /* bubble */ - sub r19, r18, r18 - shari r18, 28, r18 // some 22 bit inverse in s1.30 - muls.l r18, r25, r0 // s2.60 - muls.l r18, r4, r25 // s32.30 - /* bubble */ - shari r0, 16, r19 // s-16.44 - muls.l r19, r18, r19 // s-16.74 - shari r25, 63, r0 - shari r4, 14, r18 // s19.-14 - shari r19, 30, r19 // s-16.44 - muls.l r19, r18, r19 // s15.30 - xor r21, r0, r21 // You could also use the constant 1 << 27. - add r21, r25, r21 - sub r21, r19, r21 - shard r21, r1, r21 - sub r21, r0, r0 - blink tr0, r63 -#ifndef __pic__ - ENDFUNC(GLOBAL(sdivsi3)) -#endif - ENDFUNC(GLOBAL(sdivsi3_2)) -#endif -#elif defined __SHMEDIA__ -/* m5compact-nofpu */ - // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2 - .mode SHmedia - .section .text..SHmedia32,"ax" - .align 2 - FUNC(GLOBAL(sdivsi3)) -GLOBAL(sdivsi3): - pt/l LOCAL(sdivsi3_dontsub), tr0 - pt/l LOCAL(sdivsi3_loop), tr1 - ptabs/l r18,tr2 - shari.l r4,31,r18 - shari.l r5,31,r19 - xor r4,r18,r20 - xor r5,r19,r21 - sub.l r20,r18,r20 - sub.l r21,r19,r21 - xor r18,r19,r19 - shlli r21,32,r25 - addi r25,-1,r21 - addz.l r20,r63,r20 -LOCAL(sdivsi3_loop): - shlli r20,1,r20 - bgeu/u r21,r20,tr0 - sub r20,r21,r20 -LOCAL(sdivsi3_dontsub): - addi.l r25,-1,r25 - bnei r25,-32,tr1 - xor r20,r19,r20 - sub.l r20,r19,r0 - blink tr2,r63 - ENDFUNC(GLOBAL(sdivsi3)) -#else /* ! __SHMEDIA__ */ - FUNC(GLOBAL(sdivsi3)) -GLOBAL(sdivsi3): - mov r4,r1 - mov r5,r0 - - tst r0,r0 - bt div0 - mov #0,r2 - div0s r2,r1 - subc r3,r3 - subc r2,r1 - div0s r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - addc r2,r1 - rts - mov r1,r0 - - -div0: rts - mov #0,r0 - - ENDFUNC(GLOBAL(sdivsi3)) -#endif /* ! __SHMEDIA__ */ -#endif /* ! __SH4__ */ -#endif -#ifdef L_udivsi3_i4 - - .title "SH DIVIDE" -!! 4 byte integer Divide code for the Renesas SH -#ifdef __SH4__ -!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4, -!! and t bit - - .global GLOBAL(udivsi3_i4) - HIDDEN_FUNC(GLOBAL(udivsi3_i4)) -GLOBAL(udivsi3_i4): - mov #1,r1 - cmp/hi r1,r5 - bf trivial - rotr r1 - xor r1,r4 - lds r4,fpul - mova L1,r0 -#ifdef FMOVD_WORKS - fmov.d @r0+,dr4 -#else - fmov.s @r0+,DR40 - fmov.s @r0,DR41 -#endif - float fpul,dr0 - xor r1,r5 - lds r5,fpul - float fpul,dr2 - fadd dr4,dr0 - fadd dr4,dr2 - fdiv dr2,dr0 - rts - ftrc dr0,fpul - -trivial: - rts - lds r4,fpul - - .align 2 -#ifdef FMOVD_WORKS - .align 3 ! make double below 8 byte aligned. -#endif -L1: - .double 2147483648 - - ENDFUNC(GLOBAL(udivsi3_i4)) -#elif defined (__SH5__) && ! defined (__SH4_NOFPU__) -#if ! __SH5__ || __SH5__ == 32 -!! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33 - .mode SHmedia - .global GLOBAL(udivsi3_i4) - HIDDEN_FUNC(GLOBAL(udivsi3_i4)) -GLOBAL(udivsi3_i4): - addz.l r4,r63,r20 - addz.l r5,r63,r21 - fmov.qd r20,dr0 - fmov.qd r21,dr32 - ptabs r18,tr0 - float.qd dr0,dr0 - float.qd dr32,dr32 - fdiv.d dr0,dr32,dr0 - ftrc.dq dr0,dr32 - fmov.s fr33,fr32 - blink tr0,r63 - - ENDFUNC(GLOBAL(udivsi3_i4)) -#endif /* ! __SH5__ || __SH5__ == 32 */ -#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) -!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4 - - .global GLOBAL(udivsi3_i4) - HIDDEN_FUNC(GLOBAL(udivsi3_i4)) -GLOBAL(udivsi3_i4): - mov #1,r1 - cmp/hi r1,r5 - bf trivial - sts.l fpscr,@-r15 - mova L1,r0 - lds.l @r0+,fpscr - rotr r1 - xor r1,r4 - lds r4,fpul -#ifdef FMOVD_WORKS - fmov.d @r0+,dr4 -#else - fmov.s @r0+,DR40 - fmov.s @r0,DR41 -#endif - float fpul,dr0 - xor r1,r5 - lds r5,fpul - float fpul,dr2 - fadd dr4,dr0 - fadd dr4,dr2 - fdiv dr2,dr0 - ftrc dr0,fpul - rts - lds.l @r15+,fpscr - -#ifdef FMOVD_WORKS - .align 3 ! make double below 8 byte aligned. -#endif -trivial: - rts - lds r4,fpul - - .align 2 -L1: -#ifndef FMOVD_WORKS - .long 0x80000 -#else - .long 0x180000 -#endif - .double 2147483648 - - ENDFUNC(GLOBAL(udivsi3_i4)) -#endif /* ! __SH4__ */ -#endif - -#ifdef L_udivsi3 -/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with - sh2e/sh3e code. */ -#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__) - -!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit - .global GLOBAL(udivsi3) - HIDDEN_FUNC(GLOBAL(udivsi3)) - -#if __SHMEDIA__ -#if __SH5__ == 32 - .section .text..SHmedia32,"ax" -#else - .text -#endif - .align 2 -#if 0 -/* The assembly code that follows is a hand-optimized version of the C - code that follows. Note that the registers that are modified are - exactly those listed as clobbered in the patterns udivsi3_i1 and - udivsi3_i1_media. - -unsigned -__udivsi3 (i, j) - unsigned i, j; -{ - register unsigned long long r0 asm ("r0") = 0; - register unsigned long long r18 asm ("r18") = 1; - register unsigned long long r4 asm ("r4") = i; - register unsigned long long r19 asm ("r19") = j; - - r19 <<= 31; - r18 <<= 31; - do - if (r4 >= r19) - r0 |= r18, r4 -= r19; - while (r19 >>= 1, r18 >>= 1); - - return r0; -} -*/ -GLOBAL(udivsi3): - pt/l LOCAL(udivsi3_dontadd), tr2 - pt/l LOCAL(udivsi3_loop), tr1 - ptabs/l r18, tr0 - movi 0, r0 - movi 1, r18 - addz.l r5, r63, r19 - addz.l r4, r63, r4 - shlli r19, 31, r19 - shlli r18, 31, r18 -LOCAL(udivsi3_loop): - bgtu r19, r4, tr2 - or r0, r18, r0 - sub r4, r19, r4 -LOCAL(udivsi3_dontadd): - shlri r18, 1, r18 - shlri r19, 1, r19 - bnei r18, 0, tr1 - blink tr0, r63 -#else -GLOBAL(udivsi3): - // inputs: r4,r5 - // clobbered: r18,r19,r20,r21,r22,r25,tr0 - // result in r0. - addz.l r5,r63,r22 - nsb r22,r0 - shlld r22,r0,r25 - shlri r25,48,r25 - movi 0xffffffffffffbb0c,r20 // shift count eqiv 76 - sub r20,r25,r21 - mmulfx.w r21,r21,r19 - mshflo.w r21,r63,r21 - ptabs r18,tr0 - mmulfx.w r25,r19,r19 - sub r20,r0,r0 - /* bubble */ - msub.w r21,r19,r19 - addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21 - before the msub.w, but we need a different value for - r19 to keep errors under control. */ - mulu.l r4,r21,r18 - mmulfx.w r19,r19,r19 - shlli r21,15,r21 - shlrd r18,r0,r18 - mulu.l r18,r22,r20 - mmacnfx.wl r25,r19,r21 - /* bubble */ - sub r4,r20,r25 - - mulu.l r25,r21,r19 - addi r0,14,r0 - /* bubble */ - shlrd r19,r0,r19 - mulu.l r19,r22,r20 - add r18,r19,r18 - /* bubble */ - sub.l r25,r20,r25 - - mulu.l r25,r21,r19 - addz.l r25,r63,r25 - sub r25,r22,r25 - shlrd r19,r0,r19 - mulu.l r19,r22,r20 - addi r25,1,r25 - add r18,r19,r18 - - cmpgt r25,r20,r25 - add.l r18,r25,r0 - blink tr0,r63 -#endif -#elif defined (__SHMEDIA__) -/* m5compact-nofpu - more emphasis on code size than on speed, but don't - ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4. - So use a short shmedia loop. */ - // clobbered: r20,r21,r25,tr0,tr1,tr2 - .mode SHmedia - .section .text..SHmedia32,"ax" - .align 2 -GLOBAL(udivsi3): - pt/l LOCAL(udivsi3_dontsub), tr0 - pt/l LOCAL(udivsi3_loop), tr1 - ptabs/l r18,tr2 - shlli r5,32,r25 - addi r25,-1,r21 - addz.l r4,r63,r20 -LOCAL(udivsi3_loop): - shlli r20,1,r20 - bgeu/u r21,r20,tr0 - sub r20,r21,r20 -LOCAL(udivsi3_dontsub): - addi.l r25,-1,r25 - bnei r25,-32,tr1 - add.l r20,r63,r0 - blink tr2,r63 -#else /* ! defined (__SHMEDIA__) */ -LOCAL(div8): - div1 r5,r4 -LOCAL(div7): - div1 r5,r4; div1 r5,r4; div1 r5,r4 - div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4 - -LOCAL(divx4): - div1 r5,r4; rotcl r0 - div1 r5,r4; rotcl r0 - div1 r5,r4; rotcl r0 - rts; div1 r5,r4 - -GLOBAL(udivsi3): - sts.l pr,@-r15 - extu.w r5,r0 - cmp/eq r5,r0 -#ifdef __sh1__ - bf LOCAL(large_divisor) -#else - bf/s LOCAL(large_divisor) -#endif - div0u - swap.w r4,r0 - shlr16 r4 - bsr LOCAL(div8) - shll16 r5 - bsr LOCAL(div7) - div1 r5,r4 - xtrct r4,r0 - xtrct r0,r4 - bsr LOCAL(div8) - swap.w r4,r4 - bsr LOCAL(div7) - div1 r5,r4 - lds.l @r15+,pr - xtrct r4,r0 - swap.w r0,r0 - rotcl r0 - rts - shlr16 r5 - -LOCAL(large_divisor): -#ifdef __sh1__ - div0u -#endif - mov #0,r0 - xtrct r4,r0 - xtrct r0,r4 - bsr LOCAL(divx4) - rotcl r0 - bsr LOCAL(divx4) - rotcl r0 - bsr LOCAL(divx4) - rotcl r0 - bsr LOCAL(divx4) - rotcl r0 - lds.l @r15+,pr - rts - rotcl r0 - - ENDFUNC(GLOBAL(udivsi3)) -#endif /* ! __SHMEDIA__ */ -#endif /* __SH4__ */ -#endif /* L_udivsi3 */ - -#ifdef L_udivdi3 -#ifdef __SHMEDIA__ - .mode SHmedia - .section .text..SHmedia32,"ax" - .align 2 - .global GLOBAL(udivdi3) - FUNC(GLOBAL(udivdi3)) -GLOBAL(udivdi3): - HIDDEN_ALIAS(udivdi3_internal,udivdi3) - shlri r3,1,r4 - nsb r4,r22 - shlld r3,r22,r6 - shlri r6,49,r5 - movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */ - sub r21,r5,r1 - mmulfx.w r1,r1,r4 - mshflo.w r1,r63,r1 - sub r63,r22,r20 // r63 == 64 % 64 - mmulfx.w r5,r4,r4 - pta LOCAL(large_divisor),tr0 - addi r20,32,r9 - msub.w r1,r4,r1 - madd.w r1,r1,r1 - mmulfx.w r1,r1,r4 - shlri r6,32,r7 - bgt/u r9,r63,tr0 // large_divisor - mmulfx.w r5,r4,r4 - shlri r2,32+14,r19 - addi r22,-31,r0 - msub.w r1,r4,r1 - - mulu.l r1,r7,r4 - addi r1,-3,r5 - mulu.l r5,r19,r5 - sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 - shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as - the case may be, %0000000000000000 000.11111111111, still */ - muls.l r1,r4,r4 /* leaving at least one sign bit. */ - mulu.l r5,r3,r8 - mshalds.l r1,r21,r1 - shari r4,26,r4 - shlld r8,r0,r8 - add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) - sub r2,r8,r2 - /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */ - - shlri r2,22,r21 - mulu.l r21,r1,r21 - shlld r5,r0,r8 - addi r20,30-22,r0 - shlrd r21,r0,r21 - mulu.l r21,r3,r5 - add r8,r21,r8 - mcmpgt.l r21,r63,r21 // See Note 1 - addi r20,30,r0 - mshfhi.l r63,r21,r21 - sub r2,r5,r2 - andc r2,r21,r2 - - /* small divisor: need a third divide step */ - mulu.l r2,r1,r7 - ptabs r18,tr0 - addi r2,1,r2 - shlrd r7,r0,r7 - mulu.l r7,r3,r5 - add r8,r7,r8 - sub r2,r3,r2 - cmpgt r2,r5,r5 - add r8,r5,r2 - /* could test r3 here to check for divide by zero. */ - blink tr0,r63 - -LOCAL(large_divisor): - mmulfx.w r5,r4,r4 - shlrd r2,r9,r25 - shlri r25,32,r8 - msub.w r1,r4,r1 - - mulu.l r1,r7,r4 - addi r1,-3,r5 - mulu.l r5,r8,r5 - sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 - shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as - the case may be, %0000000000000000 000.11111111111, still */ - muls.l r1,r4,r4 /* leaving at least one sign bit. */ - shlri r5,14-1,r8 - mulu.l r8,r7,r5 - mshalds.l r1,r21,r1 - shari r4,26,r4 - add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) - sub r25,r5,r25 - /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */ - - shlri r25,22,r21 - mulu.l r21,r1,r21 - pta LOCAL(no_lo_adj),tr0 - addi r22,32,r0 - shlri r21,40,r21 - mulu.l r21,r7,r5 - add r8,r21,r8 - shlld r2,r0,r2 - sub r25,r5,r25 - bgtu/u r7,r25,tr0 // no_lo_adj - addi r8,1,r8 - sub r25,r7,r25 -LOCAL(no_lo_adj): - mextr4 r2,r25,r2 - - /* large_divisor: only needs a few adjustments. */ - mulu.l r8,r6,r5 - ptabs r18,tr0 - /* bubble */ - cmpgtu r5,r2,r5 - sub r8,r5,r2 - blink tr0,r63 - ENDFUNC(GLOBAL(udivdi3)) -/* Note 1: To shift the result of the second divide stage so that the result - always fits into 32 bits, yet we still reduce the rest sufficiently - would require a lot of instructions to do the shifts just right. Using - the full 64 bit shift result to multiply with the divisor would require - four extra instructions for the upper 32 bits (shift / mulu / shift / sub). - Fortunately, if the upper 32 bits of the shift result are nonzero, we - know that the rest after taking this partial result into account will - fit into 32 bits. So we just clear the upper 32 bits of the rest if the - upper 32 bits of the partial result are nonzero. */ -#endif /* __SHMEDIA__ */ -#endif /* L_udivdi3 */ - -#ifdef L_divdi3 -#ifdef __SHMEDIA__ - .mode SHmedia - .section .text..SHmedia32,"ax" - .align 2 - .global GLOBAL(divdi3) - FUNC(GLOBAL(divdi3)) -GLOBAL(divdi3): - pta GLOBAL(udivdi3_internal),tr0 - shari r2,63,r22 - shari r3,63,r23 - xor r2,r22,r2 - xor r3,r23,r3 - sub r2,r22,r2 - sub r3,r23,r3 - beq/u r22,r23,tr0 - ptabs r18,tr1 - blink tr0,r18 - sub r63,r2,r2 - blink tr1,r63 - ENDFUNC(GLOBAL(divdi3)) -#endif /* __SHMEDIA__ */ -#endif /* L_divdi3 */ - -#ifdef L_umoddi3 -#ifdef __SHMEDIA__ - .mode SHmedia - .section .text..SHmedia32,"ax" - .align 2 - .global GLOBAL(umoddi3) - FUNC(GLOBAL(umoddi3)) -GLOBAL(umoddi3): - HIDDEN_ALIAS(umoddi3_internal,umoddi3) - shlri r3,1,r4 - nsb r4,r22 - shlld r3,r22,r6 - shlri r6,49,r5 - movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */ - sub r21,r5,r1 - mmulfx.w r1,r1,r4 - mshflo.w r1,r63,r1 - sub r63,r22,r20 // r63 == 64 % 64 - mmulfx.w r5,r4,r4 - pta LOCAL(large_divisor),tr0 - addi r20,32,r9 - msub.w r1,r4,r1 - madd.w r1,r1,r1 - mmulfx.w r1,r1,r4 - shlri r6,32,r7 - bgt/u r9,r63,tr0 // large_divisor - mmulfx.w r5,r4,r4 - shlri r2,32+14,r19 - addi r22,-31,r0 - msub.w r1,r4,r1 - - mulu.l r1,r7,r4 - addi r1,-3,r5 - mulu.l r5,r19,r5 - sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 - shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as - the case may be, %0000000000000000 000.11111111111, still */ - muls.l r1,r4,r4 /* leaving at least one sign bit. */ - mulu.l r5,r3,r5 - mshalds.l r1,r21,r1 - shari r4,26,r4 - shlld r5,r0,r5 - add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) - sub r2,r5,r2 - /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */ - - shlri r2,22,r21 - mulu.l r21,r1,r21 - addi r20,30-22,r0 - /* bubble */ /* could test r3 here to check for divide by zero. */ - shlrd r21,r0,r21 - mulu.l r21,r3,r5 - mcmpgt.l r21,r63,r21 // See Note 1 - addi r20,30,r0 - mshfhi.l r63,r21,r21 - sub r2,r5,r2 - andc r2,r21,r2 - - /* small divisor: need a third divide step */ - mulu.l r2,r1,r7 - ptabs r18,tr0 - sub r2,r3,r8 /* re-use r8 here for rest - r3 */ - shlrd r7,r0,r7 - mulu.l r7,r3,r5 - /* bubble */ - addi r8,1,r7 - cmpgt r7,r5,r7 - cmvne r7,r8,r2 - sub r2,r5,r2 - blink tr0,r63 - -LOCAL(large_divisor): - mmulfx.w r5,r4,r4 - shlrd r2,r9,r25 - shlri r25,32,r8 - msub.w r1,r4,r1 - - mulu.l r1,r7,r4 - addi r1,-3,r5 - mulu.l r5,r8,r5 - sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 - shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as - the case may be, %0000000000000000 000.11111111111, still */ - muls.l r1,r4,r4 /* leaving at least one sign bit. */ - shlri r5,14-1,r8 - mulu.l r8,r7,r5 - mshalds.l r1,r21,r1 - shari r4,26,r4 - add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) - sub r25,r5,r25 - /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */ - - shlri r25,22,r21 - mulu.l r21,r1,r21 - pta LOCAL(no_lo_adj),tr0 - addi r22,32,r0 - shlri r21,40,r21 - mulu.l r21,r7,r5 - add r8,r21,r8 - shlld r2,r0,r2 - sub r25,r5,r25 - bgtu/u r7,r25,tr0 // no_lo_adj - addi r8,1,r8 - sub r25,r7,r25 -LOCAL(no_lo_adj): - mextr4 r2,r25,r2 - - /* large_divisor: only needs a few adjustments. */ - mulu.l r8,r6,r5 - ptabs r18,tr0 - add r2,r6,r7 - cmpgtu r5,r2,r8 - cmvne r8,r7,r2 - sub r2,r5,r2 - shlrd r2,r22,r2 - blink tr0,r63 - ENDFUNC(GLOBAL(umoddi3)) -/* Note 1: To shift the result of the second divide stage so that the result - always fits into 32 bits, yet we still reduce the rest sufficiently - would require a lot of instructions to do the shifts just right. Using - the full 64 bit shift result to multiply with the divisor would require - four extra instructions for the upper 32 bits (shift / mulu / shift / sub). - Fortunately, if the upper 32 bits of the shift result are nonzero, we - know that the rest after taking this partial result into account will - fit into 32 bits. So we just clear the upper 32 bits of the rest if the - upper 32 bits of the partial result are nonzero. */ -#endif /* __SHMEDIA__ */ -#endif /* L_umoddi3 */ - -#ifdef L_moddi3 -#ifdef __SHMEDIA__ - .mode SHmedia - .section .text..SHmedia32,"ax" - .align 2 - .global GLOBAL(moddi3) - FUNC(GLOBAL(moddi3)) -GLOBAL(moddi3): - pta GLOBAL(umoddi3_internal),tr0 - shari r2,63,r22 - shari r3,63,r23 - xor r2,r22,r2 - xor r3,r23,r3 - sub r2,r22,r2 - sub r3,r23,r3 - beq/u r22,r63,tr0 - ptabs r18,tr1 - blink tr0,r18 - sub r63,r2,r2 - blink tr1,r63 - ENDFUNC(GLOBAL(moddi3)) -#endif /* __SHMEDIA__ */ -#endif /* L_moddi3 */ - -#ifdef L_set_fpscr -#if !defined (__SH2A_NOFPU__) -#if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32 -#ifdef __SH5__ - .mode SHcompact -#endif - .global GLOBAL(set_fpscr) - HIDDEN_FUNC(GLOBAL(set_fpscr)) -GLOBAL(set_fpscr): - lds r4,fpscr -#ifdef __PIC__ - mov.l r12,@-r15 -#ifdef __vxworks - mov.l LOCAL(set_fpscr_L0_base),r12 - mov.l LOCAL(set_fpscr_L0_index),r0 - mov.l @r12,r12 - mov.l @(r0,r12),r12 -#else - mova LOCAL(set_fpscr_L0),r0 - mov.l LOCAL(set_fpscr_L0),r12 - add r0,r12 -#endif - mov.l LOCAL(set_fpscr_L1),r0 - mov.l @(r0,r12),r1 - mov.l @r15+,r12 -#else - mov.l LOCAL(set_fpscr_L1),r1 -#endif - swap.w r4,r0 - or #24,r0 -#ifndef FMOVD_WORKS - xor #16,r0 -#endif -#if defined(__SH4__) || defined (__SH2A_DOUBLE__) - swap.w r0,r3 - mov.l r3,@(4,r1) -#else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */ - swap.w r0,r2 - mov.l r2,@r1 -#endif -#ifndef FMOVD_WORKS - xor #8,r0 -#else - xor #24,r0 -#endif -#if defined(__SH4__) || defined (__SH2A_DOUBLE__) - swap.w r0,r2 - rts - mov.l r2,@r1 -#else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */ - swap.w r0,r3 - rts - mov.l r3,@(4,r1) -#endif - .align 2 -#ifdef __PIC__ -#ifdef __vxworks -LOCAL(set_fpscr_L0_base): - .long ___GOTT_BASE__ -LOCAL(set_fpscr_L0_index): - .long ___GOTT_INDEX__ -#else -LOCAL(set_fpscr_L0): - .long _GLOBAL_OFFSET_TABLE_ -#endif -LOCAL(set_fpscr_L1): - .long GLOBAL(fpscr_values@GOT) -#else -LOCAL(set_fpscr_L1): - .long GLOBAL(fpscr_values) -#endif - - ENDFUNC(GLOBAL(set_fpscr)) -#ifndef NO_FPSCR_VALUES -#ifdef __ELF__ - .comm GLOBAL(fpscr_values),8,4 -#else - .comm GLOBAL(fpscr_values),8 -#endif /* ELF */ -#endif /* NO_FPSCR_VALUES */ -#endif /* SH2E / SH3E / SH4 */ -#endif /* __SH2A_NOFPU__ */ -#endif /* L_set_fpscr */ -#ifdef L_ic_invalidate -#if __SH5__ == 32 - .mode SHmedia - .section .text..SHmedia32,"ax" - .align 2 - .global GLOBAL(init_trampoline) - HIDDEN_FUNC(GLOBAL(init_trampoline)) -GLOBAL(init_trampoline): - st.l r0,8,r2 -#ifdef __LITTLE_ENDIAN__ - movi 9,r20 - shori 0x402b,r20 - shori 0xd101,r20 - shori 0xd002,r20 -#else - movi 0xffffffffffffd002,r20 - shori 0xd101,r20 - shori 0x402b,r20 - shori 9,r20 -#endif - st.q r0,0,r20 - st.l r0,12,r3 - ENDFUNC(GLOBAL(init_trampoline)) - .global GLOBAL(ic_invalidate) - HIDDEN_FUNC(GLOBAL(ic_invalidate)) -GLOBAL(ic_invalidate): - ocbwb r0,0 - synco - icbi r0, 0 - ptabs r18, tr0 - synci - blink tr0, r63 - ENDFUNC(GLOBAL(ic_invalidate)) -#elif defined(__SH4A__) - .global GLOBAL(ic_invalidate) - HIDDEN_FUNC(GLOBAL(ic_invalidate)) -GLOBAL(ic_invalidate): - ocbwb @r4 - synco - icbi @r4 - rts - nop - ENDFUNC(GLOBAL(ic_invalidate)) -#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__)) - /* For system code, we use ic_invalidate_line_i, but user code - needs a different mechanism. A kernel call is generally not - available, and it would also be slow. Different SH4 variants use - different sizes and associativities of the Icache. We use a small - bit of dispatch code that can be put hidden in every shared object, - which calls the actual processor-specific invalidation code in a - separate module. - Or if you have operating system support, the OS could mmap the - procesor-specific code from a single page, since it is highly - repetitive. */ - .global GLOBAL(ic_invalidate) - HIDDEN_FUNC(GLOBAL(ic_invalidate)) -GLOBAL(ic_invalidate): -#ifdef __pic__ -#ifdef __vxworks - mov.l 1f,r1 - mov.l 2f,r0 - mov.l @r1,r1 - mov.l 0f,r2 - mov.l @(r0,r1),r0 -#else - mov.l 1f,r1 - mova 1f,r0 - mov.l 0f,r2 - add r1,r0 -#endif - mov.l @(r0,r2),r1 -#else - mov.l 0f,r1 -#endif - ocbwb @r4 - mov.l @(8,r1),r0 - sub r1,r4 - and r4,r0 - add r1,r0 - jmp @r0 - mov.l @(4,r1),r0 - .align 2 -#ifndef __pic__ -0: .long GLOBAL(ic_invalidate_array) -#else /* __pic__ */ - .global GLOBAL(ic_invalidate_array) -0: .long GLOBAL(ic_invalidate_array)@GOT -#ifdef __vxworks -1: .long ___GOTT_BASE__ -2: .long ___GOTT_INDEX__ -#else -1: .long _GLOBAL_OFFSET_TABLE_ -#endif - ENDFUNC(GLOBAL(ic_invalidate)) -#endif /* __pic__ */ -#endif /* SH4 */ -#endif /* L_ic_invalidate */ - -#ifdef L_ic_invalidate_array -#if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__)))) - .global GLOBAL(ic_invalidate_array) - /* This is needed when an SH4 dso with trampolines is used on SH4A. */ - .global GLOBAL(ic_invalidate_array) - FUNC(GLOBAL(ic_invalidate_array)) -GLOBAL(ic_invalidate_array): - add r1,r4 - synco - icbi @r4 - rts - nop - .align 2 - .long 0 - ENDFUNC(GLOBAL(ic_invalidate_array)) -#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__)) - .global GLOBAL(ic_invalidate_array) - .p2align 5 - FUNC(GLOBAL(ic_invalidate_array)) -/* This must be aligned to the beginning of a cache line. */ -GLOBAL(ic_invalidate_array): -#ifndef WAYS -#define WAYS 4 -#define WAY_SIZE 0x4000 -#endif -#if WAYS == 1 - .rept WAY_SIZE * WAYS / 32 - rts - nop - .rept 7 - .long WAY_SIZE - 32 - .endr - .endr -#elif WAYS <= 6 - .rept WAY_SIZE * WAYS / 32 - braf r0 - add #-8,r0 - .long WAY_SIZE + 8 - .long WAY_SIZE - 32 - .rept WAYS-2 - braf r0 - nop - .endr - .rept 7 - WAYS - rts - nop - .endr - .endr -#else /* WAYS > 6 */ - /* This variant needs two different pages for mmap-ing. */ - .rept WAYS-1 - .rept WAY_SIZE / 32 - braf r0 - nop - .long WAY_SIZE - .rept 6 - .long WAY_SIZE - 32 - .endr - .endr - .endr - .rept WAY_SIZE / 32 - rts - .rept 15 - nop - .endr - .endr -#endif /* WAYS */ - ENDFUNC(GLOBAL(ic_invalidate_array)) -#endif /* SH4 */ -#endif /* L_ic_invalidate_array */ - -#if defined (__SH5__) && __SH5__ == 32 -#ifdef L_shcompact_call_trampoline - .section .rodata - .align 1 -LOCAL(ct_main_table): -.word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label) - .mode SHmedia - .section .text..SHmedia32, "ax" - .align 2 - - /* This function loads 64-bit general-purpose registers from the - stack, from a memory address contained in them or from an FP - register, according to a cookie passed in r1. Its execution - time is linear on the number of registers that actually have - to be copied. See sh.h for details on the actual bit pattern. - - The function to be called is passed in r0. If a 32-bit return - value is expected, the actual function will be tail-called, - otherwise the return address will be stored in r10 (that the - caller should expect to be clobbered) and the return value - will be expanded into r2/r3 upon return. */ - - .global GLOBAL(GCC_shcompact_call_trampoline) - FUNC(GLOBAL(GCC_shcompact_call_trampoline)) -GLOBAL(GCC_shcompact_call_trampoline): - ptabs/l r0, tr0 /* Prepare to call the actual function. */ - movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0 - pt/l LOCAL(ct_loop), tr1 - addz.l r1, r63, r1 - shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0 -LOCAL(ct_loop): - nsb r1, r28 - shlli r28, 1, r29 - ldx.w r0, r29, r30 -LOCAL(ct_main_label): - ptrel/l r30, tr2 - blink tr2, r63 -LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */ - /* It must be dr0, so just do it. */ - fmov.dq dr0, r2 - movi 7, r30 - shlli r30, 29, r31 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */ - /* It is either dr0 or dr2. */ - movi 7, r30 - shlri r1, 26, r32 - shlli r30, 26, r31 - andc r1, r31, r1 - fmov.dq dr0, r3 - beqi/l r32, 4, tr1 - fmov.dq dr2, r3 - blink tr1, r63 -LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */ - shlri r1, 23 - 3, r34 - andi r34, 3 << 3, r33 - addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32 -LOCAL(ct_r4_fp_base): - ptrel/l r32, tr2 - movi 7, r30 - shlli r30, 23, r31 - andc r1, r31, r1 - blink tr2, r63 -LOCAL(ct_r4_fp_copy): - fmov.dq dr0, r4 - blink tr1, r63 - fmov.dq dr2, r4 - blink tr1, r63 - fmov.dq dr4, r4 - blink tr1, r63 -LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */ - shlri r1, 20 - 3, r34 - andi r34, 3 << 3, r33 - addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32 -LOCAL(ct_r5_fp_base): - ptrel/l r32, tr2 - movi 7, r30 - shlli r30, 20, r31 - andc r1, r31, r1 - blink tr2, r63 -LOCAL(ct_r5_fp_copy): - fmov.dq dr0, r5 - blink tr1, r63 - fmov.dq dr2, r5 - blink tr1, r63 - fmov.dq dr4, r5 - blink tr1, r63 - fmov.dq dr6, r5 - blink tr1, r63 -LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */ - /* It must be dr8. */ - fmov.dq dr8, r6 - movi 15, r30 - shlli r30, 16, r31 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */ - shlri r1, 16 - 3, r34 - andi r34, 3 << 3, r33 - addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32 -LOCAL(ct_r6_fp_base): - ptrel/l r32, tr2 - movi 7, r30 - shlli r30, 16, r31 - andc r1, r31, r1 - blink tr2, r63 -LOCAL(ct_r6_fp_copy): - fmov.dq dr0, r6 - blink tr1, r63 - fmov.dq dr2, r6 - blink tr1, r63 - fmov.dq dr4, r6 - blink tr1, r63 - fmov.dq dr6, r6 - blink tr1, r63 -LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */ - /* It is either dr8 or dr10. */ - movi 15 << 12, r31 - shlri r1, 12, r32 - andc r1, r31, r1 - fmov.dq dr8, r7 - beqi/l r32, 8, tr1 - fmov.dq dr10, r7 - blink tr1, r63 -LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */ - shlri r1, 12 - 3, r34 - andi r34, 3 << 3, r33 - addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32 -LOCAL(ct_r7_fp_base): - ptrel/l r32, tr2 - movi 7 << 12, r31 - andc r1, r31, r1 - blink tr2, r63 -LOCAL(ct_r7_fp_copy): - fmov.dq dr0, r7 - blink tr1, r63 - fmov.dq dr2, r7 - blink tr1, r63 - fmov.dq dr4, r7 - blink tr1, r63 - fmov.dq dr6, r7 - blink tr1, r63 -LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */ - /* It is either dr8 or dr10. */ - movi 15 << 8, r31 - andi r1, 1 << 8, r32 - andc r1, r31, r1 - fmov.dq dr8, r8 - beq/l r32, r63, tr1 - fmov.dq dr10, r8 - blink tr1, r63 -LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */ - shlri r1, 8 - 3, r34 - andi r34, 3 << 3, r33 - addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32 -LOCAL(ct_r8_fp_base): - ptrel/l r32, tr2 - movi 7 << 8, r31 - andc r1, r31, r1 - blink tr2, r63 -LOCAL(ct_r8_fp_copy): - fmov.dq dr0, r8 - blink tr1, r63 - fmov.dq dr2, r8 - blink tr1, r63 - fmov.dq dr4, r8 - blink tr1, r63 - fmov.dq dr6, r8 - blink tr1, r63 -LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */ - /* It is either dr8 or dr10. */ - movi 15 << 4, r31 - andi r1, 1 << 4, r32 - andc r1, r31, r1 - fmov.dq dr8, r9 - beq/l r32, r63, tr1 - fmov.dq dr10, r9 - blink tr1, r63 -LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */ - shlri r1, 4 - 3, r34 - andi r34, 3 << 3, r33 - addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32 -LOCAL(ct_r9_fp_base): - ptrel/l r32, tr2 - movi 7 << 4, r31 - andc r1, r31, r1 - blink tr2, r63 -LOCAL(ct_r9_fp_copy): - fmov.dq dr0, r9 - blink tr1, r63 - fmov.dq dr2, r9 - blink tr1, r63 - fmov.dq dr4, r9 - blink tr1, r63 - fmov.dq dr6, r9 - blink tr1, r63 -LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */ - pt/l LOCAL(ct_r2_load), tr2 - movi 3, r30 - shlli r30, 29, r31 - and r1, r31, r32 - andc r1, r31, r1 - beq/l r31, r32, tr2 - addi.l r2, 8, r3 - ldx.q r2, r63, r2 - /* Fall through. */ -LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */ - pt/l LOCAL(ct_r3_load), tr2 - movi 3, r30 - shlli r30, 26, r31 - and r1, r31, r32 - andc r1, r31, r1 - beq/l r31, r32, tr2 - addi.l r3, 8, r4 - ldx.q r3, r63, r3 -LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */ - pt/l LOCAL(ct_r4_load), tr2 - movi 3, r30 - shlli r30, 23, r31 - and r1, r31, r32 - andc r1, r31, r1 - beq/l r31, r32, tr2 - addi.l r4, 8, r5 - ldx.q r4, r63, r4 -LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */ - pt/l LOCAL(ct_r5_load), tr2 - movi 3, r30 - shlli r30, 20, r31 - and r1, r31, r32 - andc r1, r31, r1 - beq/l r31, r32, tr2 - addi.l r5, 8, r6 - ldx.q r5, r63, r5 -LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */ - pt/l LOCAL(ct_r6_load), tr2 - movi 3 << 16, r31 - and r1, r31, r32 - andc r1, r31, r1 - beq/l r31, r32, tr2 - addi.l r6, 8, r7 - ldx.q r6, r63, r6 -LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */ - pt/l LOCAL(ct_r7_load), tr2 - movi 3 << 12, r31 - and r1, r31, r32 - andc r1, r31, r1 - beq/l r31, r32, tr2 - addi.l r7, 8, r8 - ldx.q r7, r63, r7 -LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */ - pt/l LOCAL(ct_r8_load), tr2 - movi 3 << 8, r31 - and r1, r31, r32 - andc r1, r31, r1 - beq/l r31, r32, tr2 - addi.l r8, 8, r9 - ldx.q r8, r63, r8 -LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */ - pt/l LOCAL(ct_check_tramp), tr2 - ldx.q r9, r63, r9 - blink tr2, r63 -LOCAL(ct_r2_load): - ldx.q r2, r63, r2 - blink tr1, r63 -LOCAL(ct_r3_load): - ldx.q r3, r63, r3 - blink tr1, r63 -LOCAL(ct_r4_load): - ldx.q r4, r63, r4 - blink tr1, r63 -LOCAL(ct_r5_load): - ldx.q r5, r63, r5 - blink tr1, r63 -LOCAL(ct_r6_load): - ldx.q r6, r63, r6 - blink tr1, r63 -LOCAL(ct_r7_load): - ldx.q r7, r63, r7 - blink tr1, r63 -LOCAL(ct_r8_load): - ldx.q r8, r63, r8 - blink tr1, r63 -LOCAL(ct_r2_pop): /* Pop r2 from the stack. */ - movi 1, r30 - ldx.q r15, r63, r2 - shlli r30, 29, r31 - addi.l r15, 8, r15 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_r3_pop): /* Pop r3 from the stack. */ - movi 1, r30 - ldx.q r15, r63, r3 - shlli r30, 26, r31 - addi.l r15, 8, r15 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_r4_pop): /* Pop r4 from the stack. */ - movi 1, r30 - ldx.q r15, r63, r4 - shlli r30, 23, r31 - addi.l r15, 8, r15 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_r5_pop): /* Pop r5 from the stack. */ - movi 1, r30 - ldx.q r15, r63, r5 - shlli r30, 20, r31 - addi.l r15, 8, r15 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_r6_pop): /* Pop r6 from the stack. */ - movi 1, r30 - ldx.q r15, r63, r6 - shlli r30, 16, r31 - addi.l r15, 8, r15 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_r7_pop): /* Pop r7 from the stack. */ - ldx.q r15, r63, r7 - movi 1 << 12, r31 - addi.l r15, 8, r15 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_r8_pop): /* Pop r8 from the stack. */ - ldx.q r15, r63, r8 - movi 1 << 8, r31 - addi.l r15, 8, r15 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */ - andi r1, 7 << 1, r30 - movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32 - shlli r30, 2, r31 - shori LOCAL(ct_end_of_pop_seq) & 65535, r32 - sub.l r32, r31, r33 - ptabs/l r33, tr2 - blink tr2, r63 -LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */ - ldx.q r15, r63, r3 - addi.l r15, 8, r15 - ldx.q r15, r63, r4 - addi.l r15, 8, r15 - ldx.q r15, r63, r5 - addi.l r15, 8, r15 - ldx.q r15, r63, r6 - addi.l r15, 8, r15 - ldx.q r15, r63, r7 - addi.l r15, 8, r15 - ldx.q r15, r63, r8 - addi.l r15, 8, r15 -LOCAL(ct_r9_pop): /* Pop r9 from the stack. */ - ldx.q r15, r63, r9 - addi.l r15, 8, r15 -LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */ -LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */ - pt/u LOCAL(ct_ret_wide), tr2 - andi r1, 1, r1 - bne/u r1, r63, tr2 -LOCAL(ct_call_func): /* Just branch to the function. */ - blink tr0, r63 -LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its - 64-bit return value. */ - add.l r18, r63, r10 - blink tr0, r18 - ptabs r10, tr0 -#if __LITTLE_ENDIAN__ - shari r2, 32, r3 - add.l r2, r63, r2 -#else - add.l r2, r63, r3 - shari r2, 32, r2 -#endif - blink tr0, r63 - - ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline)) -#endif /* L_shcompact_call_trampoline */ - -#ifdef L_shcompact_return_trampoline - /* This function does the converse of the code in `ret_wide' - above. It is tail-called by SHcompact functions returning - 64-bit non-floating-point values, to pack the 32-bit values in - r2 and r3 into r2. */ - - .mode SHmedia - .section .text..SHmedia32, "ax" - .align 2 - .global GLOBAL(GCC_shcompact_return_trampoline) - HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline)) -GLOBAL(GCC_shcompact_return_trampoline): - ptabs/l r18, tr0 -#if __LITTLE_ENDIAN__ - addz.l r2, r63, r2 - shlli r3, 32, r3 -#else - addz.l r3, r63, r3 - shlli r2, 32, r2 -#endif - or r3, r2, r2 - blink tr0, r63 - - ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline)) -#endif /* L_shcompact_return_trampoline */ - -#ifdef L_shcompact_incoming_args - .section .rodata - .align 1 -LOCAL(ia_main_table): -.word 1 /* Invalid, just loop */ -.word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label) -.word 1 /* Invalid, just loop */ -.word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label) -.word 1 /* Invalid, just loop */ -.word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label) -.word 1 /* Invalid, just loop */ -.word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label) -.word 1 /* Invalid, just loop */ -.word 1 /* Invalid, just loop */ -.word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label) -.word 1 /* Invalid, just loop */ -.word 1 /* Invalid, just loop */ -.word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label) -.word 1 /* Invalid, just loop */ -.word 1 /* Invalid, just loop */ -.word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label) -.word 1 /* Invalid, just loop */ -.word 1 /* Invalid, just loop */ -.word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_return) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_return) - datalabel LOCAL(ia_main_label) - .mode SHmedia - .section .text..SHmedia32, "ax" - .align 2 - - /* This function stores 64-bit general-purpose registers back in - the stack, and loads the address in which each register - was stored into itself. The lower 32 bits of r17 hold the address - to begin storing, and the upper 32 bits of r17 hold the cookie. - Its execution time is linear on the - number of registers that actually have to be copied, and it is - optimized for structures larger than 64 bits, as opposed to - individual `long long' arguments. See sh.h for details on the - actual bit pattern. */ - - .global GLOBAL(GCC_shcompact_incoming_args) - FUNC(GLOBAL(GCC_shcompact_incoming_args)) -GLOBAL(GCC_shcompact_incoming_args): - ptabs/l r18, tr0 /* Prepare to return. */ - shlri r17, 32, r0 /* Load the cookie. */ - movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43 - pt/l LOCAL(ia_loop), tr1 - add.l r17, r63, r17 - shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43 -LOCAL(ia_loop): - nsb r0, r36 - shlli r36, 1, r37 - ldx.w r43, r37, r38 -LOCAL(ia_main_label): - ptrel/l r38, tr2 - blink tr2, r63 -LOCAL(ia_r2_ld): /* Store r2 and load its address. */ - movi 3, r38 - shlli r38, 29, r39 - and r0, r39, r40 - andc r0, r39, r0 - stx.q r17, r63, r2 - add.l r17, r63, r2 - addi.l r17, 8, r17 - beq/u r39, r40, tr1 -LOCAL(ia_r3_ld): /* Store r3 and load its address. */ - movi 3, r38 - shlli r38, 26, r39 - and r0, r39, r40 - andc r0, r39, r0 - stx.q r17, r63, r3 - add.l r17, r63, r3 - addi.l r17, 8, r17 - beq/u r39, r40, tr1 -LOCAL(ia_r4_ld): /* Store r4 and load its address. */ - movi 3, r38 - shlli r38, 23, r39 - and r0, r39, r40 - andc r0, r39, r0 - stx.q r17, r63, r4 - add.l r17, r63, r4 - addi.l r17, 8, r17 - beq/u r39, r40, tr1 -LOCAL(ia_r5_ld): /* Store r5 and load its address. */ - movi 3, r38 - shlli r38, 20, r39 - and r0, r39, r40 - andc r0, r39, r0 - stx.q r17, r63, r5 - add.l r17, r63, r5 - addi.l r17, 8, r17 - beq/u r39, r40, tr1 -LOCAL(ia_r6_ld): /* Store r6 and load its address. */ - movi 3, r38 - shlli r38, 16, r39 - and r0, r39, r40 - andc r0, r39, r0 - stx.q r17, r63, r6 - add.l r17, r63, r6 - addi.l r17, 8, r17 - beq/u r39, r40, tr1 -LOCAL(ia_r7_ld): /* Store r7 and load its address. */ - movi 3 << 12, r39 - and r0, r39, r40 - andc r0, r39, r0 - stx.q r17, r63, r7 - add.l r17, r63, r7 - addi.l r17, 8, r17 - beq/u r39, r40, tr1 -LOCAL(ia_r8_ld): /* Store r8 and load its address. */ - movi 3 << 8, r39 - and r0, r39, r40 - andc r0, r39, r0 - stx.q r17, r63, r8 - add.l r17, r63, r8 - addi.l r17, 8, r17 - beq/u r39, r40, tr1 -LOCAL(ia_r9_ld): /* Store r9 and load its address. */ - stx.q r17, r63, r9 - add.l r17, r63, r9 - blink tr0, r63 -LOCAL(ia_r2_push): /* Push r2 onto the stack. */ - movi 1, r38 - shlli r38, 29, r39 - andc r0, r39, r0 - stx.q r17, r63, r2 - addi.l r17, 8, r17 - blink tr1, r63 -LOCAL(ia_r3_push): /* Push r3 onto the stack. */ - movi 1, r38 - shlli r38, 26, r39 - andc r0, r39, r0 - stx.q r17, r63, r3 - addi.l r17, 8, r17 - blink tr1, r63 -LOCAL(ia_r4_push): /* Push r4 onto the stack. */ - movi 1, r38 - shlli r38, 23, r39 - andc r0, r39, r0 - stx.q r17, r63, r4 - addi.l r17, 8, r17 - blink tr1, r63 -LOCAL(ia_r5_push): /* Push r5 onto the stack. */ - movi 1, r38 - shlli r38, 20, r39 - andc r0, r39, r0 - stx.q r17, r63, r5 - addi.l r17, 8, r17 - blink tr1, r63 -LOCAL(ia_r6_push): /* Push r6 onto the stack. */ - movi 1, r38 - shlli r38, 16, r39 - andc r0, r39, r0 - stx.q r17, r63, r6 - addi.l r17, 8, r17 - blink tr1, r63 -LOCAL(ia_r7_push): /* Push r7 onto the stack. */ - movi 1 << 12, r39 - andc r0, r39, r0 - stx.q r17, r63, r7 - addi.l r17, 8, r17 - blink tr1, r63 -LOCAL(ia_r8_push): /* Push r8 onto the stack. */ - movi 1 << 8, r39 - andc r0, r39, r0 - stx.q r17, r63, r8 - addi.l r17, 8, r17 - blink tr1, r63 -LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */ - andi r0, 7 << 1, r38 - movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40 - shlli r38, 2, r39 - shori LOCAL(ia_end_of_push_seq) & 65535, r40 - sub.l r40, r39, r41 - ptabs/l r41, tr2 - blink tr2, r63 -LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */ - stx.q r17, r63, r3 - addi.l r17, 8, r17 - stx.q r17, r63, r4 - addi.l r17, 8, r17 - stx.q r17, r63, r5 - addi.l r17, 8, r17 - stx.q r17, r63, r6 - addi.l r17, 8, r17 - stx.q r17, r63, r7 - addi.l r17, 8, r17 - stx.q r17, r63, r8 - addi.l r17, 8, r17 -LOCAL(ia_r9_push): /* Push r9 onto the stack. */ - stx.q r17, r63, r9 -LOCAL(ia_return): /* Return. */ - blink tr0, r63 -LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */ - ENDFUNC(GLOBAL(GCC_shcompact_incoming_args)) -#endif /* L_shcompact_incoming_args */ -#endif -#if __SH5__ -#ifdef L_nested_trampoline -#if __SH5__ == 32 - .section .text..SHmedia32,"ax" -#else - .text -#endif - .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */ - .global GLOBAL(GCC_nested_trampoline) - HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline)) -GLOBAL(GCC_nested_trampoline): - .mode SHmedia - ptrel/u r63, tr0 - gettr tr0, r0 -#if __SH5__ == 64 - ld.q r0, 24, r1 -#else - ld.l r0, 24, r1 -#endif - ptabs/l r1, tr1 -#if __SH5__ == 64 - ld.q r0, 32, r1 -#else - ld.l r0, 28, r1 -#endif - blink tr1, r63 - - ENDFUNC(GLOBAL(GCC_nested_trampoline)) -#endif /* L_nested_trampoline */ -#endif /* __SH5__ */ -#if __SH5__ == 32 -#ifdef L_push_pop_shmedia_regs - .section .text..SHmedia32,"ax" - .mode SHmedia - .align 2 -#ifndef __SH4_NOFPU__ - .global GLOBAL(GCC_push_shmedia_regs) - FUNC(GLOBAL(GCC_push_shmedia_regs)) -GLOBAL(GCC_push_shmedia_regs): - addi.l r15, -14*8, r15 - fst.d r15, 13*8, dr62 - fst.d r15, 12*8, dr60 - fst.d r15, 11*8, dr58 - fst.d r15, 10*8, dr56 - fst.d r15, 9*8, dr54 - fst.d r15, 8*8, dr52 - fst.d r15, 7*8, dr50 - fst.d r15, 6*8, dr48 - fst.d r15, 5*8, dr46 - fst.d r15, 4*8, dr44 - fst.d r15, 3*8, dr42 - fst.d r15, 2*8, dr40 - fst.d r15, 1*8, dr38 - fst.d r15, 0*8, dr36 -#else /* ! __SH4_NOFPU__ */ - .global GLOBAL(GCC_push_shmedia_regs_nofpu) - FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu)) -GLOBAL(GCC_push_shmedia_regs_nofpu): -#endif /* ! __SH4_NOFPU__ */ - ptabs/l r18, tr0 - addi.l r15, -27*8, r15 - gettr tr7, r62 - gettr tr6, r61 - gettr tr5, r60 - st.q r15, 26*8, r62 - st.q r15, 25*8, r61 - st.q r15, 24*8, r60 - st.q r15, 23*8, r59 - st.q r15, 22*8, r58 - st.q r15, 21*8, r57 - st.q r15, 20*8, r56 - st.q r15, 19*8, r55 - st.q r15, 18*8, r54 - st.q r15, 17*8, r53 - st.q r15, 16*8, r52 - st.q r15, 15*8, r51 - st.q r15, 14*8, r50 - st.q r15, 13*8, r49 - st.q r15, 12*8, r48 - st.q r15, 11*8, r47 - st.q r15, 10*8, r46 - st.q r15, 9*8, r45 - st.q r15, 8*8, r44 - st.q r15, 7*8, r35 - st.q r15, 6*8, r34 - st.q r15, 5*8, r33 - st.q r15, 4*8, r32 - st.q r15, 3*8, r31 - st.q r15, 2*8, r30 - st.q r15, 1*8, r29 - st.q r15, 0*8, r28 - blink tr0, r63 -#ifndef __SH4_NOFPU__ - ENDFUNC(GLOBAL(GCC_push_shmedia_regs)) -#else - ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu)) -#endif -#ifndef __SH4_NOFPU__ - .global GLOBAL(GCC_pop_shmedia_regs) - FUNC(GLOBAL(GCC_pop_shmedia_regs)) -GLOBAL(GCC_pop_shmedia_regs): - pt .L0, tr1 - movi 41*8, r0 - fld.d r15, 40*8, dr62 - fld.d r15, 39*8, dr60 - fld.d r15, 38*8, dr58 - fld.d r15, 37*8, dr56 - fld.d r15, 36*8, dr54 - fld.d r15, 35*8, dr52 - fld.d r15, 34*8, dr50 - fld.d r15, 33*8, dr48 - fld.d r15, 32*8, dr46 - fld.d r15, 31*8, dr44 - fld.d r15, 30*8, dr42 - fld.d r15, 29*8, dr40 - fld.d r15, 28*8, dr38 - fld.d r15, 27*8, dr36 - blink tr1, r63 -#else /* ! __SH4_NOFPU__ */ - .global GLOBAL(GCC_pop_shmedia_regs_nofpu) - FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu)) -GLOBAL(GCC_pop_shmedia_regs_nofpu): -#endif /* ! __SH4_NOFPU__ */ - movi 27*8, r0 -.L0: - ptabs r18, tr0 - ld.q r15, 26*8, r62 - ld.q r15, 25*8, r61 - ld.q r15, 24*8, r60 - ptabs r62, tr7 - ptabs r61, tr6 - ptabs r60, tr5 - ld.q r15, 23*8, r59 - ld.q r15, 22*8, r58 - ld.q r15, 21*8, r57 - ld.q r15, 20*8, r56 - ld.q r15, 19*8, r55 - ld.q r15, 18*8, r54 - ld.q r15, 17*8, r53 - ld.q r15, 16*8, r52 - ld.q r15, 15*8, r51 - ld.q r15, 14*8, r50 - ld.q r15, 13*8, r49 - ld.q r15, 12*8, r48 - ld.q r15, 11*8, r47 - ld.q r15, 10*8, r46 - ld.q r15, 9*8, r45 - ld.q r15, 8*8, r44 - ld.q r15, 7*8, r35 - ld.q r15, 6*8, r34 - ld.q r15, 5*8, r33 - ld.q r15, 4*8, r32 - ld.q r15, 3*8, r31 - ld.q r15, 2*8, r30 - ld.q r15, 1*8, r29 - ld.q r15, 0*8, r28 - add.l r15, r0, r15 - blink tr0, r63 - -#ifndef __SH4_NOFPU__ - ENDFUNC(GLOBAL(GCC_pop_shmedia_regs)) -#else - ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu)) -#endif -#endif /* __SH5__ == 32 */ -#endif /* L_push_pop_shmedia_regs */ - -#ifdef L_div_table -#if __SH5__ -#if defined(__pic__) && defined(__SHMEDIA__) - .global GLOBAL(sdivsi3) - FUNC(GLOBAL(sdivsi3)) -#if __SH5__ == 32 - .section .text..SHmedia32,"ax" -#else - .text -#endif -#if 0 -/* ??? FIXME: Presumably due to a linker bug, exporting data symbols - in a text section does not work (at least for shared libraries): - the linker sets the LSB of the address as if this was SHmedia code. */ -#define TEXT_DATA_BUG -#endif - .align 2 - // inputs: r4,r5 - // clobbered: r1,r18,r19,r20,r21,r25,tr0 - // result in r0 - .global GLOBAL(sdivsi3) -GLOBAL(sdivsi3): -#ifdef TEXT_DATA_BUG - ptb datalabel Local_div_table,tr0 -#else - ptb GLOBAL(div_table_internal),tr0 -#endif - nsb r5, r1 - shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62 - shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1) - /* bubble */ - gettr tr0,r20 - ldx.ub r20, r21, r19 // u0.8 - shari r25, 32, r25 // normalize to s2.30 - shlli r21, 1, r21 - muls.l r25, r19, r19 // s2.38 - ldx.w r20, r21, r21 // s2.14 - ptabs r18, tr0 - shari r19, 24, r19 // truncate to s2.14 - sub r21, r19, r19 // some 11 bit inverse in s1.14 - muls.l r19, r19, r21 // u0.28 - sub r63, r1, r1 - addi r1, 92, r1 - muls.l r25, r21, r18 // s2.58 - shlli r19, 45, r19 // multiply by two and convert to s2.58 - /* bubble */ - sub r19, r18, r18 - shari r18, 28, r18 // some 22 bit inverse in s1.30 - muls.l r18, r25, r0 // s2.60 - muls.l r18, r4, r25 // s32.30 - /* bubble */ - shari r0, 16, r19 // s-16.44 - muls.l r19, r18, r19 // s-16.74 - shari r25, 63, r0 - shari r4, 14, r18 // s19.-14 - shari r19, 30, r19 // s-16.44 - muls.l r19, r18, r19 // s15.30 - xor r21, r0, r21 // You could also use the constant 1 << 27. - add r21, r25, r21 - sub r21, r19, r21 - shard r21, r1, r21 - sub r21, r0, r0 - blink tr0, r63 - ENDFUNC(GLOBAL(sdivsi3)) -/* This table has been generated by divtab.c . -Defects for bias -330: - Max defect: 6.081536e-07 at -1.000000e+00 - Min defect: 2.849516e-08 at 1.030651e+00 - Max 2nd step defect: 9.606539e-12 at -1.000000e+00 - Min 2nd step defect: 0.000000e+00 at 0.000000e+00 - Defect at 1: 1.238659e-07 - Defect at -2: 1.061708e-07 */ -#else /* ! __pic__ || ! __SHMEDIA__ */ - .section .rodata -#endif /* __pic__ */ -#if defined(TEXT_DATA_BUG) && defined(__pic__) && defined(__SHMEDIA__) - .balign 2 - .type Local_div_table,@object - .size Local_div_table,128 -/* negative division constants */ - .word -16638 - .word -17135 - .word -17737 - .word -18433 - .word -19103 - .word -19751 - .word -20583 - .word -21383 - .word -22343 - .word -23353 - .word -24407 - .word -25582 - .word -26863 - .word -28382 - .word -29965 - .word -31800 -/* negative division factors */ - .byte 66 - .byte 70 - .byte 75 - .byte 81 - .byte 87 - .byte 93 - .byte 101 - .byte 109 - .byte 119 - .byte 130 - .byte 142 - .byte 156 - .byte 172 - .byte 192 - .byte 214 - .byte 241 - .skip 16 -Local_div_table: - .skip 16 -/* positive division factors */ - .byte 241 - .byte 214 - .byte 192 - .byte 172 - .byte 156 - .byte 142 - .byte 130 - .byte 119 - .byte 109 - .byte 101 - .byte 93 - .byte 87 - .byte 81 - .byte 75 - .byte 70 - .byte 66 -/* positive division constants */ - .word 31801 - .word 29966 - .word 28383 - .word 26864 - .word 25583 - .word 24408 - .word 23354 - .word 22344 - .word 21384 - .word 20584 - .word 19752 - .word 19104 - .word 18434 - .word 17738 - .word 17136 - .word 16639 - .section .rodata -#endif /* TEXT_DATA_BUG */ - .balign 2 - .type GLOBAL(div_table),@object - .size GLOBAL(div_table),128 -/* negative division constants */ - .word -16638 - .word -17135 - .word -17737 - .word -18433 - .word -19103 - .word -19751 - .word -20583 - .word -21383 - .word -22343 - .word -23353 - .word -24407 - .word -25582 - .word -26863 - .word -28382 - .word -29965 - .word -31800 -/* negative division factors */ - .byte 66 - .byte 70 - .byte 75 - .byte 81 - .byte 87 - .byte 93 - .byte 101 - .byte 109 - .byte 119 - .byte 130 - .byte 142 - .byte 156 - .byte 172 - .byte 192 - .byte 214 - .byte 241 - .skip 16 - .global GLOBAL(div_table) -GLOBAL(div_table): - HIDDEN_ALIAS(div_table_internal,div_table) - .skip 16 -/* positive division factors */ - .byte 241 - .byte 214 - .byte 192 - .byte 172 - .byte 156 - .byte 142 - .byte 130 - .byte 119 - .byte 109 - .byte 101 - .byte 93 - .byte 87 - .byte 81 - .byte 75 - .byte 70 - .byte 66 -/* positive division constants */ - .word 31801 - .word 29966 - .word 28383 - .word 26864 - .word 25583 - .word 24408 - .word 23354 - .word 22344 - .word 21384 - .word 20584 - .word 19752 - .word 19104 - .word 18434 - .word 17738 - .word 17136 - .word 16639 - -#elif defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__) -/* This code used shld, thus is not suitable for SH1 / SH2. */ - -/* Signed / unsigned division without use of FPU, optimized for SH4. - Uses a lookup table for divisors in the range -128 .. +128, and - div1 with case distinction for larger divisors in three more ranges. - The code is lumped together with the table to allow the use of mova. */ -#ifdef __LITTLE_ENDIAN__ -#define L_LSB 0 -#define L_LSWMSB 1 -#define L_MSWLSB 2 -#else -#define L_LSB 3 -#define L_LSWMSB 2 -#define L_MSWLSB 1 -#endif - - .balign 4 - .global GLOBAL(udivsi3_i4i) - FUNC(GLOBAL(udivsi3_i4i)) -GLOBAL(udivsi3_i4i): - mov.w LOCAL(c128_w), r1 - div0u - mov r4,r0 - shlr8 r0 - cmp/hi r1,r5 - extu.w r5,r1 - bf LOCAL(udiv_le128) - cmp/eq r5,r1 - bf LOCAL(udiv_ge64k) - shlr r0 - mov r5,r1 - shll16 r5 - mov.l r4,@-r15 - div1 r5,r0 - mov.l r1,@-r15 - div1 r5,r0 - div1 r5,r0 - bra LOCAL(udiv_25) - div1 r5,r0 - -LOCAL(div_le128): - mova LOCAL(div_table_ix),r0 - bra LOCAL(div_le128_2) - mov.b @(r0,r5),r1 -LOCAL(udiv_le128): - mov.l r4,@-r15 - mova LOCAL(div_table_ix),r0 - mov.b @(r0,r5),r1 - mov.l r5,@-r15 -LOCAL(div_le128_2): - mova LOCAL(div_table_inv),r0 - mov.l @(r0,r1),r1 - mov r5,r0 - tst #0xfe,r0 - mova LOCAL(div_table_clz),r0 - dmulu.l r1,r4 - mov.b @(r0,r5),r1 - bt/s LOCAL(div_by_1) - mov r4,r0 - mov.l @r15+,r5 - sts mach,r0 - /* clrt */ - addc r4,r0 - mov.l @r15+,r4 - rotcr r0 - rts - shld r1,r0 - -LOCAL(div_by_1_neg): - neg r4,r0 -LOCAL(div_by_1): - mov.l @r15+,r5 - rts - mov.l @r15+,r4 - -LOCAL(div_ge64k): - bt/s LOCAL(div_r8) - div0u - shll8 r5 - bra LOCAL(div_ge64k_2) - div1 r5,r0 -LOCAL(udiv_ge64k): - cmp/hi r0,r5 - mov r5,r1 - bt LOCAL(udiv_r8) - shll8 r5 - mov.l r4,@-r15 - div1 r5,r0 - mov.l r1,@-r15 -LOCAL(div_ge64k_2): - div1 r5,r0 - mov.l LOCAL(zero_l),r1 - .rept 4 - div1 r5,r0 - .endr - mov.l r1,@-r15 - div1 r5,r0 - mov.w LOCAL(m256_w),r1 - div1 r5,r0 - mov.b r0,@(L_LSWMSB,r15) - xor r4,r0 - and r1,r0 - bra LOCAL(div_ge64k_end) - xor r4,r0 - -LOCAL(div_r8): - shll16 r4 - bra LOCAL(div_r8_2) - shll8 r4 -LOCAL(udiv_r8): - mov.l r4,@-r15 - shll16 r4 - clrt - shll8 r4 - mov.l r5,@-r15 -LOCAL(div_r8_2): - rotcl r4 - mov r0,r1 - div1 r5,r1 - mov r4,r0 - rotcl r0 - mov r5,r4 - div1 r5,r1 - .rept 5 - rotcl r0; div1 r5,r1 - .endr - rotcl r0 - mov.l @r15+,r5 - div1 r4,r1 - mov.l @r15+,r4 - rts - rotcl r0 - - ENDFUNC(GLOBAL(udivsi3_i4i)) - - .global GLOBAL(sdivsi3_i4i) - FUNC(GLOBAL(sdivsi3_i4i)) - /* This is link-compatible with a GLOBAL(sdivsi3) call, - but we effectively clobber only r1. */ -GLOBAL(sdivsi3_i4i): - mov.l r4,@-r15 - cmp/pz r5 - mov.w LOCAL(c128_w), r1 - bt/s LOCAL(pos_divisor) - cmp/pz r4 - mov.l r5,@-r15 - neg r5,r5 - bt/s LOCAL(neg_result) - cmp/hi r1,r5 - neg r4,r4 -LOCAL(pos_result): - extu.w r5,r0 - bf LOCAL(div_le128) - cmp/eq r5,r0 - mov r4,r0 - shlr8 r0 - bf/s LOCAL(div_ge64k) - cmp/hi r0,r5 - div0u - shll16 r5 - div1 r5,r0 - div1 r5,r0 - div1 r5,r0 -LOCAL(udiv_25): - mov.l LOCAL(zero_l),r1 - div1 r5,r0 - div1 r5,r0 - mov.l r1,@-r15 - .rept 3 - div1 r5,r0 - .endr - mov.b r0,@(L_MSWLSB,r15) - xtrct r4,r0 - swap.w r0,r0 - .rept 8 - div1 r5,r0 - .endr - mov.b r0,@(L_LSWMSB,r15) -LOCAL(div_ge64k_end): - .rept 8 - div1 r5,r0 - .endr - mov.l @r15+,r4 ! zero-extension and swap using LS unit. - extu.b r0,r0 - mov.l @r15+,r5 - or r4,r0 - mov.l @r15+,r4 - rts - rotcl r0 - -LOCAL(div_le128_neg): - tst #0xfe,r0 - mova LOCAL(div_table_ix),r0 - mov.b @(r0,r5),r1 - mova LOCAL(div_table_inv),r0 - bt/s LOCAL(div_by_1_neg) - mov.l @(r0,r1),r1 - mova LOCAL(div_table_clz),r0 - dmulu.l r1,r4 - mov.b @(r0,r5),r1 - mov.l @r15+,r5 - sts mach,r0 - /* clrt */ - addc r4,r0 - mov.l @r15+,r4 - rotcr r0 - shld r1,r0 - rts - neg r0,r0 - -LOCAL(pos_divisor): - mov.l r5,@-r15 - bt/s LOCAL(pos_result) - cmp/hi r1,r5 - neg r4,r4 -LOCAL(neg_result): - extu.w r5,r0 - bf LOCAL(div_le128_neg) - cmp/eq r5,r0 - mov r4,r0 - shlr8 r0 - bf/s LOCAL(div_ge64k_neg) - cmp/hi r0,r5 - div0u - mov.l LOCAL(zero_l),r1 - shll16 r5 - div1 r5,r0 - mov.l r1,@-r15 - .rept 7 - div1 r5,r0 - .endr - mov.b r0,@(L_MSWLSB,r15) - xtrct r4,r0 - swap.w r0,r0 - .rept 8 - div1 r5,r0 - .endr - mov.b r0,@(L_LSWMSB,r15) -LOCAL(div_ge64k_neg_end): - .rept 8 - div1 r5,r0 - .endr - mov.l @r15+,r4 ! zero-extension and swap using LS unit. - extu.b r0,r1 - mov.l @r15+,r5 - or r4,r1 -LOCAL(div_r8_neg_end): - mov.l @r15+,r4 - rotcl r1 - rts - neg r1,r0 - -LOCAL(div_ge64k_neg): - bt/s LOCAL(div_r8_neg) - div0u - shll8 r5 - mov.l LOCAL(zero_l),r1 - .rept 6 - div1 r5,r0 - .endr - mov.l r1,@-r15 - div1 r5,r0 - mov.w LOCAL(m256_w),r1 - div1 r5,r0 - mov.b r0,@(L_LSWMSB,r15) - xor r4,r0 - and r1,r0 - bra LOCAL(div_ge64k_neg_end) - xor r4,r0 - -LOCAL(c128_w): - .word 128 - -LOCAL(div_r8_neg): - clrt - shll16 r4 - mov r4,r1 - shll8 r1 - mov r5,r4 - .rept 7 - rotcl r1; div1 r5,r0 - .endr - mov.l @r15+,r5 - rotcl r1 - bra LOCAL(div_r8_neg_end) - div1 r4,r0 - -LOCAL(m256_w): - .word 0xff00 -/* This table has been generated by divtab-sh4.c. */ - .balign 4 -LOCAL(div_table_clz): - .byte 0 - .byte 1 - .byte 0 - .byte -1 - .byte -1 - .byte -2 - .byte -2 - .byte -2 - .byte -2 - .byte -3 - .byte -3 - .byte -3 - .byte -3 - .byte -3 - .byte -3 - .byte -3 - .byte -3 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 -/* Lookup table translating positive divisor to index into table of - normalized inverse. N.B. the '0' entry is also the last entry of the - previous table, and causes an unaligned access for division by zero. */ -LOCAL(div_table_ix): - .byte -6 - .byte -128 - .byte -128 - .byte 0 - .byte -128 - .byte -64 - .byte 0 - .byte 64 - .byte -128 - .byte -96 - .byte -64 - .byte -32 - .byte 0 - .byte 32 - .byte 64 - .byte 96 - .byte -128 - .byte -112 - .byte -96 - .byte -80 - .byte -64 - .byte -48 - .byte -32 - .byte -16 - .byte 0 - .byte 16 - .byte 32 - .byte 48 - .byte 64 - .byte 80 - .byte 96 - .byte 112 - .byte -128 - .byte -120 - .byte -112 - .byte -104 - .byte -96 - .byte -88 - .byte -80 - .byte -72 - .byte -64 - .byte -56 - .byte -48 - .byte -40 - .byte -32 - .byte -24 - .byte -16 - .byte -8 - .byte 0 - .byte 8 - .byte 16 - .byte 24 - .byte 32 - .byte 40 - .byte 48 - .byte 56 - .byte 64 - .byte 72 - .byte 80 - .byte 88 - .byte 96 - .byte 104 - .byte 112 - .byte 120 - .byte -128 - .byte -124 - .byte -120 - .byte -116 - .byte -112 - .byte -108 - .byte -104 - .byte -100 - .byte -96 - .byte -92 - .byte -88 - .byte -84 - .byte -80 - .byte -76 - .byte -72 - .byte -68 - .byte -64 - .byte -60 - .byte -56 - .byte -52 - .byte -48 - .byte -44 - .byte -40 - .byte -36 - .byte -32 - .byte -28 - .byte -24 - .byte -20 - .byte -16 - .byte -12 - .byte -8 - .byte -4 - .byte 0 - .byte 4 - .byte 8 - .byte 12 - .byte 16 - .byte 20 - .byte 24 - .byte 28 - .byte 32 - .byte 36 - .byte 40 - .byte 44 - .byte 48 - .byte 52 - .byte 56 - .byte 60 - .byte 64 - .byte 68 - .byte 72 - .byte 76 - .byte 80 - .byte 84 - .byte 88 - .byte 92 - .byte 96 - .byte 100 - .byte 104 - .byte 108 - .byte 112 - .byte 116 - .byte 120 - .byte 124 - .byte -128 -/* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */ - .balign 4 -LOCAL(zero_l): - .long 0x0 - .long 0xF81F81F9 - .long 0xF07C1F08 - .long 0xE9131AC0 - .long 0xE1E1E1E2 - .long 0xDAE6076C - .long 0xD41D41D5 - .long 0xCD856891 - .long 0xC71C71C8 - .long 0xC0E07039 - .long 0xBACF914D - .long 0xB4E81B4F - .long 0xAF286BCB - .long 0xA98EF607 - .long 0xA41A41A5 - .long 0x9EC8E952 - .long 0x9999999A - .long 0x948B0FCE - .long 0x8F9C18FA - .long 0x8ACB90F7 - .long 0x86186187 - .long 0x81818182 - .long 0x7D05F418 - .long 0x78A4C818 - .long 0x745D1746 - .long 0x702E05C1 - .long 0x6C16C16D - .long 0x68168169 - .long 0x642C8591 - .long 0x60581606 - .long 0x5C9882BA - .long 0x58ED2309 -LOCAL(div_table_inv): - .long 0x55555556 - .long 0x51D07EAF - .long 0x4E5E0A73 - .long 0x4AFD6A06 - .long 0x47AE147B - .long 0x446F8657 - .long 0x41414142 - .long 0x3E22CBCF - .long 0x3B13B13C - .long 0x38138139 - .long 0x3521CFB3 - .long 0x323E34A3 - .long 0x2F684BDB - .long 0x2C9FB4D9 - .long 0x29E4129F - .long 0x27350B89 - .long 0x24924925 - .long 0x21FB7813 - .long 0x1F7047DD - .long 0x1CF06ADB - .long 0x1A7B9612 - .long 0x18118119 - .long 0x15B1E5F8 - .long 0x135C8114 - .long 0x11111112 - .long 0xECF56BF - .long 0xC9714FC - .long 0xA6810A7 - .long 0x8421085 - .long 0x624DD30 - .long 0x4104105 - .long 0x2040811 - /* maximum error: 0.987342 scaled: 0.921875*/ - - ENDFUNC(GLOBAL(sdivsi3_i4i)) -#endif /* SH3 / SH4 */ - -#endif /* L_div_table */ - -#ifdef L_udiv_qrnnd_16 -#if !__SHMEDIA__ - HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16)) - /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ - /* n1 < d, but n1 might be larger than d1. */ - .global GLOBAL(udiv_qrnnd_16) - .balign 8 -GLOBAL(udiv_qrnnd_16): - div0u - cmp/hi r6,r0 - bt .Lots - .rept 16 - div1 r6,r0 - .endr - extu.w r0,r1 - bt 0f - add r6,r0 -0: rotcl r1 - mulu.w r1,r5 - xtrct r4,r0 - swap.w r0,r0 - sts macl,r2 - cmp/hs r2,r0 - sub r2,r0 - bt 0f - addc r5,r0 - add #-1,r1 - bt 0f -1: add #-1,r1 - rts - add r5,r0 - .balign 8 -.Lots: - sub r5,r0 - swap.w r4,r1 - xtrct r0,r1 - clrt - mov r1,r0 - addc r5,r0 - mov #-1,r1 - SL1(bf, 1b, - shlr16 r1) -0: rts - nop - ENDFUNC(GLOBAL(udiv_qrnnd_16)) -#endif /* !__SHMEDIA__ */ -#endif /* L_udiv_qrnnd_16 */ diff --git a/gcc/config/sh/lib1funcs.h b/gcc/config/sh/lib1funcs.h deleted file mode 100644 index af4b41c..0000000 --- a/gcc/config/sh/lib1funcs.h +++ /dev/null @@ -1,76 +0,0 @@ -/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, - 2004, 2005, 2006, 2009 - Free Software Foundation, Inc. - -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -<http://www.gnu.org/licenses/>. */ - -#ifdef __ELF__ -#define LOCAL(X) .L_##X -#define FUNC(X) .type X,@function -#define HIDDEN_FUNC(X) FUNC(X); .hidden X -#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y); .hidden GLOBAL(X) -#define ENDFUNC0(X) .Lfe_##X: .size X,.Lfe_##X-X -#define ENDFUNC(X) ENDFUNC0(X) -#else -#define LOCAL(X) L_##X -#define FUNC(X) -#define HIDDEN_FUNC(X) -#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y) -#define ENDFUNC(X) -#endif - -#define CONCAT(A,B) A##B -#define GLOBAL0(U,X) CONCAT(U,__##X) -#define GLOBAL(X) GLOBAL0(__USER_LABEL_PREFIX__,X) - -#define ALIAS(X,Y) .global GLOBAL(X); .set GLOBAL(X),GLOBAL(Y) - -#if defined __SH2A__ && defined __FMOVD_ENABLED__ -#undef FMOVD_WORKS -#define FMOVD_WORKS -#endif - -#ifdef __LITTLE_ENDIAN__ -#define DR00 fr1 -#define DR01 fr0 -#define DR20 fr3 -#define DR21 fr2 -#define DR40 fr5 -#define DR41 fr4 -#else /* !__LITTLE_ENDIAN__ */ -#define DR00 fr0 -#define DR01 fr1 -#define DR20 fr2 -#define DR21 fr3 -#define DR40 fr4 -#define DR41 fr5 -#endif /* !__LITTLE_ENDIAN__ */ - -#ifdef __sh1__ -#define SL(branch, dest, in_slot, in_slot_arg2) \ - in_slot, in_slot_arg2; branch dest -#define SL1(branch, dest, in_slot) \ - in_slot; branch dest -#else /* ! __sh1__ */ -#define SL(branch, dest, in_slot, in_slot_arg2) \ - branch##.s dest; in_slot, in_slot_arg2 -#define SL1(branch, dest, in_slot) \ - branch##/s dest; in_slot -#endif /* !__sh1__ */ diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h index 1e65480..cc26e05 100644 --- a/gcc/config/sh/sh.h +++ b/gcc/config/sh/sh.h @@ -1983,7 +1983,7 @@ struct sh_args { that the native compiler puts too large (> 32) immediate shift counts into a register and shifts by the register, letting the SH decide what to do instead of doing that itself. */ -/* ??? The library routines in lib1funcs.asm truncate the shift count. +/* ??? The library routines in lib1funcs.S truncate the shift count. However, the SH3 has hardware shifts that do not truncate exactly as gcc expects - the sign bit is significant - so it appears that we need to leave this zero for correct SH3 code. */ diff --git a/gcc/config/sh/t-linux b/gcc/config/sh/t-linux index a5c7116..2304fb1 100644 --- a/gcc/config/sh/t-linux +++ b/gcc/config/sh/t-linux @@ -1,5 +1,3 @@ -LIB1ASMFUNCS_CACHE = _ic_invalidate _ic_invalidate_array - LIB2FUNCS_EXTRA= $(srcdir)/config/sh/linux-atomic.asm MULTILIB_DIRNAMES= diff --git a/gcc/config/sh/t-netbsd b/gcc/config/sh/t-netbsd index de172d3..dea1c47 100644 --- a/gcc/config/sh/t-netbsd +++ b/gcc/config/sh/t-netbsd @@ -17,6 +17,5 @@ # <http://www.gnu.org/licenses/>. TARGET_LIBGCC2_CFLAGS = -fpic -mieee -LIB1ASMFUNCS_CACHE = _ic_invalidate LIB2FUNCS_EXTRA= diff --git a/gcc/config/sh/t-sh b/gcc/config/sh/t-sh index 6eaf784..56ea83e 100644 --- a/gcc/config/sh/t-sh +++ b/gcc/config/sh/t-sh @@ -22,13 +22,6 @@ sh-c.o: $(srcdir)/config/sh/sh-c.c \ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/sh/sh-c.c -LIB1ASMSRC = sh/lib1funcs.asm -LIB1ASMFUNCS = _ashiftrt _ashiftrt_n _ashiftlt _lshiftrt _movmem \ - _movmem_i4 _mulsi3 _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \ - _div_table _udiv_qrnnd_16 \ - $(LIB1ASMFUNCS_CACHE) -LIB1ASMFUNCS_CACHE = _ic_invalidate _ic_invalidate_array - TARGET_LIBGCC2_CFLAGS = -mieee DEFAULT_ENDIAN = $(word 1,$(TM_ENDIAN_CONFIG)) diff --git a/gcc/config/sh/t-sh64 b/gcc/config/sh/t-sh64 index d88f929..3bd9205 100644 --- a/gcc/config/sh/t-sh64 +++ b/gcc/config/sh/t-sh64 @@ -1,4 +1,4 @@ -# Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc. +# Copyright (C) 2002, 2004, 2005, 2011 Free Software Foundation, Inc. # # This file is part of GCC. # @@ -16,13 +16,6 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -LIB1ASMFUNCS = \ - _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \ - _shcompact_call_trampoline _shcompact_return_trampoline \ - _shcompact_incoming_args _ic_invalidate _nested_trampoline \ - _push_pop_shmedia_regs \ - _udivdi3 _divdi3 _umoddi3 _moddi3 _div_table - MULTILIB_CPU_DIRS= $(ML_sh1) $(ML_sh2e) $(ML_sh2) $(ML_sh3e) $(ML_sh3) $(ML_sh4_nofpu) $(ML_sh4_single_only) $(ML_sh4_single) $(ML_sh4) $(ML_sh5_32media:m5-32media/=media32) $(ML_sh5_32media_nofpu:m5-32media-nofpu/=nofpu/media32) $(ML_sh5_compact:m5-compact/=compact) $(ML_sh5_compact_nofpu:m5-compact-nofpu/=nofpu/compact) $(ML_sh5_64media:m5-64media/=media64) $(ML_sh5_64media_nofpu:m5-64media-nofpu/=nofpu/media64) MULTILIB_RAW_DIRNAMES= $(MULTILIB_ENDIAN:/mb= mb) $(MULTILIB_CPU_DIRS:/=) |