7 files changed, 2 insertions, 4028 deletions
diff --git a/gcc/config/sh/lib1funcs.asm b/gcc/config/sh/lib1funcs.asm
deleted file mode 100644
index 2f0ca16..0000000
--- a/gcc/config/sh/lib1funcs.asm
+++ /dev/null
@@ -1,3933 +0,0 @@
-/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
-   2004, 2005, 2006, 2009
-   Free Software Foundation, Inc.
-
-This file is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 3, or (at your option) any
-later version.
-
-This file is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-
-!! libgcc routines for the Renesas / SuperH SH CPUs.
-!! Contributed by Steve Chamberlain.
-!! sac@cygnus.com
-
-!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
-!! recoded in assembly by Toshiyasu Morita
-!! tm@netcom.com
-
-#if defined(__ELF__) && defined(__linux__)
-.section .note.GNU-stack,"",%progbits
-.previous
-#endif
-
-/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
-   ELF local label prefixes by J"orn Rennecke
-   amylaar@cygnus.com  */
-
-#include "lib1funcs.h"
-
-/* t-vxworks needs to build both PIC and non-PIC versions of libgcc,
-   so it is more convenient to define NO_FPSCR_VALUES here than to
-   define it on the command line.  */
-#if defined __vxworks && defined __PIC__
-#define NO_FPSCR_VALUES
-#endif
-	
-#if ! __SH5__
-#ifdef L_ashiftrt
-	.global	GLOBAL(ashiftrt_r4_0)
-	.global	GLOBAL(ashiftrt_r4_1)
-	.global	GLOBAL(ashiftrt_r4_2)
-	.global	GLOBAL(ashiftrt_r4_3)
-	.global	GLOBAL(ashiftrt_r4_4)
-	.global	GLOBAL(ashiftrt_r4_5)
-	.global	GLOBAL(ashiftrt_r4_6)
-	.global	GLOBAL(ashiftrt_r4_7)
-	.global	GLOBAL(ashiftrt_r4_8)
-	.global	GLOBAL(ashiftrt_r4_9)
-	.global	GLOBAL(ashiftrt_r4_10)
-	.global	GLOBAL(ashiftrt_r4_11)
-	.global	GLOBAL(ashiftrt_r4_12)
-	.global	GLOBAL(ashiftrt_r4_13)
-	.global	GLOBAL(ashiftrt_r4_14)
-	.global	GLOBAL(ashiftrt_r4_15)
-	.global	GLOBAL(ashiftrt_r4_16)
-	.global	GLOBAL(ashiftrt_r4_17)
-	.global	GLOBAL(ashiftrt_r4_18)
-	.global	GLOBAL(ashiftrt_r4_19)
-	.global	GLOBAL(ashiftrt_r4_20)
-	.global	GLOBAL(ashiftrt_r4_21)
-	.global	GLOBAL(ashiftrt_r4_22)
-	.global	GLOBAL(ashiftrt_r4_23)
-	.global	GLOBAL(ashiftrt_r4_24)
-	.global	GLOBAL(ashiftrt_r4_25)
-	.global	GLOBAL(ashiftrt_r4_26)
-	.global	GLOBAL(ashiftrt_r4_27)
-	.global	GLOBAL(ashiftrt_r4_28)
-	.global	GLOBAL(ashiftrt_r4_29)
-	.global	GLOBAL(ashiftrt_r4_30)
-	.global	GLOBAL(ashiftrt_r4_31)
-	.global	GLOBAL(ashiftrt_r4_32)
-
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0))
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1))
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2))
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3))
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4))
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5))
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6))
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7))
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8))
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9))
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10))
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11))
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12))
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13))
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14))
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15))
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16))
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17))
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18))
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19))
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20))
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21))
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22))
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23))
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24))
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25))
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26))
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27))
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28))
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29))
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30))
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31))
-	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32))
-
-	.align	1	! ashiftrt_r4_N entry points: shift r4 right arithmetically by N, in place
-GLOBAL(ashiftrt_r4_32):
-GLOBAL(ashiftrt_r4_31):
-	rotcl	r4	! T = sign bit of r4
-	rts
-	subc	r4,r4	! delay slot: r4 = -T, i.e. 0 or -1
-
-GLOBAL(ashiftrt_r4_30):
-	shar	r4	! entries 30..25: one shar per extra bit, falling into the shift by 24
-GLOBAL(ashiftrt_r4_29):
-	shar	r4
-GLOBAL(ashiftrt_r4_28):
-	shar	r4
-GLOBAL(ashiftrt_r4_27):
-	shar	r4
-GLOBAL(ashiftrt_r4_26):
-	shar	r4
-GLOBAL(ashiftrt_r4_25):
-	shar	r4
-GLOBAL(ashiftrt_r4_24):
-	shlr16	r4	! logical shift by 16 ...
-	shlr8	r4	! ... plus 8: the former top byte is now in bits 7..0
-	rts
-	exts.b	r4,r4	! delay slot: sign-extend that byte
-
-GLOBAL(ashiftrt_r4_23):
-	shar	r4	! entries 23..17: one shar per extra bit, falling into the shift by 16
-GLOBAL(ashiftrt_r4_22):
-	shar	r4
-GLOBAL(ashiftrt_r4_21):
-	shar	r4
-GLOBAL(ashiftrt_r4_20):
-	shar	r4
-GLOBAL(ashiftrt_r4_19):
-	shar	r4
-GLOBAL(ashiftrt_r4_18):
-	shar	r4
-GLOBAL(ashiftrt_r4_17):
-	shar	r4
-GLOBAL(ashiftrt_r4_16):
-	shlr16	r4	! former top half is now in bits 15..0
-	rts
-	exts.w	r4,r4	! delay slot: sign-extend that half word
-
-GLOBAL(ashiftrt_r4_15):
-	shar	r4	! entries 15..1: plain chain of single-bit arithmetic shifts
-GLOBAL(ashiftrt_r4_14):
-	shar	r4
-GLOBAL(ashiftrt_r4_13):
-	shar	r4
-GLOBAL(ashiftrt_r4_12):
-	shar	r4
-GLOBAL(ashiftrt_r4_11):
-	shar	r4
-GLOBAL(ashiftrt_r4_10):
-	shar	r4
-GLOBAL(ashiftrt_r4_9):
-	shar	r4
-GLOBAL(ashiftrt_r4_8):
-	shar	r4
-GLOBAL(ashiftrt_r4_7):
-	shar	r4
-GLOBAL(ashiftrt_r4_6):
-	shar	r4
-GLOBAL(ashiftrt_r4_5):
-	shar	r4
-GLOBAL(ashiftrt_r4_4):
-	shar	r4
-GLOBAL(ashiftrt_r4_3):
-	shar	r4
-GLOBAL(ashiftrt_r4_2):
-	shar	r4
-GLOBAL(ashiftrt_r4_1):
-	rts
-	shar	r4	! delay slot: the last single-bit shift
-
-GLOBAL(ashiftrt_r4_0):
-	rts	! shift by zero: r4 is returned unchanged
-	nop
-
-	ENDFUNC(GLOBAL(ashiftrt_r4_0))
-	ENDFUNC(GLOBAL(ashiftrt_r4_1))
-	ENDFUNC(GLOBAL(ashiftrt_r4_2))
-	ENDFUNC(GLOBAL(ashiftrt_r4_3))
-	ENDFUNC(GLOBAL(ashiftrt_r4_4))
-	ENDFUNC(GLOBAL(ashiftrt_r4_5))
-	ENDFUNC(GLOBAL(ashiftrt_r4_6))
-	ENDFUNC(GLOBAL(ashiftrt_r4_7))
-	ENDFUNC(GLOBAL(ashiftrt_r4_8))
-	ENDFUNC(GLOBAL(ashiftrt_r4_9))
-	ENDFUNC(GLOBAL(ashiftrt_r4_10))
-	ENDFUNC(GLOBAL(ashiftrt_r4_11))
-	ENDFUNC(GLOBAL(ashiftrt_r4_12))
-	ENDFUNC(GLOBAL(ashiftrt_r4_13))
-	ENDFUNC(GLOBAL(ashiftrt_r4_14))
-	ENDFUNC(GLOBAL(ashiftrt_r4_15))
-	ENDFUNC(GLOBAL(ashiftrt_r4_16))
-	ENDFUNC(GLOBAL(ashiftrt_r4_17))
-	ENDFUNC(GLOBAL(ashiftrt_r4_18))
-	ENDFUNC(GLOBAL(ashiftrt_r4_19))
-	ENDFUNC(GLOBAL(ashiftrt_r4_20))
-	ENDFUNC(GLOBAL(ashiftrt_r4_21))
-	ENDFUNC(GLOBAL(ashiftrt_r4_22))
-	ENDFUNC(GLOBAL(ashiftrt_r4_23))
-	ENDFUNC(GLOBAL(ashiftrt_r4_24))
-	ENDFUNC(GLOBAL(ashiftrt_r4_25))
-	ENDFUNC(GLOBAL(ashiftrt_r4_26))
-	ENDFUNC(GLOBAL(ashiftrt_r4_27))
-	ENDFUNC(GLOBAL(ashiftrt_r4_28))
-	ENDFUNC(GLOBAL(ashiftrt_r4_29))
-	ENDFUNC(GLOBAL(ashiftrt_r4_30))
-	ENDFUNC(GLOBAL(ashiftrt_r4_31))
-	ENDFUNC(GLOBAL(ashiftrt_r4_32))
-#endif
-
-#ifdef L_ashiftrt_n
-
-! Arithmetic (sign-propagating) right shift by a variable count.
-! GLOBAL(ashrsi3)
-!
-! Entry:
-!
-! r4: Value to shift
-! r5: Shift count (only the low five bits are used)
-!
-! Exit:
-!
-! r0: Result
-!
-! Destroys:
-!
-! r5; T bit (changed by the shar / rotcl / subc steps)
-!
-
-	.global	GLOBAL(ashrsi3)
-	HIDDEN_FUNC(GLOBAL(ashrsi3))
-	.align	2
-GLOBAL(ashrsi3):
-	mov	#31,r0
-	and	r0,r5	! r5 = shift count modulo 32
-	mova	LOCAL(ashrsi3_table),r0	! r0 = address of the offset table
-	mov.b	@(r0,r5),r5	! r5 = byte offset of the handler, relative to the table
-#ifdef __sh1__
-	add	r5,r0	! no braf here: form the absolute handler address instead
-	jmp	@r0
-#else
-	braf	r5	! relative jump; the table starts right after the delay slot
-#endif
-	mov	r4,r0	! delay slot: r0 = value to shift
-
-	.align	2
-LOCAL(ashrsi3_table):
-	.byte		LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
-	.byte		LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
-	.byte		LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
-	.byte		LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
-	.byte		LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
-	.byte		LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
-	.byte		LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
-	.byte		LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
-	.byte		LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
-	.byte		LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
-	.byte		LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
-	.byte		LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
-	.byte		LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
-	.byte		LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
-	.byte		LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
-	.byte		LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
-	.byte		LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
-	.byte		LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
-	.byte		LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
-	.byte		LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
-	.byte		LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
-	.byte		LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
-	.byte		LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
-	.byte		LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
-	.byte		LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
-	.byte		LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
-	.byte		LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
-	.byte		LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
-	.byte		LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
-	.byte		LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
-	.byte		LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
-	.byte		LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
-
-LOCAL(ashrsi3_31):
-	rotcl	r0	! T = sign bit of the value
-	rts
-	subc	r0,r0	! delay slot: r0 = -T, i.e. 0 or -1
-
-LOCAL(ashrsi3_30):
-	shar	r0	! counts 30..25: one shar per extra bit, falling into the shift by 24
-LOCAL(ashrsi3_29):
-	shar	r0
-LOCAL(ashrsi3_28):
-	shar	r0
-LOCAL(ashrsi3_27):
-	shar	r0
-LOCAL(ashrsi3_26):
-	shar	r0
-LOCAL(ashrsi3_25):
-	shar	r0
-LOCAL(ashrsi3_24):
-	shlr16	r0	! logical shift by 16 ...
-	shlr8	r0	! ... plus 8: the former top byte is now in bits 7..0
-	rts
-	exts.b	r0,r0	! delay slot: sign-extend that byte
-
-LOCAL(ashrsi3_23):
-	shar	r0	! counts 23..17: one shar per extra bit, falling into the shift by 16
-LOCAL(ashrsi3_22):
-	shar	r0
-LOCAL(ashrsi3_21):
-	shar	r0
-LOCAL(ashrsi3_20):
-	shar	r0
-LOCAL(ashrsi3_19):
-	shar	r0
-LOCAL(ashrsi3_18):
-	shar	r0
-LOCAL(ashrsi3_17):
-	shar	r0
-LOCAL(ashrsi3_16):
-	shlr16	r0	! former top half is now in bits 15..0
-	rts
-	exts.w	r0,r0	! delay slot: sign-extend that half word
-
-LOCAL(ashrsi3_15):
-	shar	r0	! counts 15..1: plain chain of single-bit arithmetic shifts
-LOCAL(ashrsi3_14):
-	shar	r0
-LOCAL(ashrsi3_13):
-	shar	r0
-LOCAL(ashrsi3_12):
-	shar	r0
-LOCAL(ashrsi3_11):
-	shar	r0
-LOCAL(ashrsi3_10):
-	shar	r0
-LOCAL(ashrsi3_9):
-	shar	r0
-LOCAL(ashrsi3_8):
-	shar	r0
-LOCAL(ashrsi3_7):
-	shar	r0
-LOCAL(ashrsi3_6):
-	shar	r0
-LOCAL(ashrsi3_5):
-	shar	r0
-LOCAL(ashrsi3_4):
-	shar	r0
-LOCAL(ashrsi3_3):
-	shar	r0
-LOCAL(ashrsi3_2):
-	shar	r0
-LOCAL(ashrsi3_1):
-	rts
-	shar	r0	! delay slot: the last single-bit shift
-
-LOCAL(ashrsi3_0):
-	rts	! count 0: r0 already holds the unshifted value
-	nop
-
-	ENDFUNC(GLOBAL(ashrsi3))
-#endif
-
-#ifdef L_ashiftlt
-
-! Left shift by a variable count.
-! GLOBAL(ashlsi3)
-!
-! Entry:
-!
-! r4: Value to shift
-! r5: Shift count (only the low five bits are used)
-!
-! Exit:
-!
-! r0: Result
-!
-! Destroys:
-!
-! r5; T bit (for odd counts, which end in a single-bit shll)
-!
-	.global	GLOBAL(ashlsi3)
-	HIDDEN_FUNC(GLOBAL(ashlsi3))
-	.align	2
-GLOBAL(ashlsi3):
-	mov	#31,r0
-	and	r0,r5	! r5 = shift count modulo 32
-	mova	LOCAL(ashlsi3_table),r0	! r0 = address of the offset table
-	mov.b	@(r0,r5),r5	! r5 = byte offset of the handler, relative to the table
-#ifdef __sh1__
-	add	r5,r0	! no braf here: form the absolute handler address instead
-	jmp	@r0
-#else
-	braf	r5	! relative jump; the table starts right after the delay slot
-#endif
-	mov	r4,r0	! delay slot: r0 = value to shift
-
-	.align	2
-LOCAL(ashlsi3_table):
-	.byte		LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
-	.byte		LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
-	.byte		LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
-	.byte		LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
-	.byte		LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
-	.byte		LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
-	.byte		LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
-	.byte		LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
-	.byte		LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
-	.byte		LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
-	.byte		LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
-	.byte		LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
-	.byte		LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
-	.byte		LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
-	.byte		LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
-	.byte		LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
-	.byte		LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
-	.byte		LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
-	.byte		LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
-	.byte		LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
-	.byte		LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
-	.byte		LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
-	.byte		LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
-	.byte		LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
-	.byte		LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
-	.byte		LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
-	.byte		LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
-	.byte		LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
-	.byte		LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
-	.byte		LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
-	.byte		LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
-	.byte		LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)
-
-LOCAL(ashlsi3_6):
-	shll2	r0	! counts 6, 4, 2: two bits per step
-LOCAL(ashlsi3_4):
-	shll2	r0
-LOCAL(ashlsi3_2):
-	rts
-	shll2	r0	! delay slot: the last two bits
-
-LOCAL(ashlsi3_7):
-	shll2	r0	! counts 7, 5, 3, 1: shll2 steps, then one single-bit shift
-LOCAL(ashlsi3_5):
-	shll2	r0
-LOCAL(ashlsi3_3):
-	shll2	r0
-LOCAL(ashlsi3_1):
-	rts
-	shll	r0	! delay slot: the odd bit
-
-LOCAL(ashlsi3_14):
-	shll2	r0	! counts 14, 12, 10, 8: shll2 steps on top of a shift by 8
-LOCAL(ashlsi3_12):
-	shll2	r0
-LOCAL(ashlsi3_10):
-	shll2	r0
-LOCAL(ashlsi3_8):
-	rts
-	shll8	r0	! delay slot: shift by 8
-
-LOCAL(ashlsi3_15):
-	shll2	r0	! counts 15, 13, 11, 9: shll2 steps, shift by 8, then one bit
-LOCAL(ashlsi3_13):
-	shll2	r0
-LOCAL(ashlsi3_11):
-	shll2	r0
-LOCAL(ashlsi3_9):
-	shll8	r0
-	rts
-	shll	r0	! delay slot: the odd bit
-
-LOCAL(ashlsi3_22):
-	shll2	r0	! counts 22, 20, 18, 16: shll2 steps on top of a shift by 16
-LOCAL(ashlsi3_20):
-	shll2	r0
-LOCAL(ashlsi3_18):
-	shll2	r0
-LOCAL(ashlsi3_16):
-	rts
-	shll16	r0	! delay slot: shift by 16
-
-LOCAL(ashlsi3_23):
-	shll2	r0	! counts 23, 21, 19, 17: shll2 steps, shift by 16, then one bit
-LOCAL(ashlsi3_21):
-	shll2	r0
-LOCAL(ashlsi3_19):
-	shll2	r0
-LOCAL(ashlsi3_17):
-	shll16	r0
-	rts
-	shll	r0	! delay slot: the odd bit
-
-LOCAL(ashlsi3_30):
-	shll2	r0	! counts 30, 28, 26, 24: shll2 steps on top of 16 + 8
-LOCAL(ashlsi3_28):
-	shll2	r0
-LOCAL(ashlsi3_26):
-	shll2	r0
-LOCAL(ashlsi3_24):
-	shll16	r0
-	rts
-	shll8	r0	! delay slot: completes the shift by 24
-
-LOCAL(ashlsi3_31):
-	shll2	r0	! counts 31, 29, 27, 25: shll2 steps, 16 + 8, then one bit
-LOCAL(ashlsi3_29):
-	shll2	r0
-LOCAL(ashlsi3_27):
-	shll2	r0
-LOCAL(ashlsi3_25):
-	shll16	r0
-	shll8	r0
-	rts
-	shll	r0	! delay slot: the odd bit
-
-LOCAL(ashlsi3_0):
-	rts	! count 0: r0 already holds the unshifted value
-	nop
-
-	ENDFUNC(GLOBAL(ashlsi3))
-#endif
-
-#ifdef L_lshiftrt
-
-! Logical (zero-filling) right shift by a variable count.
-! GLOBAL(lshrsi3)
-!
-! Entry:
-!
-! r4: Value to shift
-! r5: Shift count (only the low five bits are used)
-!
-! Exit:
-!
-! r0: Result
-!
-! Destroys:
-!
-! r5; T bit (for odd counts, which end in a single-bit shlr)
-!
-	.global	GLOBAL(lshrsi3)
-	HIDDEN_FUNC(GLOBAL(lshrsi3))
-	.align	2
-GLOBAL(lshrsi3):
-	mov	#31,r0
-	and	r0,r5	! r5 = shift count modulo 32
-	mova	LOCAL(lshrsi3_table),r0	! r0 = address of the offset table
-	mov.b	@(r0,r5),r5	! r5 = byte offset of the handler, relative to the table
-#ifdef __sh1__
-	add	r5,r0	! no braf here: form the absolute handler address instead
-	jmp	@r0
-#else
-	braf	r5	! relative jump; the table starts right after the delay slot
-#endif
-	mov	r4,r0	! delay slot: r0 = value to shift
-
-	.align	2
-LOCAL(lshrsi3_table):
-	.byte		LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
-	.byte		LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
-	.byte		LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
-	.byte		LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
-	.byte		LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
-	.byte		LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
-	.byte		LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
-	.byte		LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
-	.byte		LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
-	.byte		LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
-	.byte		LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
-	.byte		LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
-	.byte		LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
-	.byte		LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
-	.byte		LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
-	.byte		LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
-	.byte		LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
-	.byte		LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
-	.byte		LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
-	.byte		LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
-	.byte		LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
-	.byte		LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
-	.byte		LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
-	.byte		LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
-	.byte		LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
-	.byte		LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
-	.byte		LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
-	.byte		LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
-	.byte		LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
-	.byte		LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
-	.byte		LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
-	.byte		LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)
-
-LOCAL(lshrsi3_6):
-	shlr2	r0	! counts 6, 4, 2: two bits per step
-LOCAL(lshrsi3_4):
-	shlr2	r0
-LOCAL(lshrsi3_2):
-	rts
-	shlr2	r0	! delay slot: the last two bits
-
-LOCAL(lshrsi3_7):
-	shlr2	r0	! counts 7, 5, 3, 1: shlr2 steps, then one single-bit shift
-LOCAL(lshrsi3_5):
-	shlr2	r0
-LOCAL(lshrsi3_3):
-	shlr2	r0
-LOCAL(lshrsi3_1):
-	rts
-	shlr	r0	! delay slot: the odd bit
-
-LOCAL(lshrsi3_14):
-	shlr2	r0	! counts 14, 12, 10, 8: shlr2 steps on top of a shift by 8
-LOCAL(lshrsi3_12):
-	shlr2	r0
-LOCAL(lshrsi3_10):
-	shlr2	r0
-LOCAL(lshrsi3_8):
-	rts
-	shlr8	r0	! delay slot: shift by 8
-
-LOCAL(lshrsi3_15):
-	shlr2	r0	! counts 15, 13, 11, 9: shlr2 steps, shift by 8, then one bit
-LOCAL(lshrsi3_13):
-	shlr2	r0
-LOCAL(lshrsi3_11):
-	shlr2	r0
-LOCAL(lshrsi3_9):
-	shlr8	r0
-	rts
-	shlr	r0	! delay slot: the odd bit
-
-LOCAL(lshrsi3_22):
-	shlr2	r0	! counts 22, 20, 18, 16: shlr2 steps on top of a shift by 16
-LOCAL(lshrsi3_20):
-	shlr2	r0
-LOCAL(lshrsi3_18):
-	shlr2	r0
-LOCAL(lshrsi3_16):
-	rts
-	shlr16	r0	! delay slot: shift by 16
-
-LOCAL(lshrsi3_23):
-	shlr2	r0	! counts 23, 21, 19, 17: shlr2 steps, shift by 16, then one bit
-LOCAL(lshrsi3_21):
-	shlr2	r0
-LOCAL(lshrsi3_19):
-	shlr2	r0
-LOCAL(lshrsi3_17):
-	shlr16	r0
-	rts
-	shlr	r0	! delay slot: the odd bit
-
-LOCAL(lshrsi3_30):
-	shlr2	r0	! counts 30, 28, 26, 24: shlr2 steps on top of 16 + 8
-LOCAL(lshrsi3_28):
-	shlr2	r0
-LOCAL(lshrsi3_26):
-	shlr2	r0
-LOCAL(lshrsi3_24):
-	shlr16	r0
-	rts
-	shlr8	r0	! delay slot: completes the shift by 24
-
-LOCAL(lshrsi3_31):
-	shlr2	r0	! counts 31, 29, 27, 25: shlr2 steps, 16 + 8, then one bit
-LOCAL(lshrsi3_29):
-	shlr2	r0
-LOCAL(lshrsi3_27):
-	shlr2	r0
-LOCAL(lshrsi3_25):
-	shlr16	r0
-	shlr8	r0
-	rts
-	shlr	r0	! delay slot: the odd bit
-
-LOCAL(lshrsi3_0):
-	rts	! count 0: r0 already holds the unshifted value
-	nop
-
-	ENDFUNC(GLOBAL(lshrsi3))
-#endif
-
-#ifdef L_movmem
-	.text
-	.balign	4
-	.global	GLOBAL(movmem)
-	HIDDEN_FUNC(GLOBAL(movmem))
-	HIDDEN_ALIAS(movstr,movmem)
-	/* This would be a lot simpler if r6 contained the byte count
-	   minus 64, and we wouldn't be called here for a byte count of 64.  */
-GLOBAL(movmem):
-	sts.l	pr,@-r15	! save the caller return address; pr is reused by the bsr below
-	shll2	r6	! scale r6 by 4; it later serves as a byte offset into the movmemSI* code
-	bsr	GLOBAL(movmemSI52+2)	! enter movmemSI52 past its first load; its rts lands on movmem_loop
-	mov.l	@(48,r5),r0	! delay slot: perform the load that was skipped
-	.balign	4
-LOCAL(movmem_loop): /* Reached with rts */
-	mov.l	@(60,r5),r0
-	add	#-64,r6	! step the scaled counter
-	mov.l	r0,@(60,r4)
-	tst	r6,r6
-	mov.l	@(56,r5),r0
-	bt	LOCAL(movmem_done)	! r6 == 0: finish this group and return
-	mov.l	r0,@(56,r4)
-	cmp/pl	r6	! T = (r6 > 0)
-	mov.l	@(52,r5),r0
-	add	#64,r5	! advance source and destination to the next 64-byte group
-	mov.l	r0,@(52,r4)
-	add	#64,r4
-	bt	GLOBAL(movmemSI52)	! r6 > 0: copy offsets 48..0 of the next group, rts back to movmem_loop
-! done all the large groups, do the remainder
-! jump into the movmemSI* chain at movmemSI4+4+r6 (r6 is negative here)
-	mova	GLOBAL(movmemSI4)+4,r0
-	add	r6,r0
-	jmp	@r0
-LOCAL(movmem_done): ! share slot insn, works out aligned.
-	lds.l	@r15+,pr	! restore the caller pr (also the delay slot of the jmp above)
-	mov.l	r0,@(56,r4)
-	mov.l	@(52,r5),r0
-	rts
-	mov.l	r0,@(52,r4)	! delay slot: last store of the group
-	.balign	4
-! ??? We need aliases movstr* for movmem* for the older libraries.  These
-! aliases will be removed at some point in the future.
-	.global	GLOBAL(movmemSI64)
-	HIDDEN_FUNC(GLOBAL(movmemSI64))
-	HIDDEN_ALIAS(movstrSI64,movmemSI64)
-GLOBAL(movmemSI64):
-	mov.l	@(60,r5),r0	! movmemSIn: copy n bytes from @r5 to @r4, highest word first, via r0
-	mov.l	r0,@(60,r4)
-	.global	GLOBAL(movmemSI60)
-	HIDDEN_FUNC(GLOBAL(movmemSI60))
-	HIDDEN_ALIAS(movstrSI60,movmemSI60)
-GLOBAL(movmemSI60):
-	mov.l	@(56,r5),r0
-	mov.l	r0,@(56,r4)
-	.global	GLOBAL(movmemSI56)
-	HIDDEN_FUNC(GLOBAL(movmemSI56))
-	HIDDEN_ALIAS(movstrSI56,movmemSI56)
-GLOBAL(movmemSI56):
-	mov.l	@(52,r5),r0
-	mov.l	r0,@(52,r4)
-	.global	GLOBAL(movmemSI52)
-	HIDDEN_FUNC(GLOBAL(movmemSI52))
-	HIDDEN_ALIAS(movstrSI52,movmemSI52)
-GLOBAL(movmemSI52):
-	mov.l	@(48,r5),r0	! movmem enters at movmemSI52+2, just after this load
-	mov.l	r0,@(48,r4)
-	.global	GLOBAL(movmemSI48)
-	HIDDEN_FUNC(GLOBAL(movmemSI48))
-	HIDDEN_ALIAS(movstrSI48,movmemSI48)
-GLOBAL(movmemSI48):
-	mov.l	@(44,r5),r0
-	mov.l	r0,@(44,r4)
-	.global	GLOBAL(movmemSI44)
-	HIDDEN_FUNC(GLOBAL(movmemSI44))
-	HIDDEN_ALIAS(movstrSI44,movmemSI44)
-GLOBAL(movmemSI44):
-	mov.l	@(40,r5),r0
-	mov.l	r0,@(40,r4)
-	.global	GLOBAL(movmemSI40)
-	HIDDEN_FUNC(GLOBAL(movmemSI40))
-	HIDDEN_ALIAS(movstrSI40,movmemSI40)
-GLOBAL(movmemSI40):
-	mov.l	@(36,r5),r0
-	mov.l	r0,@(36,r4)
-	.global	GLOBAL(movmemSI36)
-	HIDDEN_FUNC(GLOBAL(movmemSI36))
-	HIDDEN_ALIAS(movstrSI36,movmemSI36)
-GLOBAL(movmemSI36):
-	mov.l	@(32,r5),r0
-	mov.l	r0,@(32,r4)
-	.global	GLOBAL(movmemSI32)
-	HIDDEN_FUNC(GLOBAL(movmemSI32))
-	HIDDEN_ALIAS(movstrSI32,movmemSI32)
-GLOBAL(movmemSI32):
-	mov.l	@(28,r5),r0
-	mov.l	r0,@(28,r4)
-	.global	GLOBAL(movmemSI28)
-	HIDDEN_FUNC(GLOBAL(movmemSI28))
-	HIDDEN_ALIAS(movstrSI28,movmemSI28)
-GLOBAL(movmemSI28):
-	mov.l	@(24,r5),r0
-	mov.l	r0,@(24,r4)
-	.global	GLOBAL(movmemSI24)
-	HIDDEN_FUNC(GLOBAL(movmemSI24))
-	HIDDEN_ALIAS(movstrSI24,movmemSI24)
-GLOBAL(movmemSI24):
-	mov.l	@(20,r5),r0
-	mov.l	r0,@(20,r4)
-	.global	GLOBAL(movmemSI20)
-	HIDDEN_FUNC(GLOBAL(movmemSI20))
-	HIDDEN_ALIAS(movstrSI20,movmemSI20)
-GLOBAL(movmemSI20):
-	mov.l	@(16,r5),r0
-	mov.l	r0,@(16,r4)
-	.global	GLOBAL(movmemSI16)
-	HIDDEN_FUNC(GLOBAL(movmemSI16))
-	HIDDEN_ALIAS(movstrSI16,movmemSI16)
-GLOBAL(movmemSI16):
-	mov.l	@(12,r5),r0
-	mov.l	r0,@(12,r4)
-	.global	GLOBAL(movmemSI12)
-	HIDDEN_FUNC(GLOBAL(movmemSI12))
-	HIDDEN_ALIAS(movstrSI12,movmemSI12)
-GLOBAL(movmemSI12):
-	mov.l	@(8,r5),r0
-	mov.l	r0,@(8,r4)
-	.global	GLOBAL(movmemSI8)
-	HIDDEN_FUNC(GLOBAL(movmemSI8))
-	HIDDEN_ALIAS(movstrSI8,movmemSI8)
-GLOBAL(movmemSI8):
-	mov.l	@(4,r5),r0
-	mov.l	r0,@(4,r4)
-	.global	GLOBAL(movmemSI4)
-	HIDDEN_FUNC(GLOBAL(movmemSI4))
-	HIDDEN_ALIAS(movstrSI4,movmemSI4)
-GLOBAL(movmemSI4):
-	mov.l	@(0,r5),r0
-	rts
-	mov.l	r0,@(0,r4)	! delay slot: store the lowest word
-
-	ENDFUNC(GLOBAL(movmemSI64))
-	ENDFUNC(GLOBAL(movmemSI60))
-	ENDFUNC(GLOBAL(movmemSI56))
-	ENDFUNC(GLOBAL(movmemSI52))
-	ENDFUNC(GLOBAL(movmemSI48))
-	ENDFUNC(GLOBAL(movmemSI44))
-	ENDFUNC(GLOBAL(movmemSI40))
-	ENDFUNC(GLOBAL(movmemSI36))
-	ENDFUNC(GLOBAL(movmemSI32))
-	ENDFUNC(GLOBAL(movmemSI28))
-	ENDFUNC(GLOBAL(movmemSI24))
-	ENDFUNC(GLOBAL(movmemSI20))
-	ENDFUNC(GLOBAL(movmemSI16))
-	ENDFUNC(GLOBAL(movmemSI12))
-	ENDFUNC(GLOBAL(movmemSI8))
-	ENDFUNC(GLOBAL(movmemSI4))
-	ENDFUNC(GLOBAL(movmem))
-#endif
-
-#ifdef L_movmem_i4
-	.text
-	.global	GLOBAL(movmem_i4_even)
-	.global	GLOBAL(movmem_i4_odd)
-	.global	GLOBAL(movmemSI12_i4)
-
-	HIDDEN_FUNC(GLOBAL(movmem_i4_even))
-	HIDDEN_FUNC(GLOBAL(movmem_i4_odd))
-	HIDDEN_FUNC(GLOBAL(movmemSI12_i4))
-
-	HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even)
-	HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd)
-	HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4)
-
-	.p2align	5
-L_movmem_2mod4_end:
-	mov.l	r0,@(16,r4)	! exit taken from L_movmem_loop: store the two words still held in r0/r1
-	rts
-	mov.l	r1,@(20,r4)	! delay slot
-
-	.p2align	2
-
-GLOBAL(movmem_i4_even):
-	mov.l	@r5+,r0	! preload two source words, then join the loop at its second half
-	bra	L_movmem_start_even
-	mov.l	@r5+,r1	! delay slot
-
-GLOBAL(movmem_i4_odd):
-	mov.l	@r5+,r1	! preload three source words
-	add	#-4,r4	! bias the destination so the loop offsets 4/8/12 hit the first three words
-	mov.l	@r5+,r2
-	mov.l	@r5+,r3
-	mov.l	r1,@(4,r4)
-	mov.l	r2,@(8,r4)
-
-L_movmem_loop:
-	mov.l	r3,@(12,r4)
-	dt	r6	! r6 = r6 - 1, T = (r6 == 0)
-	mov.l	@r5+,r0
-	bt/s	L_movmem_2mod4_end	! counter exhausted: store r0/r1 and return
-	mov.l	@r5+,r1	! delay slot (executed on both paths)
-	add	#16,r4
-L_movmem_start_even:
-	mov.l	@r5+,r2
-	mov.l	@r5+,r3
-	mov.l	r0,@r4
-	dt	r6	! r6 = r6 - 1, T = (r6 == 0)
-	mov.l	r1,@(4,r4)
-	bf/s	L_movmem_loop	! counter not yet zero: keep going
-	mov.l	r2,@(8,r4)	! delay slot (executed on both paths)
-	rts
-	mov.l	r3,@(12,r4)	! delay slot: final store
-
-	ENDFUNC(GLOBAL(movmem_i4_even))
-	ENDFUNC(GLOBAL(movmem_i4_odd))
-
-	.p2align	4
-GLOBAL(movmemSI12_i4):
-	mov.l	@r5,r0	! copy exactly three words (12 bytes) from @r5 to @r4 via r0..r2
-	mov.l	@(4,r5),r1
-	mov.l	@(8,r5),r2
-	mov.l	r0,@r4
-	mov.l	r1,@(4,r4)
-	rts
-	mov.l	r2,@(8,r4)	! delay slot
-
-	ENDFUNC(GLOBAL(movmemSI12_i4))
-#endif
-
-#ifdef L_mulsi3
-
-
-	.global	GLOBAL(mulsi3)
-	HIDDEN_FUNC(GLOBAL(mulsi3))
-
-! r4 =       aabb   (aa, bb, cc, dd are 16-bit halves)
-! r5 =       ccdd
-! r0 = low 32 bits of aabb*ccdd, via 16x16 partial products
-!
-! if aa == 0 and cc == 0
-! r0 = bb*dd
-!
-! else
-! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
-!
-
-GLOBAL(mulsi3):
-	mulu.w  r4,r5		! multiply the lsws  macl=bb*dd
-	mov     r5,r3		! r3 = ccdd
-	swap.w  r4,r2		! r2 = bbaa
-	xtrct   r2,r3		! r3 = aacc
-	tst  	r3,r3		! msws zero ?
-	bf      hiset	! no - the cross products are needed
-	rts			! yes - then we have the answer
-	sts     macl,r0	! delay slot: r0 = bb*dd
-
-hiset:	sts	macl,r0		! r0 = bb*dd
-	mulu.w	r2,r5		! brewing macl = aa*dd
-	sts	macl,r1	! r1 = aa*dd
-	mulu.w	r3,r4		! brewing macl = cc*bb
-	sts	macl,r2	! r2 = cc*bb
-	add	r1,r2	! r2 = aa*dd + cc*bb
-	shll16	r2	! scale the cross products by 65536
-	rts
-	add	r2,r0	! delay slot: r0 = bb*dd + scaled cross products
-
-	ENDFUNC(GLOBAL(mulsi3))
-#endif
-#endif /* ! __SH5__ */
-#ifdef L_sdivsi3_i4
-	.title "SH DIVIDE"
-!! 4 byte integer Divide code for the Renesas SH
-#ifdef __SH4__
-!! args in r4 and r5, result in fpul, clobber dr0, dr2
-
-	.global	GLOBAL(sdivsi3_i4)
-	HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
-GLOBAL(sdivsi3_i4):
-	lds r4,fpul
-	float fpul,dr0	! dr0 = (double) dividend
-	lds r5,fpul
-	float fpul,dr2	! dr2 = (double) divisor
-	fdiv dr2,dr0	! dr0 = dr0 / dr2
-	rts
-	ftrc dr0,fpul	! delay slot: fpul = quotient truncated to an integer
-
-	ENDFUNC(GLOBAL(sdivsi3_i4))
-#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
-!! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
-
-#if ! __SH5__ || __SH5__ == 32
-#if __SH5__
-	.mode	SHcompact
-#endif
-	.global	GLOBAL(sdivsi3_i4)
-	HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
-GLOBAL(sdivsi3_i4):
-	sts.l fpscr,@-r15	! save the caller fpscr on the stack
-	mov #8,r2
-	swap.w r2,r2	! r2 = 0x00080000
-	lds r2,fpscr	! presumably selects double precision (PR bit) - confirm against the FPSCR layout
-	lds r4,fpul
-	float fpul,dr0	! dr0 = (double) dividend
-	lds r5,fpul
-	float fpul,dr2	! dr2 = (double) divisor
-	fdiv dr2,dr0	! dr0 = dr0 / dr2
-	ftrc dr0,fpul	! fpul = quotient truncated to an integer
-	rts
-	lds.l @r15+,fpscr	! delay slot: restore the caller fpscr
-
-	ENDFUNC(GLOBAL(sdivsi3_i4))
-#endif /* ! __SH5__ || __SH5__ == 32 */
-#endif /* ! __SH4__ */
-#endif
-
-#ifdef L_sdivsi3
-/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
-   sh2e/sh3e code.  */
-#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
-!!
-!! Steve Chamberlain
-!! sac@cygnus.com
-!!
-!!
-
-!! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit
-
-	.global	GLOBAL(sdivsi3)
-#if __SHMEDIA__
-#if __SH5__ == 32
-	.section	.text..SHmedia32,"ax"
-#else
-	.text
-#endif
-	.align	2
-#if 0
-/* The assembly code that follows is a hand-optimized version of the C
-   code that follows.  Note that the registers that are modified are
-   exactly those listed as clobbered in the patterns divsi3_i1 and
-   divsi3_i1_media.
-	
-int __sdivsi3 (i, j)
-     int i, j;
-{
-  register unsigned long long r18 asm ("r18");
-  register unsigned long long r19 asm ("r19");
-  register unsigned long long r0 asm ("r0") = 0;
-  register unsigned long long r1 asm ("r1") = 1;
-  register int r2 asm ("r2") = i >> 31;
-  register int r3 asm ("r3") = j >> 31;
-
-  r2 = r2 ? r2 : r1;
-  r3 = r3 ? r3 : r1;
-  r18 = i * r2;
-  r19 = j * r3;
-  r2 *= r3;
-  
-  r19 <<= 31;
-  r1 <<= 31;
-  do
-    if (r18 >= r19)
-      r0 |= r1, r18 -= r19;
-  while (r19 >>= 1, r1 >>= 1);
-
-  return r2 * (int)r0;
-}
-*/
-GLOBAL(sdivsi3):
-	pt/l	LOCAL(sdivsi3_dontadd), tr2
-	pt/l	LOCAL(sdivsi3_loop), tr1
-	ptabs/l	r18, tr0	// tr0 = target for the final blink, taken from r18
-	movi	0, r0	// r0 = 0: quotient accumulator
-	movi	1, r1	// r1 = 1: becomes the current quotient bit
-	shari.l	r4, 31, r2	// r2 = i >> 31
-	shari.l	r5, 31, r3	// r3 = j >> 31
-	cmveq	r2, r1, r2	// r2 = r2 ? r2 : r1
-	cmveq	r3, r1, r3	// r3 = r3 ? r3 : r1
-	muls.l	r4, r2, r18	// r18 = i * r2
-	muls.l	r5, r3, r19	// r19 = j * r3
-	muls.l	r2, r3, r2	// r2 *= r3: sign of the result
-	shlli	r19, 31, r19	// r19 <<= 31
-	shlli	r1, 31, r1	// r1 <<= 31
-LOCAL(sdivsi3_loop):
-	bgtu	r19, r18, tr2	// skip the update unless r18 >= r19
-	or	r0, r1, r0	// r0 |= r1
-	sub	r18, r19, r18	// r18 -= r19
-LOCAL(sdivsi3_dontadd):
-	shlri	r1, 1, r1	// r1 >>= 1
-	shlri	r19, 1, r19	// r19 >>= 1
-	bnei	r1, 0, tr1	// loop while r1 != 0
-	muls.l	r0, r2, r0	// r0 = r2 * (int) r0
-	add.l	r0, r63, r0
-	blink	tr0, r63	// return
-#elif 0 /* ! 0 */
- // inputs: r4,r5
- // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
- // result in r0
-GLOBAL(sdivsi3):
- // can create absolute value without extra latency,
- // but dependent on proper sign extension of inputs:
- // shari.l r5,31,r2
- // xor r5,r2,r20
- // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
- shari.l r5,31,r2
- ori r2,1,r2 // r2 = -1 if r5 is negative, else 1
- muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
- movi 0xffffffffffffbb0c,r19 // shift count equiv. 76
- shari.l r4,31,r3
- nsb r20,r0
- shlld r20,r0,r25
- shlri r25,48,r25
- sub r19,r25,r1
- mmulfx.w r1,r1,r2
- mshflo.w r1,r63,r1
- // If r4 was to be used in-place instead of r21, could use this sequence
- // to compute absolute:
- // sub r63,r4,r19 // compute absolute value of r4
- // shlri r4,32,r3 // into lower 32 bit of r4, keeping
- // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
- ori r3,1,r3 // r3 = -1 if r4 is negative, else 1
- mmulfx.w r25,r2,r2
- sub r19,r0,r0
- muls.l r4,r3,r21 // r21 = r4 times its own sign, i.e. its absolute value
- msub.w r1,r2,r2
- addi r2,-2,r1
- mulu.l r21,r1,r19
- mmulfx.w r2,r2,r2
- shlli r1,15,r1
- shlrd r19,r0,r19
- mulu.l r19,r20,r3
- mmacnfx.wl r25,r2,r1
- ptabs r18,tr0 // tr0 = target for the final blink, taken from r18
- sub r21,r3,r25
-
- mulu.l r25,r1,r2
- addi r0,14,r0
- xor r4,r5,r18
- shlrd r2,r0,r2
- mulu.l r2,r20,r3
- add r19,r2,r19
- shari.l r18,31,r18 // r18 = 0 or -1 according to the sign of r4 ^ r5
- sub r25,r3,r25
-
- mulu.l r25,r1,r2
- sub r25,r20,r25
- add r19,r18,r19
- shlrd r2,r0,r2
- mulu.l r2,r20,r3
- addi r25,1,r25
- add r19,r2,r19
-
- cmpgt r25,r3,r25
- add.l r19,r25,r0
- xor r0,r18,r0
- blink tr0,r63 // return
-#else /* ! 0 && ! 0 */
-
- // inputs: r4,r5
- // clobbered: r1,r18,r19,r20,r21,r25,tr0
- // result in r0
-	HIDDEN_FUNC(GLOBAL(sdivsi3_2))
-#ifndef __pic__
-	FUNC(GLOBAL(sdivsi3))
-GLOBAL(sdivsi3): /* this is the shcompact entry point */
- // The special SHmedia entry point sdivsi3_1 prevents accidental linking
- // with the SHcompact implementation, which clobbers tr1 / tr2.
- .global GLOBAL(sdivsi3_1)
-GLOBAL(sdivsi3_1):
- .global GLOBAL(div_table_internal)
- movi (GLOBAL(div_table_internal) >> 16) & 65535, r20 // upper 16 bits of the table address
- shori GLOBAL(div_table_internal) & 65535, r20 // append the lower 16 bits
-#endif
- .global GLOBAL(sdivsi3_2)
- // div_table in r20
- // clobbered: r1,r18,r19,r21,r25,tr0
-GLOBAL(sdivsi3_2):
- nsb r5, r1
- shlld r5, r1, r25    // normalize; [-2 ..1, 1..2) in s2.62
- shari r25, 58, r21   // extract 5(6) bit index (s2.4 with hole -1..1)
- ldx.ub r20, r21, r19 // u0.8
- shari r25, 32, r25   // normalize to s2.30
- shlli r21, 1, r21
- muls.l r25, r19, r19 // s2.38
- ldx.w r20, r21, r21  // s2.14
-  ptabs r18, tr0 // tr0 = target for the final blink, taken from r18
- shari r19, 24, r19   // truncate to s2.14
- sub r21, r19, r19    // some 11 bit inverse in s1.14
- muls.l r19, r19, r21 // u0.28
-  sub r63, r1, r1
-  addi r1, 92, r1 // r1 = 92 - (normalization shift): final shift count for the shard below
- muls.l r25, r21, r18 // s2.58
- shlli r19, 45, r19   // multiply by two and convert to s2.58
-  /* bubble */
- sub r19, r18, r18
- shari r18, 28, r18   // some 22 bit inverse in s1.30
- muls.l r18, r25, r0  // s2.60
-  muls.l r18, r4, r25 // s32.30
-  /* bubble */
- shari r0, 16, r19   // s-16.44
- muls.l r19, r18, r19 // s-16.74
-  shari r25, 63, r0
-  shari r4, 14, r18   // s19.-14
- shari r19, 30, r19   // s-16.44
- muls.l r19, r18, r19 // s15.30
-  xor r21, r0, r21    // You could also use the constant 1 << 27.
-  add r21, r25, r21
- sub r21, r19, r21
- shard r21, r1, r21
- sub r21, r0, r0
- blink tr0, r63 // return
-#ifndef __pic__
-	ENDFUNC(GLOBAL(sdivsi3))
-#endif
-	ENDFUNC(GLOBAL(sdivsi3_2))
-#endif
-#elif defined __SHMEDIA__
-/* m5compact-nofpu */
- // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
-	.mode	SHmedia
-	.section	.text..SHmedia32,"ax"
-	.align	2
-	FUNC(GLOBAL(sdivsi3))
-GLOBAL(sdivsi3):
-	pt/l LOCAL(sdivsi3_dontsub), tr0
-	pt/l LOCAL(sdivsi3_loop), tr1
-	ptabs/l r18,tr2	// tr2 = target for the final blink, taken from r18
-	shari.l r4,31,r18	// r18 = 0 or -1: sign mask of the dividend
-	shari.l r5,31,r19	// r19 = 0 or -1: sign mask of the divisor
-	xor r4,r18,r20
-	xor r5,r19,r21
-	sub.l r20,r18,r20	// r20 = absolute value of the dividend
-	sub.l r21,r19,r21	// r21 = absolute value of the divisor
-	xor r18,r19,r19	// r19 = sign mask of the quotient
-	shlli r21,32,r25	// r25 = divisor magnitude in the upper 32 bits, zero below
-	addi r25,-1,r21	// r21 = r25 - 1, so each subtract below also adds 1 to the low bits of r20
-	addz.l r20,r63,r20	// zero-extend the dividend magnitude to 64 bits
-LOCAL(sdivsi3_loop):
-	shlli r20,1,r20
-	bgeu/u r21,r20,tr0	// skip the subtract while r21 >= r20
-	sub r20,r21,r20
-LOCAL(sdivsi3_dontsub):
-	addi.l r25,-1,r25	// the low word of r25 doubles as the iteration counter
-	bnei r25,-32,tr1	// loop until that counter reaches -32
-	xor r20,r19,r20
-	sub.l r20,r19,r0	// apply the quotient sign to the low 32 bits
-	blink tr2,r63	// return
-	ENDFUNC(GLOBAL(sdivsi3))
-#else /* ! __SHMEDIA__ */
-	FUNC(GLOBAL(sdivsi3))
-GLOBAL(sdivsi3):
-	mov	r4,r1	! r1 = dividend; the quotient is built up in r1
-	mov	r5,r0	! r0 = divisor
-
-	tst	r0,r0
-	bt	div0	! divisor is zero: return 0
-	mov	#0,r2
-	div0s	r2,r1	! T = sign bit of the dividend
-	subc	r3,r3	! r3 = -T: sign extension of the dividend
-	subc	r2,r1	! r1 = r1 - T
-	div0s	r0,r3	! set up the Q, M and T flags for the div1 steps below
-	rotcl	r1
-	div1	r0,r3	! 32 single-bit division steps; each rotcl moves a bit between T and r1
-	rotcl	r1
-	div1	r0,r3
-	rotcl	r1
-	div1	r0,r3
-	rotcl	r1
-	div1	r0,r3
-	rotcl	r1
-	div1	r0,r3
-	rotcl	r1
-	div1	r0,r3
-	rotcl	r1
-	div1	r0,r3
-	rotcl	r1
-	div1	r0,r3
-	rotcl	r1
-	div1	r0,r3
-	rotcl	r1
-	div1	r0,r3
-	rotcl	r1
-	div1	r0,r3
-	rotcl	r1
-	div1	r0,r3
-	rotcl	r1
-	div1	r0,r3
-	rotcl	r1
-	div1	r0,r3
-	rotcl	r1
-	div1	r0,r3
-	rotcl	r1
-	div1	r0,r3
-	rotcl	r1
-	div1	r0,r3
-	rotcl	r1
-	div1	r0,r3
-	rotcl	r1
-	div1	r0,r3
-	rotcl	r1
-	div1	r0,r3
-	rotcl	r1
-	div1	r0,r3
-	rotcl	r1
-	div1	r0,r3
-	rotcl	r1
-	div1	r0,r3
-	rotcl	r1
-	div1	r0,r3
-	rotcl	r1
-	div1	r0,r3
-	rotcl	r1
-	div1	r0,r3
-	rotcl	r1
-	div1	r0,r3
-	rotcl	r1
-	div1	r0,r3
-	rotcl	r1
-	div1	r0,r3
-	rotcl	r1
-	div1	r0,r3
-	rotcl	r1
-	div1	r0,r3
-	rotcl	r1
-	div1	r0,r3
-	rotcl	r1
-	addc	r2,r1	! r2 is 0: adds the final T as a quotient correction
-	rts
-	mov	r1,r0	! delay slot: return the quotient in r0
-
-
-div0:	rts
-	mov	#0,r0	! delay slot: result for a zero divisor
-
-	ENDFUNC(GLOBAL(sdivsi3))
-#endif /* ! __SHMEDIA__ */
-#endif /* ! __SH4__ */
-#endif
-#ifdef L_udivsi3_i4
-
-	.title "SH DIVIDE"
-!! 4 byte integer Divide code for the Renesas SH
-#ifdef __SH4__
-!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4,
-!! and t bit
-
-	.global	GLOBAL(udivsi3_i4)
-	HIDDEN_FUNC(GLOBAL(udivsi3_i4))
-GLOBAL(udivsi3_i4):
-	mov #1,r1
-	cmp/hi r1,r5	! T = (divisor > 1), unsigned
-	bf trivial	! divisor 0 or 1: the dividend is returned as is
-	rotr r1	! r1 = 0x80000000
-	xor r1,r4	! flip the top bit: read as signed, r4 is now dividend - 2^31
-	lds r4,fpul
-	mova L1,r0	! r0 = address of the 2^31 constant
-#ifdef FMOVD_WORKS
-	fmov.d @r0+,dr4
-#else
-	fmov.s @r0+,DR40
-	fmov.s @r0,DR41
-#endif
-	float fpul,dr0
-	xor r1,r5	! same bias for the divisor
-	lds r5,fpul
-	float fpul,dr2
-	fadd dr4,dr0	! add 2^31 back: dr0 = dividend as an unsigned value
-	fadd dr4,dr2	! dr2 = divisor as an unsigned value
-	fdiv dr2,dr0	! dr0 = dr0 / dr2
-	rts
-	ftrc dr0,fpul	! delay slot: fpul = truncated quotient
-
-trivial:
-	rts
-	lds r4,fpul	! delay slot: result = dividend
-
-	.align 2
-#ifdef FMOVD_WORKS
-	.align 3	! make double below 8 byte aligned.
-#endif
-L1:
-	.double 2147483648
-
-	ENDFUNC(GLOBAL(udivsi3_i4))
-#elif defined (__SH5__) && ! defined (__SH4_NOFPU__)
-#if ! __SH5__ || __SH5__ == 32
-!! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
-	.mode	SHmedia
-	.global	GLOBAL(udivsi3_i4)
-	HIDDEN_FUNC(GLOBAL(udivsi3_i4))
-GLOBAL(udivsi3_i4):
-	addz.l	r4,r63,r20	// r20 = dividend, zero-extended to 64 bits
-	addz.l	r5,r63,r21	// r21 = divisor, zero-extended to 64 bits
-	fmov.qd	r20,dr0
-	fmov.qd	r21,dr32
-	ptabs	r18,tr0	// tr0 = target for the final blink, taken from r18
-	float.qd dr0,dr0	// convert both 64-bit integers to double
-	float.qd dr32,dr32
-	fdiv.d	dr0,dr32,dr0	// dr0 = dr0 / dr32
-	ftrc.dq dr0,dr32	// truncate the quotient to a 64-bit integer in dr32
-	fmov.s fr33,fr32
-	blink tr0,r63	// return
-
-	ENDFUNC(GLOBAL(udivsi3_i4))
-#endif /* ! __SH5__ || __SH5__ == 32 */
-#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
-!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
-
-	.global	GLOBAL(udivsi3_i4)
-	HIDDEN_FUNC(GLOBAL(udivsi3_i4))
-GLOBAL(udivsi3_i4):
-	mov #1,r1
-	cmp/hi r1,r5	! T = (divisor > 1), unsigned
-	bf trivial	! divisor 0 or 1: the dividend is returned as is
-	sts.l fpscr,@-r15	! save the caller fpscr on the stack
-	mova L1,r0
-	lds.l @r0+,fpscr	! load the fpscr value stored at L1; r0 now points at the 2^31 constant
-	rotr r1	! r1 = 0x80000000
-	xor r1,r4	! flip the top bit: read as signed, r4 is now dividend - 2^31
-	lds r4,fpul
-#ifdef FMOVD_WORKS
-	fmov.d @r0+,dr4
-#else
-	fmov.s @r0+,DR40
-	fmov.s @r0,DR41
-#endif
-	float fpul,dr0
-	xor r1,r5	! same bias for the divisor
-	lds r5,fpul
-	float fpul,dr2
-	fadd dr4,dr0	! add 2^31 back: dr0 = dividend as an unsigned value
-	fadd dr4,dr2	! dr2 = divisor as an unsigned value
-	fdiv dr2,dr0	! dr0 = dr0 / dr2
-	ftrc dr0,fpul	! fpul = truncated quotient
-	rts
-	lds.l @r15+,fpscr	! delay slot: restore the caller fpscr
-
-#ifdef FMOVD_WORKS
-	.align 3	! make double below 8 byte aligned.
-#endif
-trivial:
-	rts
-	lds r4,fpul	! delay slot: result = dividend
-
-	.align 2
-L1:
-#ifndef FMOVD_WORKS
-	.long 0x80000
-#else
-	.long 0x180000
-#endif
-	.double 2147483648
-
-	ENDFUNC(GLOBAL(udivsi3_i4))
-#endif /* ! __SH4__ */
-#endif
-
-#ifdef L_udivsi3
-/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
-   sh2e/sh3e code.  */
-#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
-
-!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
-	.global	GLOBAL(udivsi3)
-	HIDDEN_FUNC(GLOBAL(udivsi3))
-
-#if __SHMEDIA__
-#if __SH5__ == 32
-	.section	.text..SHmedia32,"ax"
-#else
-	.text
-#endif
-	.align	2
-#if 0
-/* The assembly code that follows is a hand-optimized version of the C
-   code that follows.  Note that the registers that are modified are
-   exactly those listed as clobbered in the patterns udivsi3_i1 and
-   udivsi3_i1_media.
-	
-unsigned 
-__udivsi3 (i, j)
-    unsigned i, j; 
-{
-  register unsigned long long r0 asm ("r0") = 0;
-  register unsigned long long r18 asm ("r18") = 1;
-  register unsigned long long r4 asm ("r4") = i;
-  register unsigned long long r19 asm ("r19") = j;
-
-  r19 <<= 31;
-  r18 <<= 31;
-  do
-    if (r4 >= r19)
-      r0 |= r18, r4 -= r19;
-  while (r19 >>= 1, r18 >>= 1);
-
-  return r0;
-}
-*/
-GLOBAL(udivsi3):
-	pt/l	LOCAL(udivsi3_dontadd), tr2
-	pt/l	LOCAL(udivsi3_loop), tr1
-	ptabs/l	r18, tr0
-	movi	0, r0
-	movi	1, r18
-	addz.l	r5, r63, r19
-	addz.l	r4, r63, r4
-	shlli	r19, 31, r19
-	shlli	r18, 31, r18
-LOCAL(udivsi3_loop):
-	bgtu	r19, r4, tr2
-	or	r0, r18, r0
-	sub	r4, r19, r4
-LOCAL(udivsi3_dontadd):
-	shlri	r18, 1, r18
-	shlri	r19, 1, r19
-	bnei	r18, 0, tr1
-	blink	tr0, r63
-#else
-GLOBAL(udivsi3):
- // inputs: r4,r5
- // clobbered: r18,r19,r20,r21,r22,r25,tr0
- // result in r0.
- addz.l r5,r63,r22
- nsb r22,r0
- shlld r22,r0,r25
- shlri r25,48,r25
- movi 0xffffffffffffbb0c,r20 // shift count eqiv 76
- sub r20,r25,r21
- mmulfx.w r21,r21,r19
- mshflo.w r21,r63,r21
- ptabs r18,tr0
- mmulfx.w r25,r19,r19
- sub r20,r0,r0
- /* bubble */
- msub.w r21,r19,r19
- addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
-		    before the msub.w, but we need a different value for
-		    r19 to keep errors under control.  */
- mulu.l r4,r21,r18
- mmulfx.w r19,r19,r19
- shlli r21,15,r21
- shlrd r18,r0,r18
- mulu.l r18,r22,r20
- mmacnfx.wl r25,r19,r21
- /* bubble */
- sub r4,r20,r25
-
- mulu.l r25,r21,r19
- addi r0,14,r0
- /* bubble */
- shlrd r19,r0,r19
- mulu.l r19,r22,r20
- add r18,r19,r18
- /* bubble */
- sub.l r25,r20,r25
-
- mulu.l r25,r21,r19
- addz.l r25,r63,r25
- sub r25,r22,r25
- shlrd r19,r0,r19
- mulu.l r19,r22,r20
- addi r25,1,r25
- add r18,r19,r18
-
- cmpgt r25,r20,r25
- add.l r18,r25,r0
- blink tr0,r63
-#endif
-#elif defined (__SHMEDIA__)
-/* m5compact-nofpu - more emphasis on code size than on speed, but don't
-   ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
-   So use a short shmedia loop.  */
- // clobbered: r20,r21,r25,tr0,tr1,tr2
-	.mode	SHmedia
-	.section	.text..SHmedia32,"ax"
-	.align	2
-GLOBAL(udivsi3):
- pt/l LOCAL(udivsi3_dontsub), tr0
- pt/l LOCAL(udivsi3_loop), tr1
- ptabs/l r18,tr2
- shlli r5,32,r25
- addi r25,-1,r21
- addz.l r4,r63,r20
-LOCAL(udivsi3_loop):
- shlli r20,1,r20
- bgeu/u r21,r20,tr0
- sub r20,r21,r20
-LOCAL(udivsi3_dontsub):
- addi.l r25,-1,r25
- bnei r25,-32,tr1
- add.l r20,r63,r0
- blink tr2,r63
-#else /* ! defined (__SHMEDIA__) */
-LOCAL(div8):
- div1 r5,r4
-LOCAL(div7):
- div1 r5,r4; div1 r5,r4; div1 r5,r4
- div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
-
-LOCAL(divx4):
- div1 r5,r4; rotcl r0
- div1 r5,r4; rotcl r0
- div1 r5,r4; rotcl r0
- rts; div1 r5,r4
-
-GLOBAL(udivsi3):
- sts.l pr,@-r15
- extu.w r5,r0
- cmp/eq r5,r0
-#ifdef __sh1__
- bf LOCAL(large_divisor)
-#else
- bf/s LOCAL(large_divisor)
-#endif
- div0u
- swap.w r4,r0
- shlr16 r4
- bsr LOCAL(div8)
- shll16 r5
- bsr LOCAL(div7)
- div1 r5,r4
- xtrct r4,r0
- xtrct r0,r4
- bsr LOCAL(div8)
- swap.w r4,r4
- bsr LOCAL(div7)
- div1 r5,r4
- lds.l @r15+,pr
- xtrct r4,r0
- swap.w r0,r0
- rotcl r0
- rts
- shlr16 r5
-
-LOCAL(large_divisor):
-#ifdef __sh1__
- div0u
-#endif
- mov #0,r0
- xtrct r4,r0
- xtrct r0,r4
- bsr LOCAL(divx4)
- rotcl r0
- bsr LOCAL(divx4)
- rotcl r0
- bsr LOCAL(divx4)
- rotcl r0
- bsr LOCAL(divx4)
- rotcl r0
- lds.l @r15+,pr
- rts
- rotcl r0
-
-	ENDFUNC(GLOBAL(udivsi3))
-#endif /* ! __SHMEDIA__ */
-#endif /* __SH4__ */
-#endif /* L_udivsi3 */
-
-#ifdef L_udivdi3
-#ifdef __SHMEDIA__
-	.mode	SHmedia
-	.section	.text..SHmedia32,"ax"
-	.align	2
-	.global	GLOBAL(udivdi3)
-	FUNC(GLOBAL(udivdi3))
-GLOBAL(udivdi3):
-	HIDDEN_ALIAS(udivdi3_internal,udivdi3)
-	shlri r3,1,r4
-	nsb r4,r22
-	shlld r3,r22,r6
-	shlri r6,49,r5
-	movi 0xffffffffffffbaf1,r21 /* .l shift count 17.  */
-	sub r21,r5,r1
-	mmulfx.w r1,r1,r4
-	mshflo.w r1,r63,r1
-	sub r63,r22,r20 // r63 == 64 % 64
-	mmulfx.w r5,r4,r4
-	pta LOCAL(large_divisor),tr0
-	addi r20,32,r9
-	msub.w r1,r4,r1
-	madd.w r1,r1,r1
-	mmulfx.w r1,r1,r4
-	shlri r6,32,r7
-	bgt/u r9,r63,tr0 // large_divisor
-	mmulfx.w r5,r4,r4
-	shlri r2,32+14,r19
-	addi r22,-31,r0
-	msub.w r1,r4,r1
-
-	mulu.l r1,r7,r4
-	addi r1,-3,r5
-	mulu.l r5,r19,r5
-	sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
-	shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
-	                 the case may be, %0000000000000000 000.11111111111, still */
-	muls.l r1,r4,r4 /* leaving at least one sign bit.  */
-	mulu.l r5,r3,r8
-	mshalds.l r1,r21,r1
-	shari r4,26,r4
-	shlld r8,r0,r8
-	add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
-	sub r2,r8,r2
-	/* Can do second step of 64 : 32 div now, using r1 and the rest in r2.  */
-
-	shlri r2,22,r21
-	mulu.l r21,r1,r21
-	shlld r5,r0,r8
-	addi r20,30-22,r0
-	shlrd r21,r0,r21
-	mulu.l r21,r3,r5
-	add r8,r21,r8
-	mcmpgt.l r21,r63,r21 // See Note 1
-	addi r20,30,r0
-	mshfhi.l r63,r21,r21
-	sub r2,r5,r2
-	andc r2,r21,r2
-
-	/* small divisor: need a third divide step */
-	mulu.l r2,r1,r7
-	ptabs r18,tr0
-	addi r2,1,r2
-	shlrd r7,r0,r7
-	mulu.l r7,r3,r5
-	add r8,r7,r8
-	sub r2,r3,r2
-	cmpgt r2,r5,r5
-	add r8,r5,r2
-	/* could test r3 here to check for divide by zero.  */
-	blink tr0,r63
-
-LOCAL(large_divisor):
-	mmulfx.w r5,r4,r4
-	shlrd r2,r9,r25
-	shlri r25,32,r8
-	msub.w r1,r4,r1
-
-	mulu.l r1,r7,r4
-	addi r1,-3,r5
-	mulu.l r5,r8,r5
-	sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
-	shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
-	                 the case may be, %0000000000000000 000.11111111111, still */
-	muls.l r1,r4,r4 /* leaving at least one sign bit.  */
-	shlri r5,14-1,r8
-	mulu.l r8,r7,r5
-	mshalds.l r1,r21,r1
-	shari r4,26,r4
-	add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
-	sub r25,r5,r25
-	/* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */
-
-	shlri r25,22,r21
-	mulu.l r21,r1,r21
-	pta LOCAL(no_lo_adj),tr0
-	addi r22,32,r0
-	shlri r21,40,r21
-	mulu.l r21,r7,r5
-	add r8,r21,r8
-	shlld r2,r0,r2
-	sub r25,r5,r25
-	bgtu/u r7,r25,tr0 // no_lo_adj
-	addi r8,1,r8
-	sub r25,r7,r25
-LOCAL(no_lo_adj):
-	mextr4 r2,r25,r2
-
-	/* large_divisor: only needs a few adjustments.  */
-	mulu.l r8,r6,r5
-	ptabs r18,tr0
-	/* bubble */
-	cmpgtu r5,r2,r5
-	sub r8,r5,r2
-	blink tr0,r63
-	ENDFUNC(GLOBAL(udivdi3))
-/* Note 1: To shift the result of the second divide stage so that the result
-   always fits into 32 bits, yet we still reduce the rest sufficiently
-   would require a lot of instructions to do the shifts just right.  Using
-   the full 64 bit shift result to multiply with the divisor would require
-   four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
-   Fortunately, if the upper 32 bits of the shift result are nonzero, we
-   know that the rest after taking this partial result into account will
-   fit into 32 bits.  So we just clear the upper 32 bits of the rest if the
-   upper 32 bits of the partial result are nonzero.  */
-#endif /* __SHMEDIA__ */
-#endif /* L_udivdi3 */
-
-#ifdef L_divdi3
-#ifdef __SHMEDIA__
-	.mode	SHmedia
-	.section	.text..SHmedia32,"ax"
-	.align	2
-	.global	GLOBAL(divdi3)
-	FUNC(GLOBAL(divdi3))
-GLOBAL(divdi3):
-	pta GLOBAL(udivdi3_internal),tr0
-	shari r2,63,r22
-	shari r3,63,r23
-	xor r2,r22,r2
-	xor r3,r23,r3
-	sub r2,r22,r2
-	sub r3,r23,r3
-	beq/u r22,r23,tr0
-	ptabs r18,tr1
-	blink tr0,r18
-	sub r63,r2,r2
-	blink tr1,r63
-	ENDFUNC(GLOBAL(divdi3))
-#endif /* __SHMEDIA__ */
-#endif /* L_divdi3 */
-
-#ifdef L_umoddi3
-#ifdef __SHMEDIA__
-	.mode	SHmedia
-	.section	.text..SHmedia32,"ax"
-	.align	2
-	.global	GLOBAL(umoddi3)
-	FUNC(GLOBAL(umoddi3))
-GLOBAL(umoddi3):
-	HIDDEN_ALIAS(umoddi3_internal,umoddi3)
-	shlri r3,1,r4
-	nsb r4,r22
-	shlld r3,r22,r6
-	shlri r6,49,r5
-	movi 0xffffffffffffbaf1,r21 /* .l shift count 17.  */
-	sub r21,r5,r1
-	mmulfx.w r1,r1,r4
-	mshflo.w r1,r63,r1
-	sub r63,r22,r20 // r63 == 64 % 64
-	mmulfx.w r5,r4,r4
-	pta LOCAL(large_divisor),tr0
-	addi r20,32,r9
-	msub.w r1,r4,r1
-	madd.w r1,r1,r1
-	mmulfx.w r1,r1,r4
-	shlri r6,32,r7
-	bgt/u r9,r63,tr0 // large_divisor
-	mmulfx.w r5,r4,r4
-	shlri r2,32+14,r19
-	addi r22,-31,r0
-	msub.w r1,r4,r1
-
-	mulu.l r1,r7,r4
-	addi r1,-3,r5
-	mulu.l r5,r19,r5
-	sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
-	shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
-	                 the case may be, %0000000000000000 000.11111111111, still */
-	muls.l r1,r4,r4 /* leaving at least one sign bit.  */
-	mulu.l r5,r3,r5
-	mshalds.l r1,r21,r1
-	shari r4,26,r4
-	shlld r5,r0,r5
-	add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
-	sub r2,r5,r2
-	/* Can do second step of 64 : 32 div now, using r1 and the rest in r2.  */
-
-	shlri r2,22,r21
-	mulu.l r21,r1,r21
-	addi r20,30-22,r0
-	/* bubble */ /* could test r3 here to check for divide by zero.  */
-	shlrd r21,r0,r21
-	mulu.l r21,r3,r5
-	mcmpgt.l r21,r63,r21 // See Note 1
-	addi r20,30,r0
-	mshfhi.l r63,r21,r21
-	sub r2,r5,r2
-	andc r2,r21,r2
-
-	/* small divisor: need a third divide step */
-	mulu.l r2,r1,r7
-	ptabs r18,tr0
-	sub r2,r3,r8 /* re-use r8 here for rest - r3 */
-	shlrd r7,r0,r7
-	mulu.l r7,r3,r5
-	/* bubble */
-	addi r8,1,r7
-	cmpgt r7,r5,r7
-	cmvne r7,r8,r2
-	sub r2,r5,r2
-	blink tr0,r63
-
-LOCAL(large_divisor):
-	mmulfx.w r5,r4,r4
-	shlrd r2,r9,r25
-	shlri r25,32,r8
-	msub.w r1,r4,r1
-
-	mulu.l r1,r7,r4
-	addi r1,-3,r5
-	mulu.l r5,r8,r5
-	sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
-	shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
-	                 the case may be, %0000000000000000 000.11111111111, still */
-	muls.l r1,r4,r4 /* leaving at least one sign bit.  */
-	shlri r5,14-1,r8
-	mulu.l r8,r7,r5
-	mshalds.l r1,r21,r1
-	shari r4,26,r4
-	add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
-	sub r25,r5,r25
-	/* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */
-
-	shlri r25,22,r21
-	mulu.l r21,r1,r21
-	pta LOCAL(no_lo_adj),tr0
-	addi r22,32,r0
-	shlri r21,40,r21
-	mulu.l r21,r7,r5
-	add r8,r21,r8
-	shlld r2,r0,r2
-	sub r25,r5,r25
-	bgtu/u r7,r25,tr0 // no_lo_adj
-	addi r8,1,r8
-	sub r25,r7,r25
-LOCAL(no_lo_adj):
-	mextr4 r2,r25,r2
-
-	/* large_divisor: only needs a few adjustments.  */
-	mulu.l r8,r6,r5
-	ptabs r18,tr0
-	add r2,r6,r7
-	cmpgtu r5,r2,r8
-	cmvne r8,r7,r2
-	sub r2,r5,r2
-	shlrd r2,r22,r2
-	blink tr0,r63
-	ENDFUNC(GLOBAL(umoddi3))
-/* Note 1: To shift the result of the second divide stage so that the result
-   always fits into 32 bits, yet we still reduce the rest sufficiently
-   would require a lot of instructions to do the shifts just right.  Using
-   the full 64 bit shift result to multiply with the divisor would require
-   four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
-   Fortunately, if the upper 32 bits of the shift result are nonzero, we
-   know that the rest after taking this partial result into account will
-   fit into 32 bits.  So we just clear the upper 32 bits of the rest if the
-   upper 32 bits of the partial result are nonzero.  */
-#endif /* __SHMEDIA__ */
-#endif /* L_umoddi3 */
-
-#ifdef L_moddi3
-#ifdef __SHMEDIA__
-	.mode	SHmedia
-	.section	.text..SHmedia32,"ax"
-	.align	2
-	.global	GLOBAL(moddi3)
-	FUNC(GLOBAL(moddi3))
-GLOBAL(moddi3):
-	pta GLOBAL(umoddi3_internal),tr0
-	shari r2,63,r22
-	shari r3,63,r23
-	xor r2,r22,r2
-	xor r3,r23,r3
-	sub r2,r22,r2
-	sub r3,r23,r3
-	beq/u r22,r63,tr0
-	ptabs r18,tr1
-	blink tr0,r18
-	sub r63,r2,r2
-	blink tr1,r63
-	ENDFUNC(GLOBAL(moddi3))
-#endif /* __SHMEDIA__ */
-#endif /* L_moddi3 */
-
-#ifdef L_set_fpscr
-#if !defined (__SH2A_NOFPU__)
-#if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
-#ifdef __SH5__
-	.mode	SHcompact
-#endif
-	.global GLOBAL(set_fpscr)
-	HIDDEN_FUNC(GLOBAL(set_fpscr))
-GLOBAL(set_fpscr):
-	lds r4,fpscr
-#ifdef __PIC__
-	mov.l	r12,@-r15
-#ifdef __vxworks
-	mov.l	LOCAL(set_fpscr_L0_base),r12
-	mov.l	LOCAL(set_fpscr_L0_index),r0
-	mov.l	@r12,r12
-	mov.l	@(r0,r12),r12
-#else
-	mova	LOCAL(set_fpscr_L0),r0
-	mov.l	LOCAL(set_fpscr_L0),r12
-	add	r0,r12
-#endif
-	mov.l	LOCAL(set_fpscr_L1),r0
-	mov.l	@(r0,r12),r1
-	mov.l	@r15+,r12
-#else
-	mov.l LOCAL(set_fpscr_L1),r1
-#endif
-	swap.w r4,r0
-	or #24,r0
-#ifndef FMOVD_WORKS
-	xor #16,r0
-#endif
-#if defined(__SH4__) || defined (__SH2A_DOUBLE__)
-	swap.w r0,r3
-	mov.l r3,@(4,r1)
-#else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
-	swap.w r0,r2
-	mov.l r2,@r1
-#endif
-#ifndef FMOVD_WORKS
-	xor #8,r0
-#else
-	xor #24,r0
-#endif
-#if defined(__SH4__) || defined (__SH2A_DOUBLE__)
-	swap.w r0,r2
-	rts
-	mov.l r2,@r1
-#else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
-	swap.w r0,r3
-	rts
-	mov.l r3,@(4,r1)
-#endif
-	.align 2
-#ifdef __PIC__
-#ifdef __vxworks
-LOCAL(set_fpscr_L0_base):
-	.long ___GOTT_BASE__
-LOCAL(set_fpscr_L0_index):
-	.long ___GOTT_INDEX__
-#else
-LOCAL(set_fpscr_L0):
-	.long _GLOBAL_OFFSET_TABLE_
-#endif
-LOCAL(set_fpscr_L1):
-	.long GLOBAL(fpscr_values@GOT)
-#else
-LOCAL(set_fpscr_L1):
-	.long GLOBAL(fpscr_values)
-#endif
-
-	ENDFUNC(GLOBAL(set_fpscr))
-#ifndef NO_FPSCR_VALUES
-#ifdef __ELF__
-        .comm   GLOBAL(fpscr_values),8,4
-#else
-        .comm   GLOBAL(fpscr_values),8
-#endif /* ELF */
-#endif /* NO_FPSCR_VALUES */
-#endif /* SH2E / SH3E / SH4 */
-#endif /* __SH2A_NOFPU__ */
-#endif /* L_set_fpscr */
-#ifdef L_ic_invalidate
-#if __SH5__ == 32
-	.mode	SHmedia
-	.section	.text..SHmedia32,"ax"
-	.align	2
-	.global	GLOBAL(init_trampoline)
-	HIDDEN_FUNC(GLOBAL(init_trampoline))
-GLOBAL(init_trampoline):
-	st.l	r0,8,r2
-#ifdef __LITTLE_ENDIAN__
-	movi	9,r20
-	shori	0x402b,r20
-	shori	0xd101,r20
-	shori	0xd002,r20
-#else
-	movi	0xffffffffffffd002,r20
-	shori	0xd101,r20
-	shori	0x402b,r20
-	shori	9,r20
-#endif
-	st.q	r0,0,r20
-	st.l	r0,12,r3
-	ENDFUNC(GLOBAL(init_trampoline))
-	.global	GLOBAL(ic_invalidate)
-	HIDDEN_FUNC(GLOBAL(ic_invalidate))
-GLOBAL(ic_invalidate):
-	ocbwb	r0,0
-	synco
-	icbi	r0, 0
-	ptabs	r18, tr0
-	synci
-	blink	tr0, r63
-	ENDFUNC(GLOBAL(ic_invalidate))
-#elif defined(__SH4A__)
-	.global GLOBAL(ic_invalidate)
-	HIDDEN_FUNC(GLOBAL(ic_invalidate))
-GLOBAL(ic_invalidate):
-	ocbwb	@r4
-	synco
-	icbi	@r4
-	rts
-	  nop
-	ENDFUNC(GLOBAL(ic_invalidate))
-#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
-	/* For system code, we use ic_invalidate_line_i, but user code
-	   needs a different mechanism.  A kernel call is generally not
-	   available, and it would also be slow.  Different SH4 variants use
-	   different sizes and associativities of the Icache.  We use a small
-	   bit of dispatch code that can be put hidden in every shared object,
-	   which calls the actual processor-specific invalidation code in a
-	   separate module.
-	   Or if you have operating system support, the OS could mmap the
-	   procesor-specific code from a single page, since it is highly
-	   repetitive.  */
-	.global GLOBAL(ic_invalidate)
-	HIDDEN_FUNC(GLOBAL(ic_invalidate))
-GLOBAL(ic_invalidate):
-#ifdef __pic__
-#ifdef __vxworks
-	mov.l	1f,r1
-	mov.l	2f,r0
-	mov.l	@r1,r1
-	mov.l	0f,r2
-	mov.l	@(r0,r1),r0
-#else
-	mov.l	1f,r1
-	mova	1f,r0
-	mov.l	0f,r2
-	add	r1,r0
-#endif
-	mov.l	@(r0,r2),r1
-#else
-	mov.l	0f,r1
-#endif
-	ocbwb	@r4
-	mov.l	@(8,r1),r0
-	sub	r1,r4
-	and	r4,r0
-	add	r1,r0
-	jmp	@r0
-	mov.l	@(4,r1),r0
-	.align	2
-#ifndef __pic__
-0:	.long   GLOBAL(ic_invalidate_array)
-#else /* __pic__ */
-	.global GLOBAL(ic_invalidate_array)
-0:	.long   GLOBAL(ic_invalidate_array)@GOT
-#ifdef __vxworks
-1:	.long	___GOTT_BASE__
-2:	.long	___GOTT_INDEX__
-#else
-1:	.long   _GLOBAL_OFFSET_TABLE_
-#endif
-	ENDFUNC(GLOBAL(ic_invalidate))
-#endif /* __pic__ */
-#endif /* SH4 */
-#endif /* L_ic_invalidate */
-
-#ifdef L_ic_invalidate_array
-#if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))))
-	.global GLOBAL(ic_invalidate_array)
-	/* This is needed when an SH4 dso with trampolines is used on SH4A.  */
-	.global GLOBAL(ic_invalidate_array)
-	FUNC(GLOBAL(ic_invalidate_array))
-GLOBAL(ic_invalidate_array):
-	add	r1,r4
-	synco
-	icbi	@r4
-	rts
-	  nop
-	.align 2
-	.long	0
-	ENDFUNC(GLOBAL(ic_invalidate_array))
-#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
-	.global GLOBAL(ic_invalidate_array)
-	.p2align 5
-	FUNC(GLOBAL(ic_invalidate_array))
-/* This must be aligned to the beginning of a cache line.  */
-GLOBAL(ic_invalidate_array):
-#ifndef WAYS
-#define WAYS 4
-#define WAY_SIZE 0x4000
-#endif
-#if WAYS == 1
-	.rept	WAY_SIZE * WAYS / 32
-	rts
-	nop
-	.rept	7
-	.long	WAY_SIZE - 32
-	.endr
-	.endr
-#elif WAYS <= 6
-	.rept	WAY_SIZE * WAYS / 32
-	braf	r0
-	add	#-8,r0
-	.long	WAY_SIZE + 8
-	.long	WAY_SIZE - 32
-	.rept	WAYS-2
-	braf	r0
-	nop
-	.endr
-	.rept	7 - WAYS
-	rts
-	nop
-	.endr
-	.endr
-#else /* WAYS > 6 */
-	/* This variant needs two different pages for mmap-ing.  */
- 	.rept	WAYS-1
-	.rept	WAY_SIZE / 32
-	braf	r0
-	nop
-	.long	WAY_SIZE
-	.rept 6
-	.long	WAY_SIZE - 32
-	.endr
-	.endr
-	.endr
-	.rept	WAY_SIZE / 32
-	rts
-	.rept	15
-	nop
-	.endr
-	.endr
-#endif /* WAYS */
-	ENDFUNC(GLOBAL(ic_invalidate_array))
-#endif /* SH4 */
-#endif /* L_ic_invalidate_array */
-
-#if defined (__SH5__) && __SH5__ == 32
-#ifdef L_shcompact_call_trampoline
-	.section	.rodata
-	.align	1
-LOCAL(ct_main_table):
-.word	LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
-.word	LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
-.word	LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
-.word	LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
-.word	LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
-.word	LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
-.word	LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
-.word	LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
-.word	LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
-.word	LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
-.word	LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
-.word	LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
-.word	LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
-.word	LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
-.word	LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
-.word	LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
-.word	LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
-.word	LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
-.word	LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
-.word	LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
-.word	LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
-.word	LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
-.word	LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
-.word	LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
-.word	LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
-.word	LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
-.word	LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
-.word	LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
-.word	LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
-.word	LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
-.word	LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
-.word	LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
-.word	LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
-	.mode	SHmedia
-	.section	.text..SHmedia32, "ax"
-	.align	2
-	
-     /* This function loads 64-bit general-purpose registers from the
-	stack, from a memory address contained in them or from an FP
-	register, according to a cookie passed in r1.  Its execution
-	time is linear on the number of registers that actually have
-	to be copied.  See sh.h for details on the actual bit pattern.
-
-	The function to be called is passed in r0.  If a 32-bit return
-	value is expected, the actual function will be tail-called,
-	otherwise the return address will be stored in r10 (that the
-	caller should expect to be clobbered) and the return value
-	will be expanded into r2/r3 upon return.  */
-	
-	.global	GLOBAL(GCC_shcompact_call_trampoline)
-	FUNC(GLOBAL(GCC_shcompact_call_trampoline))
-GLOBAL(GCC_shcompact_call_trampoline):
-	ptabs/l	r0, tr0	/* Prepare to call the actual function.  */
-	movi	((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
-	pt/l	LOCAL(ct_loop), tr1
-	addz.l	r1, r63, r1
-	shori	((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
-LOCAL(ct_loop):
-	nsb	r1, r28
-	shlli	r28, 1, r29
-	ldx.w	r0, r29, r30
-LOCAL(ct_main_label):
-	ptrel/l	r30, tr2
-	blink	tr2, r63
-LOCAL(ct_r2_fp):	/* Copy r2 from an FP register.  */
-	/* It must be dr0, so just do it.  */
-	fmov.dq	dr0, r2
-	movi	7, r30
-	shlli	r30, 29, r31
-	andc	r1, r31, r1
-	blink	tr1, r63
-LOCAL(ct_r3_fp):	/* Copy r3 from an FP register.  */
-	/* It is either dr0 or dr2.  */
-	movi	7, r30
-	shlri	r1, 26, r32
-	shlli	r30, 26, r31
-	andc	r1, r31, r1
-	fmov.dq	dr0, r3
-	beqi/l	r32, 4, tr1
-	fmov.dq	dr2, r3
-	blink	tr1, r63
-LOCAL(ct_r4_fp):	/* Copy r4 from an FP register.  */
-	shlri	r1, 23 - 3, r34
-	andi	r34, 3 << 3, r33
-	addi	r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
-LOCAL(ct_r4_fp_base):
-	ptrel/l	r32, tr2
-	movi	7, r30
-	shlli	r30, 23, r31
-	andc	r1, r31, r1
-	blink	tr2, r63
-LOCAL(ct_r4_fp_copy):
-	fmov.dq	dr0, r4
-	blink	tr1, r63
-	fmov.dq	dr2, r4
-	blink	tr1, r63
-	fmov.dq	dr4, r4
-	blink	tr1, r63
-LOCAL(ct_r5_fp):	/* Copy r5 from an FP register.  */
-	shlri	r1, 20 - 3, r34
-	andi	r34, 3 << 3, r33
-	addi	r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
-LOCAL(ct_r5_fp_base):
-	ptrel/l	r32, tr2
-	movi	7, r30
-	shlli	r30, 20, r31
-	andc	r1, r31, r1
-	blink	tr2, r63
-LOCAL(ct_r5_fp_copy):
-	fmov.dq	dr0, r5
-	blink	tr1, r63
-	fmov.dq	dr2, r5
-	blink	tr1, r63
-	fmov.dq	dr4, r5
-	blink	tr1, r63
-	fmov.dq	dr6, r5
-	blink	tr1, r63
-LOCAL(ct_r6_fph):	/* Copy r6 from a high FP register.  */
-	/* It must be dr8.  */
-	fmov.dq	dr8, r6
-	movi	15, r30
-	shlli	r30, 16, r31
-	andc	r1, r31, r1
-	blink	tr1, r63
-LOCAL(ct_r6_fpl):	/* Copy r6 from a low FP register.  */
-	shlri	r1, 16 - 3, r34
-	andi	r34, 3 << 3, r33
-	addi	r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
-LOCAL(ct_r6_fp_base):
-	ptrel/l	r32, tr2
-	movi	7, r30
-	shlli	r30, 16, r31
-	andc	r1, r31, r1
-	blink	tr2, r63
-LOCAL(ct_r6_fp_copy):
-	fmov.dq	dr0, r6
-	blink	tr1, r63
-	fmov.dq	dr2, r6
-	blink	tr1, r63
-	fmov.dq	dr4, r6
-	blink	tr1, r63
-	fmov.dq	dr6, r6
-	blink	tr1, r63
-LOCAL(ct_r7_fph):	/* Copy r7 from a high FP register.  */
-	/* It is either dr8 or dr10.  */
-	movi	15 << 12, r31
-	shlri	r1, 12, r32
-	andc	r1, r31, r1
-	fmov.dq	dr8, r7
-	beqi/l	r32, 8, tr1
-	fmov.dq	dr10, r7
-	blink	tr1, r63
-LOCAL(ct_r7_fpl):	/* Copy r7 from a low FP register.  */
-	shlri	r1, 12 - 3, r34
-	andi	r34, 3 << 3, r33
-	addi	r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
-LOCAL(ct_r7_fp_base):
-	ptrel/l	r32, tr2
-	movi	7 << 12, r31
-	andc	r1, r31, r1
-	blink	tr2, r63
-LOCAL(ct_r7_fp_copy):
-	fmov.dq	dr0, r7
-	blink	tr1, r63
-	fmov.dq	dr2, r7
-	blink	tr1, r63
-	fmov.dq	dr4, r7
-	blink	tr1, r63
-	fmov.dq	dr6, r7
-	blink	tr1, r63
-LOCAL(ct_r8_fph):	/* Copy r8 from a high FP register.  */
-	/* It is either dr8 or dr10.  */
-	movi	15 << 8, r31
-	andi	r1, 1 << 8, r32
-	andc	r1, r31, r1
-	fmov.dq	dr8, r8
-	beq/l	r32, r63, tr1
-	fmov.dq	dr10, r8
-	blink	tr1, r63
-LOCAL(ct_r8_fpl):	/* Copy r8 from a low FP register.  */
-	shlri	r1, 8 - 3, r34
-	andi	r34, 3 << 3, r33
-	addi	r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
-LOCAL(ct_r8_fp_base):
-	ptrel/l	r32, tr2
-	movi	7 << 8, r31
-	andc	r1, r31, r1
-	blink	tr2, r63
-LOCAL(ct_r8_fp_copy):
-	fmov.dq	dr0, r8
-	blink	tr1, r63
-	fmov.dq	dr2, r8
-	blink	tr1, r63
-	fmov.dq	dr4, r8
-	blink	tr1, r63
-	fmov.dq	dr6, r8
-	blink	tr1, r63
-LOCAL(ct_r9_fph):	/* Copy r9 from a high FP register.  */
-	/* It is either dr8 or dr10.  */
-	movi	15 << 4, r31
-	andi	r1, 1 << 4, r32
-	andc	r1, r31, r1
-	fmov.dq	dr8, r9
-	beq/l	r32, r63, tr1
-	fmov.dq	dr10, r9
-	blink	tr1, r63
-LOCAL(ct_r9_fpl):	/* Copy r9 from a low FP register.  */
-	shlri	r1, 4 - 3, r34
-	andi	r34, 3 << 3, r33
-	addi	r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
-LOCAL(ct_r9_fp_base):
-	ptrel/l	r32, tr2
-	movi	7 << 4, r31
-	andc	r1, r31, r1
-	blink	tr2, r63
-LOCAL(ct_r9_fp_copy):
-	fmov.dq	dr0, r9
-	blink	tr1, r63
-	fmov.dq	dr2, r9
-	blink	tr1, r63
-	fmov.dq	dr4, r9
-	blink	tr1, r63
-	fmov.dq	dr6, r9
-	blink	tr1, r63
-LOCAL(ct_r2_ld):	/* Copy r2 from a memory address.  */
-	pt/l	LOCAL(ct_r2_load), tr2
-	movi	3, r30
-	shlli	r30, 29, r31
-	and	r1, r31, r32
-	andc	r1, r31, r1
-	beq/l	r31, r32, tr2
-	addi.l	r2, 8, r3
-	ldx.q	r2, r63, r2
-	/* Fall through.  */
-LOCAL(ct_r3_ld):	/* Copy r3 from a memory address.  */
-	pt/l	LOCAL(ct_r3_load), tr2
-	movi	3, r30
-	shlli	r30, 26, r31
-	and	r1, r31, r32
-	andc	r1, r31, r1
-	beq/l	r31, r32, tr2
-	addi.l	r3, 8, r4
-	ldx.q	r3, r63, r3
-LOCAL(ct_r4_ld):	/* Copy r4 from a memory address.  */
-	pt/l	LOCAL(ct_r4_load), tr2
-	movi	3, r30
-	shlli	r30, 23, r31
-	and	r1, r31, r32
-	andc	r1, r31, r1
-	beq/l	r31, r32, tr2
-	addi.l	r4, 8, r5
-	ldx.q	r4, r63, r4
-LOCAL(ct_r5_ld):	/* Copy r5 from a memory address.  */
-	pt/l	LOCAL(ct_r5_load), tr2
-	movi	3, r30
-	shlli	r30, 20, r31
-	and	r1, r31, r32
-	andc	r1, r31, r1
-	beq/l	r31, r32, tr2
-	addi.l	r5, 8, r6
-	ldx.q	r5, r63, r5
-LOCAL(ct_r6_ld):	/* Copy r6 from a memory address.  */
-	pt/l	LOCAL(ct_r6_load), tr2
-	movi	3 << 16, r31
-	and	r1, r31, r32
-	andc	r1, r31, r1
-	beq/l	r31, r32, tr2
-	addi.l	r6, 8, r7
-	ldx.q	r6, r63, r6
-LOCAL(ct_r7_ld):	/* Copy r7 from a memory address.  */
-	pt/l	LOCAL(ct_r7_load), tr2
-	movi	3 << 12, r31
-	and	r1, r31, r32
-	andc	r1, r31, r1
-	beq/l	r31, r32, tr2
-	addi.l	r7, 8, r8
-	ldx.q	r7, r63, r7
-LOCAL(ct_r8_ld):	/* Copy r8 from a memory address.  */
-	pt/l	LOCAL(ct_r8_load), tr2
-	movi	3 << 8, r31
-	and	r1, r31, r32
-	andc	r1, r31, r1
-	beq/l	r31, r32, tr2
-	addi.l	r8, 8, r9
-	ldx.q	r8, r63, r8
-LOCAL(ct_r9_ld):	/* Copy r9 from a memory address.  */
-	pt/l	LOCAL(ct_check_tramp), tr2
-	ldx.q	r9, r63, r9
-	blink	tr2, r63
-LOCAL(ct_r2_load):
-	ldx.q	r2, r63, r2
-	blink	tr1, r63
-LOCAL(ct_r3_load):
-	ldx.q	r3, r63, r3
-	blink	tr1, r63
-LOCAL(ct_r4_load):
-	ldx.q	r4, r63, r4
-	blink	tr1, r63
-LOCAL(ct_r5_load):
-	ldx.q	r5, r63, r5
-	blink	tr1, r63
-LOCAL(ct_r6_load):
-	ldx.q	r6, r63, r6
-	blink	tr1, r63
-LOCAL(ct_r7_load):
-	ldx.q	r7, r63, r7
-	blink	tr1, r63
-LOCAL(ct_r8_load):
-	ldx.q	r8, r63, r8
-	blink	tr1, r63
-LOCAL(ct_r2_pop):	/* Pop r2 from the stack.  */
-	movi	1, r30
-	ldx.q	r15, r63, r2
-	shlli	r30, 29, r31
-	addi.l	r15, 8, r15
-	andc	r1, r31, r1
-	blink	tr1, r63
-LOCAL(ct_r3_pop):	/* Pop r3 from the stack.  */
-	movi	1, r30
-	ldx.q	r15, r63, r3
-	shlli	r30, 26, r31
-	addi.l	r15, 8, r15
-	andc	r1, r31, r1
-	blink	tr1, r63
-LOCAL(ct_r4_pop):	/* Pop r4 from the stack.  */
-	movi	1, r30
-	ldx.q	r15, r63, r4
-	shlli	r30, 23, r31
-	addi.l	r15, 8, r15
-	andc	r1, r31, r1
-	blink	tr1, r63
-LOCAL(ct_r5_pop):	/* Pop r5 from the stack.  */
-	movi	1, r30
-	ldx.q	r15, r63, r5
-	shlli	r30, 20, r31
-	addi.l	r15, 8, r15
-	andc	r1, r31, r1
-	blink	tr1, r63
-LOCAL(ct_r6_pop):	/* Pop r6 from the stack.  */
-	movi	1, r30
-	ldx.q	r15, r63, r6
-	shlli	r30, 16, r31
-	addi.l	r15, 8, r15
-	andc	r1, r31, r1
-	blink	tr1, r63
-LOCAL(ct_r7_pop):	/* Pop r7 from the stack.  */
-	ldx.q	r15, r63, r7
-	movi	1 << 12, r31
-	addi.l	r15, 8, r15
-	andc	r1, r31, r1
-	blink	tr1, r63
-LOCAL(ct_r8_pop):	/* Pop r8 from the stack.  */
-	ldx.q	r15, r63, r8
-	movi	1 << 8, r31
-	addi.l	r15, 8, r15
-	andc	r1, r31, r1
-	blink	tr1, r63
-LOCAL(ct_pop_seq):	/* Pop a sequence of registers off the stack.  */
-	andi	r1, 7 << 1, r30
-	movi	(LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
-	shlli	r30, 2, r31
-	shori	LOCAL(ct_end_of_pop_seq) & 65535, r32
-	sub.l	r32, r31, r33
-	ptabs/l	r33, tr2
-	blink	tr2, r63
-LOCAL(ct_start_of_pop_seq):	/* Beginning of pop sequence.  */
-	ldx.q	r15, r63, r3
-	addi.l	r15, 8, r15
-	ldx.q	r15, r63, r4
-	addi.l	r15, 8, r15
-	ldx.q	r15, r63, r5
-	addi.l	r15, 8, r15
-	ldx.q	r15, r63, r6
-	addi.l	r15, 8, r15
-	ldx.q	r15, r63, r7
-	addi.l	r15, 8, r15
-	ldx.q	r15, r63, r8
-	addi.l	r15, 8, r15
-LOCAL(ct_r9_pop):	/* Pop r9 from the stack.  */
-	ldx.q	r15, r63, r9
-	addi.l	r15, 8, r15
-LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction.  */
-LOCAL(ct_check_tramp):	/* Check whether we need a trampoline.  */
-	pt/u	LOCAL(ct_ret_wide), tr2
-	andi	r1, 1, r1
-	bne/u	r1, r63, tr2
-LOCAL(ct_call_func):	/* Just branch to the function.  */
-	blink	tr0, r63
-LOCAL(ct_ret_wide):	/* Call the function, so that we can unpack its 
-			   64-bit return value.  */
-	add.l	r18, r63, r10
-	blink	tr0, r18
-	ptabs	r10, tr0
-#if __LITTLE_ENDIAN__
-	shari	r2, 32, r3
-	add.l	r2, r63, r2
-#else
-	add.l	r2, r63, r3
-	shari	r2, 32, r2
-#endif
-	blink	tr0, r63
-
-	ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline))
-#endif /* L_shcompact_call_trampoline */
-
-#ifdef L_shcompact_return_trampoline
-     /* This function does the converse of the code in `ret_wide'
-	above.  It is tail-called by SHcompact functions returning
-	64-bit non-floating-point values, to pack the 32-bit values in
-	r2 and r3 into r2.  */
-
-	.mode	SHmedia
-	.section	.text..SHmedia32, "ax"
-	.align	2
-	.global	GLOBAL(GCC_shcompact_return_trampoline)
-	HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline))
-GLOBAL(GCC_shcompact_return_trampoline):
-	ptabs/l	r18, tr0
-#if __LITTLE_ENDIAN__
-	addz.l	r2, r63, r2
-	shlli	r3, 32, r3
-#else
-	addz.l	r3, r63, r3
-	shlli	r2, 32, r2
-#endif
-	or	r3, r2, r2
-	blink	tr0, r63
-
-	ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline))
-#endif /* L_shcompact_return_trampoline */
-
-#ifdef L_shcompact_incoming_args
-	.section	.rodata
-	.align	1
-LOCAL(ia_main_table):	/* 33 halfword offsets from ia_main_label; entry i serves a cookie whose highest set bit is 31-i, the last entry a zero cookie.  */
-.word	1 /* Invalid, just loop */
-.word	LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
-.word	LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
-.word	1 /* Invalid, just loop */
-.word	LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
-.word	LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
-.word	1 /* Invalid, just loop */
-.word	LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
-.word	LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
-.word	1 /* Invalid, just loop */
-.word	LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
-.word	LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
-.word	1 /* Invalid, just loop */
-.word	1 /* Invalid, just loop */
-.word	LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
-.word	LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
-.word	1 /* Invalid, just loop */
-.word	1 /* Invalid, just loop */
-.word	LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
-.word	LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
-.word	1 /* Invalid, just loop */
-.word	1 /* Invalid, just loop */
-.word	LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
-.word	LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
-.word	1 /* Invalid, just loop */
-.word	1 /* Invalid, just loop */
-.word	LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
-.word	LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
-.word	LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
-.word	LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
-.word	LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
-.word	LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
-.word	LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
-	.mode	SHmedia
-	.section	.text..SHmedia32, "ax"
-	.align	2
-	
-     /* This function stores 64-bit general-purpose registers back in
-	the stack, and loads the address in which each register
-	was stored into itself.  The lower 32 bits of r17 hold the address
-	to begin storing, and the upper 32 bits of r17 hold the cookie.
-	Its execution time is linear in the
-	number of registers that actually have to be copied, and it is
-	optimized for structures larger than 64 bits, as opposed to
-	individual `long long' arguments.  See sh.h for details on the
-	actual bit pattern.  The return address is taken from r18.  */
-	
-	.global	GLOBAL(GCC_shcompact_incoming_args)
- 	FUNC(GLOBAL(GCC_shcompact_incoming_args))
-GLOBAL(GCC_shcompact_incoming_args):
-	ptabs/l	r18, tr0	/* Prepare to return.  */
-	shlri	r17, 32, r0	/* Load the cookie.  */
-	movi	((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43	/* High half of the table base, biased by -31 entries.  */
-	pt/l	LOCAL(ia_loop), tr1	/* tr1 = dispatch loop that the handlers branch back to.  */
-	add.l	r17, r63, r17	/* Keep only the 32-bit store address in r17.  */
-	shori	((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43	/* Low half of the biased table base.  */
-LOCAL(ia_loop):
-	nsb	r0, r36	/* Sign-bit count of the cookie selects the handler.  */
-	shlli	r36, 1, r37	/* Scale to a halfword index.  */
-	ldx.w	r43, r37, r38	/* r38 = handler offset relative to ia_main_label.  */
-LOCAL(ia_main_label):
-	ptrel/l	r38, tr2
-	blink	tr2, r63	/* Jump to the selected handler.  */
-LOCAL(ia_r2_ld):	/* Store r2 and load its address.  */
-	movi	3, r38
-	shlli	r38, 29, r39	/* r39 = 3 << 29, the two cookie bits tested here.  */
-	and	r0, r39, r40	/* r40 = those bits as currently set in the cookie.  */
-	andc	r0, r39, r0	/* Clear them in the cookie.  */
-	stx.q	r17, r63, r2	/* Spill the 64-bit r2 at the store address.  */
-	add.l	r17, r63, r2	/* r2 = address it was stored at.  */
-	addi.l	r17, 8, r17	/* Advance the store address.  */
-	beq/u	r39, r40, tr1	/* Both bits were set: redispatch; otherwise fall into r3.  */
-LOCAL(ia_r3_ld):	/* Store r3 and load its address.  */
-	movi	3, r38
-	shlli	r38, 26, r39	/* r39 = 3 << 26 */
-	and	r0, r39, r40
-	andc	r0, r39, r0
-	stx.q	r17, r63, r3
-	add.l	r17, r63, r3
-	addi.l	r17, 8, r17
-	beq/u	r39, r40, tr1	/* Both bits were set: redispatch; otherwise fall into r4.  */
-LOCAL(ia_r4_ld):	/* Store r4 and load its address.  */
-	movi	3, r38
-	shlli	r38, 23, r39	/* r39 = 3 << 23 */
-	and	r0, r39, r40
-	andc	r0, r39, r0
-	stx.q	r17, r63, r4
-	add.l	r17, r63, r4
-	addi.l	r17, 8, r17
-	beq/u	r39, r40, tr1	/* Both bits were set: redispatch; otherwise fall into r5.  */
-LOCAL(ia_r5_ld):	/* Store r5 and load its address.  */
-	movi	3, r38
-	shlli	r38, 20, r39	/* r39 = 3 << 20 */
-	and	r0, r39, r40
-	andc	r0, r39, r0
-	stx.q	r17, r63, r5
-	add.l	r17, r63, r5
-	addi.l	r17, 8, r17
-	beq/u	r39, r40, tr1	/* Both bits were set: redispatch; otherwise fall into r6.  */
-LOCAL(ia_r6_ld):	/* Store r6 and load its address.  */
-	movi	3, r38
-	shlli	r38, 16, r39	/* r39 = 3 << 16 */
-	and	r0, r39, r40
-	andc	r0, r39, r0
-	stx.q	r17, r63, r6
-	add.l	r17, r63, r6
-	addi.l	r17, 8, r17
-	beq/u	r39, r40, tr1	/* Both bits were set: redispatch; otherwise fall into r7.  */
-LOCAL(ia_r7_ld):	/* Store r7 and load its address.  */
-	movi	3 << 12, r39
-	and	r0, r39, r40
-	andc	r0, r39, r0
-	stx.q	r17, r63, r7
-	add.l	r17, r63, r7
-	addi.l	r17, 8, r17
-	beq/u	r39, r40, tr1	/* Both bits were set: redispatch; otherwise fall into r8.  */
-LOCAL(ia_r8_ld):	/* Store r8 and load its address.  */
-	movi	3 << 8, r39
-	and	r0, r39, r40
-	andc	r0, r39, r0
-	stx.q	r17, r63, r8
-	add.l	r17, r63, r8
-	addi.l	r17, 8, r17
-	beq/u	r39, r40, tr1	/* Both bits were set: redispatch; otherwise fall into r9.  */
-LOCAL(ia_r9_ld):	/* Store r9 and load its address.  */
-	stx.q	r17, r63, r9
-	add.l	r17, r63, r9
-	blink	tr0, r63	/* r9 is the last register: return to the caller.  */
-LOCAL(ia_r2_push):	/* Push r2 onto the stack.  */
-	movi	1, r38
-	shlli	r38, 29, r39	/* r39 = 1 << 29, the cookie bit that selected this handler.  */
-	andc	r0, r39, r0	/* Clear it so the next dispatch moves on.  */
-	stx.q	r17, r63, r2	/* Spill r2; unlike the _ld handlers r2 itself is left alone.  */
-	addi.l	r17, 8, r17	/* Advance the store address.  */
-	blink	tr1, r63	/* Back to the dispatch loop.  */
-LOCAL(ia_r3_push):	/* Push r3 onto the stack.  */
-	movi	1, r38
-	shlli	r38, 26, r39	/* r39 = 1 << 26 */
-	andc	r0, r39, r0
-	stx.q	r17, r63, r3
-	addi.l	r17, 8, r17
-	blink	tr1, r63
-LOCAL(ia_r4_push):	/* Push r4 onto the stack.  */
-	movi	1, r38
-	shlli	r38, 23, r39	/* r39 = 1 << 23 */
-	andc	r0, r39, r0
-	stx.q	r17, r63, r4
-	addi.l	r17, 8, r17
-	blink	tr1, r63
-LOCAL(ia_r5_push):	/* Push r5 onto the stack.  */
-	movi	1, r38
-	shlli	r38, 20, r39	/* r39 = 1 << 20 */
-	andc	r0, r39, r0
-	stx.q	r17, r63, r5
-	addi.l	r17, 8, r17
-	blink	tr1, r63
-LOCAL(ia_r6_push):	/* Push r6 onto the stack.  */
-	movi	1, r38
-	shlli	r38, 16, r39	/* r39 = 1 << 16 */
-	andc	r0, r39, r0
-	stx.q	r17, r63, r6
-	addi.l	r17, 8, r17
-	blink	tr1, r63
-LOCAL(ia_r7_push):	/* Push r7 onto the stack.  */
-	movi	1 << 12, r39
-	andc	r0, r39, r0
-	stx.q	r17, r63, r7
-	addi.l	r17, 8, r17
-	blink	tr1, r63
-LOCAL(ia_r8_push):	/* Push r8 onto the stack.  */
-	movi	1 << 8, r39
-	andc	r0, r39, r0
-	stx.q	r17, r63, r8
-	addi.l	r17, 8, r17
-	blink	tr1, r63
-LOCAL(ia_push_seq):	/* Push a sequence of registers onto the stack.  */
-	andi	r0, 7 << 1, r38	/* r38 = 2 * count, taken from cookie bits 3..1.  */
-	movi	(LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40
-	shlli	r38, 2, r39	/* r39 = 8 * count: each push below is two 4-byte insns.  */
-	shori	LOCAL(ia_end_of_push_seq) & 65535, r40	/* r40 = address just past the sequence.  */
-	sub.l	r40, r39, r41	/* Entry point that runs exactly the last `count' stores.  */
-	ptabs/l	r41, tr2
-	blink	tr2, r63
-LOCAL(ia_stack_of_push_seq):	 /* Beginning of push sequence.  NOTE(review): label is not referenced in this chunk; "stack" is presumably a typo for "start".  */
-	stx.q	r17, r63, r3
-	addi.l	r17, 8, r17
-	stx.q	r17, r63, r4
-	addi.l	r17, 8, r17
-	stx.q	r17, r63, r5
-	addi.l	r17, 8, r17
-	stx.q	r17, r63, r6
-	addi.l	r17, 8, r17
-	stx.q	r17, r63, r7
-	addi.l	r17, 8, r17
-	stx.q	r17, r63, r8
-	addi.l	r17, 8, r17
-LOCAL(ia_r9_push):	/* Push r9 onto the stack.  */
-	stx.q	r17, r63, r9
-LOCAL(ia_return):	/* Return.  */
-	blink	tr0, r63
-LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction.  */
-	ENDFUNC(GLOBAL(GCC_shcompact_incoming_args))
-#endif /* L_shcompact_incoming_args */
-#endif
-#if __SH5__
-#ifdef L_nested_trampoline
-#if __SH5__ == 32
-	.section	.text..SHmedia32,"ax"
-#else
-	.text
-#endif
-	.align	3 /* It is copied in units of 8 bytes in SHmedia mode.  */
-	.global	GLOBAL(GCC_nested_trampoline)
-	HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline))
-GLOBAL(GCC_nested_trampoline):	/* Code template: finds its own address, then loads two words stored after it.  */
-	.mode	SHmedia
-	ptrel/u	r63, tr0	/* Zero offset: tr0 = address of this instruction.  */
-	gettr	tr0, r0	/* r0 = that address, used as the base for the loads below.  */
-#if __SH5__ == 64
-	ld.q	r0, 24, r1	/* r1 = 64-bit word at displacement 24 (presumably the target address; filled in by whoever copies the template -- TODO confirm).  */
-#else
-	ld.l	r0, 24, r1	/* r1 = 32-bit word at displacement 24 (presumably the target address -- TODO confirm).  */
-#endif
-	ptabs/l	r1, tr1	/* tr1 = branch target just loaded.  */
-#if __SH5__ == 64
-	ld.q	r0, 32, r1	/* r1 = next 64-bit word (presumably the static chain value -- TODO confirm).  */
-#else
-	ld.l	r0, 28, r1	/* r1 = next 32-bit word (presumably the static chain value -- TODO confirm).  */
-#endif
-	blink	tr1, r63	/* Jump to the target; no link value is kept.  */
-
-	ENDFUNC(GLOBAL(GCC_nested_trampoline))
-#endif /* L_nested_trampoline */
-#endif /* __SH5__ */
-#if __SH5__ == 32
-#ifdef L_push_pop_shmedia_regs
-	.section	.text..SHmedia32,"ax"
-	.mode	SHmedia
-	.align	2
-#ifndef __SH4_NOFPU__	/* FPU variant: saves dr36..dr62 first, then falls into the common part.  */
-	.global	GLOBAL(GCC_push_shmedia_regs)
-	FUNC(GLOBAL(GCC_push_shmedia_regs))
-GLOBAL(GCC_push_shmedia_regs):
-	addi.l	r15, -14*8, r15	/* Make room for 14 double-precision registers.  */
-	fst.d	r15, 13*8, dr62
-	fst.d	r15, 12*8, dr60
-	fst.d	r15, 11*8, dr58
-	fst.d	r15, 10*8, dr56
-	fst.d	r15,  9*8, dr54
-	fst.d	r15,  8*8, dr52
-	fst.d	r15,  7*8, dr50
-	fst.d	r15,  6*8, dr48
-	fst.d	r15,  5*8, dr46
-	fst.d	r15,  4*8, dr44
-	fst.d	r15,  3*8, dr42
-	fst.d	r15,  2*8, dr40
-	fst.d	r15,  1*8, dr38
-	fst.d	r15,  0*8, dr36
-#else /* __SH4_NOFPU__ */
-	.global	GLOBAL(GCC_push_shmedia_regs_nofpu)
-	FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
-GLOBAL(GCC_push_shmedia_regs_nofpu):
-#endif /* ! __SH4_NOFPU__ */
-	ptabs/l	r18, tr0	/* tr0 = return address held in r18.  */
-	addi.l	r15, -27*8, r15	/* Make room for 27 64-bit slots.  */
-	gettr	tr7, r62	/* r60..r62 serve as scratch for tr5..tr7; their own values are not saved.  */
-	gettr	tr6, r61
-	gettr	tr5, r60
-	st.q	r15, 26*8, r62	/* Slot 26 = tr7.  */
-	st.q	r15, 25*8, r61	/* Slot 25 = tr6.  */
-	st.q	r15, 24*8, r60	/* Slot 24 = tr5.  */
-	st.q	r15, 23*8, r59
-	st.q	r15, 22*8, r58
-	st.q	r15, 21*8, r57
-	st.q	r15, 20*8, r56
-	st.q	r15, 19*8, r55
-	st.q	r15, 18*8, r54
-	st.q	r15, 17*8, r53
-	st.q	r15, 16*8, r52
-	st.q	r15, 15*8, r51
-	st.q	r15, 14*8, r50
-	st.q	r15, 13*8, r49
-	st.q	r15, 12*8, r48
-	st.q	r15, 11*8, r47
-	st.q	r15, 10*8, r46
-	st.q	r15,  9*8, r45
-	st.q	r15,  8*8, r44
-	st.q	r15,  7*8, r35
-	st.q	r15,  6*8, r34
-	st.q	r15,  5*8, r33
-	st.q	r15,  4*8, r32
-	st.q	r15,  3*8, r31
-	st.q	r15,  2*8, r30
-	st.q	r15,  1*8, r29
-	st.q	r15,  0*8, r28
-	blink	tr0, r63	/* Return with r15 pointing at the saved block.  */
-#ifndef __SH4_NOFPU__	/* Close whichever push entry point was assembled.  */
-	ENDFUNC(GLOBAL(GCC_push_shmedia_regs))
-#else
-	ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
-#endif
-#ifndef __SH4_NOFPU__	/* FPU variant: reloads dr36..dr62, then joins the common part at .L0.  */
-	.global	GLOBAL(GCC_pop_shmedia_regs)
-	FUNC(GLOBAL(GCC_pop_shmedia_regs))
-GLOBAL(GCC_pop_shmedia_regs):
-	pt	.L0, tr1	/* tr1 = common restore code, past the nofpu frame-size load.  */
-	movi	41*8, r0	/* Frame size: 27 integer slots plus 14 FP slots.  */
-	fld.d	r15, 40*8, dr62	/* FP slots sit above the 27 integer slots.  */
-	fld.d	r15, 39*8, dr60
-	fld.d	r15, 38*8, dr58
-	fld.d	r15, 37*8, dr56
-	fld.d	r15, 36*8, dr54
-	fld.d	r15, 35*8, dr52
-	fld.d	r15, 34*8, dr50
-	fld.d	r15, 33*8, dr48
-	fld.d	r15, 32*8, dr46
-	fld.d	r15, 31*8, dr44
-	fld.d	r15, 30*8, dr42
-	fld.d	r15, 29*8, dr40
-	fld.d	r15, 28*8, dr38
-	fld.d	r15, 27*8, dr36
-	blink	tr1, r63	/* Skip the movi below so r0 stays 41*8.  */
-#else /* __SH4_NOFPU__ */
-	.global	GLOBAL(GCC_pop_shmedia_regs_nofpu)
-	FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
-GLOBAL(GCC_pop_shmedia_regs_nofpu):
-#endif /* ! __SH4_NOFPU__ */
-	movi	27*8, r0	/* Frame size when only the integer slots were pushed.  */
-.L0:
-	ptabs	r18, tr0	/* tr0 = return address held in r18.  */
-	ld.q	r15, 26*8, r62
-	ld.q	r15, 25*8, r61
-	ld.q	r15, 24*8, r60
-	ptabs	r62, tr7	/* Restore tr7..tr5 from the values just loaded.  */
-	ptabs	r61, tr6
-	ptabs	r60, tr5
-	ld.q	r15, 23*8, r59
-	ld.q	r15, 22*8, r58
-	ld.q	r15, 21*8, r57
-	ld.q	r15, 20*8, r56
-	ld.q	r15, 19*8, r55
-	ld.q	r15, 18*8, r54
-	ld.q	r15, 17*8, r53
-	ld.q	r15, 16*8, r52
-	ld.q	r15, 15*8, r51
-	ld.q	r15, 14*8, r50
-	ld.q	r15, 13*8, r49
-	ld.q	r15, 12*8, r48
-	ld.q	r15, 11*8, r47
-	ld.q	r15, 10*8, r46
-	ld.q	r15,  9*8, r45
-	ld.q	r15,  8*8, r44
-	ld.q	r15,  7*8, r35
-	ld.q	r15,  6*8, r34
-	ld.q	r15,  5*8, r33
-	ld.q	r15,  4*8, r32
-	ld.q	r15,  3*8, r31
-	ld.q	r15,  2*8, r30
-	ld.q	r15,  1*8, r29
-	ld.q	r15,  0*8, r28
-	add.l	r15, r0, r15	/* Release the frame: 27*8 or 41*8 bytes.  */
-	blink	tr0, r63
-
-#ifndef __SH4_NOFPU__
-	ENDFUNC(GLOBAL(GCC_pop_shmedia_regs))
-#else
-	ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
-#endif
-#endif /* L_push_pop_shmedia_regs */
-#endif /* __SH5__ == 32 */
-
-#ifdef L_div_table
-#if __SH5__
-#if defined(__pic__) && defined(__SHMEDIA__)
-	.global	GLOBAL(sdivsi3)
-	FUNC(GLOBAL(sdivsi3))
-#if __SH5__ == 32
-	.section	.text..SHmedia32,"ax"
-#else
-	.text
-#endif
-#if 0
-/* ??? FIXME: Presumably due to a linker bug, exporting data symbols
-   in a text section does not work (at least for shared libraries):
-   the linker sets the LSB of the address as if this was SHmedia code.  */
-#define TEXT_DATA_BUG
-#endif
-	.align	2
- // inputs: r4 (dividend), r5 (divisor); return address in r18
- // clobbered: r1,r18,r19,r20,r21,r25,tr0
- // result in r0
- .global GLOBAL(sdivsi3)
-GLOBAL(sdivsi3):
-#ifdef TEXT_DATA_BUG
- ptb datalabel Local_div_table,tr0	/* private copy of the table kept next to the code */
-#else
- ptb GLOBAL(div_table_internal),tr0	/* hidden alias of div_table */
-#endif
- nsb r5, r1	/* r1 = normalization shift for the divisor */
- shlld r5, r1, r25    // normalize; [-2 ..1, 1..2) in s2.62
- shari r25, 58, r21   // extract 5(6) bit index (s2.4 with hole -1..1)
- /* bubble */
- gettr tr0,r20	/* r20 = table address loaded through tr0 */
- ldx.ub r20, r21, r19 // u0.8
- shari r25, 32, r25   // normalize to s2.30
- shlli r21, 1, r21	/* scale the index for the halfword table */
- muls.l r25, r19, r19 // s2.38
- ldx.w r20, r21, r21  // s2.14
-  ptabs r18, tr0	/* capture the return address before r18 is reused as scratch */
- shari r19, 24, r19   // truncate to s2.14
- sub r21, r19, r19    // some 11 bit inverse in s1.14
- muls.l r19, r19, r21 // u0.28
-  sub r63, r1, r1	/* r1 = -r1 */
-  addi r1, 92, r1	/* r1 = 92 - shift, the final right-shift count used by shard */
- muls.l r25, r21, r18 // s2.58
- shlli r19, 45, r19   // multiply by two and convert to s2.58
-  /* bubble */
- sub r19, r18, r18
- shari r18, 28, r18   // some 22 bit inverse in s1.30
- muls.l r18, r25, r0  // s2.60
-  muls.l r18, r4, r25 // s32.30
-  /* bubble */
- shari r0, 16, r19   // s-16.44
- muls.l r19, r18, r19 // s-16.74
-  shari r25, 63, r0	/* r0 = 0 or -1 according to the sign of r25 */
-  shari r4, 14, r18   // s19.-14
- shari r19, 30, r19   // s-16.44
- muls.l r19, r18, r19 // s15.30
-  xor r21, r0, r21    // You could also use the constant 1 << 27.
-  add r21, r25, r21
- sub r21, r19, r21
- shard r21, r1, r21	/* scale down by the count computed above */
- sub r21, r0, r0	/* r0 = r21 - sign word: final quotient */
- blink tr0, r63	/* return */
-	ENDFUNC(GLOBAL(sdivsi3))
-/* This table has been generated by divtab.c .
-Defects for bias -330:
-   Max defect: 6.081536e-07 at -1.000000e+00
-   Min defect: 2.849516e-08 at 1.030651e+00
-   Max 2nd step defect: 9.606539e-12 at -1.000000e+00
-   Min 2nd step defect: 0.000000e+00 at 0.000000e+00
-   Defect at 1: 1.238659e-07
-   Defect at -2: 1.061708e-07 */
-#else /* ! __pic__ || ! __SHMEDIA__ */
-	.section	.rodata
-#endif /* __pic__ */
-#if defined(TEXT_DATA_BUG) && defined(__pic__) && defined(__SHMEDIA__)
-	.balign 2
-	.type	Local_div_table,@object
-	.size	Local_div_table,128
-/* negative division constants */
-	.word	-16638
-	.word	-17135
-	.word	-17737
-	.word	-18433
-	.word	-19103
-	.word	-19751
-	.word	-20583
-	.word	-21383
-	.word	-22343
-	.word	-23353
-	.word	-24407
-	.word	-25582
-	.word	-26863
-	.word	-28382
-	.word	-29965
-	.word	-31800
-/* negative division factors */
-	.byte	66
-	.byte	70
-	.byte	75
-	.byte	81
-	.byte	87
-	.byte	93
-	.byte	101
-	.byte	109
-	.byte	119
-	.byte	130
-	.byte	142
-	.byte	156
-	.byte	172
-	.byte	192
-	.byte	214
-	.byte	241
-	.skip 16
-Local_div_table:
-	.skip 16
-/* positive division factors */
-	.byte	241
-	.byte	214
-	.byte	192
-	.byte	172
-	.byte	156
-	.byte	142
-	.byte	130
-	.byte	119
-	.byte	109
-	.byte	101
-	.byte	93
-	.byte	87
-	.byte	81
-	.byte	75
-	.byte	70
-	.byte	66
-/* positive division constants */
-	.word	31801
-	.word	29966
-	.word	28383
-	.word	26864
-	.word	25583
-	.word	24408
-	.word	23354
-	.word	22344
-	.word	21384
-	.word	20584
-	.word	19752
-	.word	19104
-	.word	18434
-	.word	17738
-	.word	17136
-	.word	16639
-	.section	.rodata
-#endif /* TEXT_DATA_BUG */
-	.balign 2
-	.type	GLOBAL(div_table),@object
-	.size	GLOBAL(div_table),128
-/* negative division constants */
-	.word	-16638
-	.word	-17135
-	.word	-17737
-	.word	-18433
-	.word	-19103
-	.word	-19751
-	.word	-20583
-	.word	-21383
-	.word	-22343
-	.word	-23353
-	.word	-24407
-	.word	-25582
-	.word	-26863
-	.word	-28382
-	.word	-29965
-	.word	-31800
-/* negative division factors */
-	.byte	66
-	.byte	70
-	.byte	75
-	.byte	81
-	.byte	87
-	.byte	93
-	.byte	101
-	.byte	109
-	.byte	119
-	.byte	130
-	.byte	142
-	.byte	156
-	.byte	172
-	.byte	192
-	.byte	214
-	.byte	241
-	.skip 16
-	.global	GLOBAL(div_table)
-GLOBAL(div_table):
-	HIDDEN_ALIAS(div_table_internal,div_table)
-	.skip 16
-/* positive division factors */
-	.byte	241
-	.byte	214
-	.byte	192
-	.byte	172
-	.byte	156
-	.byte	142
-	.byte	130
-	.byte	119
-	.byte	109
-	.byte	101
-	.byte	93
-	.byte	87
-	.byte	81
-	.byte	75
-	.byte	70
-	.byte	66
-/* positive division constants */
-	.word	31801
-	.word	29966
-	.word	28383
-	.word	26864
-	.word	25583
-	.word	24408
-	.word	23354
-	.word	22344
-	.word	21384
-	.word	20584
-	.word	19752
-	.word	19104
-	.word	18434
-	.word	17738
-	.word	17136
-	.word	16639
-
-#elif defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
-/* This code uses shld, thus is not suitable for SH1 / SH2.  */
-
-/* Signed / unsigned division without use of FPU, optimized for SH4.
-   Uses a lookup table for divisors in the range -128 .. +128, and
-   div1 with case distinction for larger divisors in three more ranges.
-   The code is lumped together with the table to allow the use of mova.  */
-#ifdef __LITTLE_ENDIAN__
-#define L_LSB 0
-#define L_LSWMSB 1
-#define L_MSWLSB 2
-#else
-#define L_LSB 3
-#define L_LSWMSB 2
-#define L_MSWLSB 1
-#endif
-
-	.balign 4
-	.global	GLOBAL(udivsi3_i4i)
-	FUNC(GLOBAL(udivsi3_i4i))
-GLOBAL(udivsi3_i4i):	/* Unsigned divide: r0 = r4 / r5; r4 and r5 are saved on the stack and restored.  */
-	mov.w LOCAL(c128_w), r1	/* r1 = 128 */
-	div0u
-	mov r4,r0
-	shlr8 r0	/* r0 = dividend >> 8 */
-	cmp/hi r1,r5	/* T = divisor > 128 (unsigned) */
-	extu.w r5,r1	/* r1 = low 16 bits of the divisor */
-	bf LOCAL(udiv_le128)	/* divisor <= 128: table-driven path */
-	cmp/eq r5,r1	/* T = divisor fits in 16 bits */
-	bf LOCAL(udiv_ge64k)	/* divisor >= 64K */
-	shlr r0
-	mov r5,r1	/* keep the unshifted divisor for the stack save */
-	shll16 r5
-	mov.l r4,@-r15	/* save r4 */
-	div1 r5,r0
-	mov.l r1,@-r15	/* save the original r5 */
-	div1 r5,r0
-	div1 r5,r0
-	bra LOCAL(udiv_25)	/* finish in the div1 tail shared with sdivsi3_i4i */
-	div1 r5,r0	/* (delay slot) */
-
-LOCAL(div_le128):	/* Entry from sdivsi3_i4i: r4 and r5 are already on the stack.  */
-	mova LOCAL(div_table_ix),r0
-	bra LOCAL(div_le128_2)
-	mov.b @(r0,r5),r1	/* (delay slot) r1 = div_table_ix[divisor] */
-LOCAL(udiv_le128):
-	mov.l r4,@-r15	/* save r4 */
-	mova LOCAL(div_table_ix),r0
-	mov.b @(r0,r5),r1	/* r1 = div_table_ix[divisor] */
-	mov.l r5,@-r15	/* save r5 */
-LOCAL(div_le128_2):
-	mova LOCAL(div_table_inv),r0
-	mov.l @(r0,r1),r1	/* r1 = normalized inverse selected by that index */
-	mov r5,r0
-	tst #0xfe,r0	/* T = divisor is 0 or 1 */
-	mova LOCAL(div_table_clz),r0
-	dmulu.l r1,r4	/* mach:macl = inverse * dividend */
-	mov.b @(r0,r5),r1	/* r1 = shift count from div_table_clz */
-	bt/s LOCAL(div_by_1)
-	mov r4,r0	/* (delay slot) quotient for the divide-by-1 case */
-	mov.l @r15+,r5	/* restore r5 */
-	sts mach,r0	/* high word of the product */
-	/* clrt */
-	addc r4,r0	/* add the dividend for the inverse's implicit leading 1 */
-	mov.l @r15+,r4	/* restore r4 */
-	rotcr r0	/* bring the carry back in as the top bit */
-	rts
-	shld r1,r0	/* (delay slot) shift by the table count */
-
-LOCAL(div_by_1_neg):
-	neg r4,r0	/* quotient = -dividend */
-LOCAL(div_by_1):
-	mov.l @r15+,r5	/* restore r5 */
-	rts
-	mov.l @r15+,r4	/* (delay slot) restore r4 */
-
-LOCAL(div_ge64k):	/* Entry from sdivsi3_i4i; T comes from cmp/hi r0,r5 in the branch's delay slot.  */
-	bt/s LOCAL(div_r8)	/* divisor > dividend >> 8, so the quotient is below 256 */
-	div0u	/* (delay slot) */
-	shll8 r5
-	bra LOCAL(div_ge64k_2)
-	div1 r5,r0	/* (delay slot) */
-LOCAL(udiv_ge64k):
-	cmp/hi r0,r5	/* T = divisor > dividend >> 8 */
-	mov r5,r1	/* keep the unshifted divisor for the stack save */
-	bt LOCAL(udiv_r8)	/* quotient is below 256 */
-	shll8 r5
-	mov.l r4,@-r15	/* save r4 */
-	div1 r5,r0
-	mov.l r1,@-r15	/* save the original r5 */
-LOCAL(div_ge64k_2):
-	div1 r5,r0
-	mov.l LOCAL(zero_l),r1	/* r1 = 0 */
-	.rept 4
-	div1 r5,r0
-	.endr
-	mov.l r1,@-r15	/* push a zeroed scratch word used to assemble quotient bytes */
-	div1 r5,r0
-	mov.w LOCAL(m256_w),r1	/* r1 = 0xff00, sign-extended by mov.w */
-	div1 r5,r0
-	mov.b r0,@(L_LSWMSB,r15)	/* park a quotient byte in the scratch word */
-	xor r4,r0
-	and r1,r0
-	bra LOCAL(div_ge64k_end)
-	xor r4,r0	/* (delay slot) xor/and/xor: low byte of r0 replaced by that of r4 */
-	
-LOCAL(div_r8):	/* Entry from sdivsi3_i4i: r4 and r5 are already on the stack.  */
-	shll16 r4
-	bra LOCAL(div_r8_2)
-	shll8 r4	/* (delay slot) */
-LOCAL(udiv_r8):
-	mov.l r4,@-r15	/* save r4 */
-	shll16 r4
-	clrt
-	shll8 r4
-	mov.l r5,@-r15	/* save r5 */
-LOCAL(div_r8_2):
-	rotcl r4
-	mov r0,r1
-	div1 r5,r1
-	mov r4,r0
-	rotcl r0
-	mov r5,r4	/* copy the divisor to r4 so r5 can be restored before the last step */
-	div1 r5,r1
-	.rept 5
-	rotcl r0; div1 r5,r1
-	.endr
-	rotcl r0
-	mov.l @r15+,r5	/* restore r5 */
-	div1 r4,r1	/* last step uses the divisor copy in r4 */
-	mov.l @r15+,r4	/* restore r4 */
-	rts
-	rotcl r0	/* (delay slot) shift in the final quotient bit */
-
-	ENDFUNC(GLOBAL(udivsi3_i4i))
-
-	.global	GLOBAL(sdivsi3_i4i)
-	FUNC(GLOBAL(sdivsi3_i4i))
-	/* This is link-compatible with a GLOBAL(sdivsi3) call,
-	   but we effectively clobber only r1.  */
-GLOBAL(sdivsi3_i4i):
-	mov.l r4,@-r15	/* save r4 */
-	cmp/pz r5	/* T = divisor >= 0 */
-	mov.w LOCAL(c128_w), r1	/* r1 = 128 */
-	bt/s LOCAL(pos_divisor)
-	cmp/pz r4	/* (delay slot) T = dividend >= 0 */
-	mov.l r5,@-r15	/* save r5 */
-	neg r5,r5	/* work with the negated (positive) divisor */
-	bt/s LOCAL(neg_result)	/* dividend >= 0, divisor < 0 */
-	cmp/hi r1,r5	/* (delay slot) T = negated divisor > 128 */
-	neg r4,r4	/* both negative: negate the dividend too */
-LOCAL(pos_result):
-	extu.w r5,r0
-	bf LOCAL(div_le128)	/* divisor magnitude <= 128: table-driven path */
-	cmp/eq r5,r0	/* T = divisor fits in 16 bits */
-	mov r4,r0
-	shlr8 r0	/* r0 = dividend >> 8 */
-	bf/s LOCAL(div_ge64k)
-	cmp/hi r0,r5	/* (delay slot) T = divisor > dividend >> 8 */
-	div0u
-	shll16 r5
-	div1 r5,r0
-	div1 r5,r0
-	div1 r5,r0
-LOCAL(udiv_25):	/* udivsi3_i4i joins here.  */
-	mov.l LOCAL(zero_l),r1	/* r1 = 0 */
-	div1 r5,r0
-	div1 r5,r0
-	mov.l r1,@-r15	/* push a zeroed scratch word used to assemble quotient bytes */
-	.rept 3
-	div1 r5,r0
-	.endr
-	mov.b r0,@(L_MSWLSB,r15)	/* park a quotient byte in the scratch word */
-	xtrct r4,r0
-	swap.w r0,r0
-	.rept 8
-	div1 r5,r0
-	.endr
-	mov.b r0,@(L_LSWMSB,r15)	/* park the next quotient byte */
-LOCAL(div_ge64k_end):
-	.rept 8
-	div1 r5,r0
-	.endr
-	mov.l @r15+,r4 ! zero-extension and swap using LS unit.
-	extu.b r0,r0
-	mov.l @r15+,r5	/* restore r5 */
-	or r4,r0	/* merge the bytes parked in the scratch word */
-	mov.l @r15+,r4	/* restore r4 */
-	rts
-	rotcl r0	/* (delay slot) shift in the final quotient bit */
-
-LOCAL(div_le128_neg):	/* Table-driven path whose quotient is negated on exit.  */
-	tst #0xfe,r0	/* T = divisor magnitude is 0 or 1 */
-	mova LOCAL(div_table_ix),r0
-	mov.b @(r0,r5),r1	/* r1 = div_table_ix[divisor] */
-	mova LOCAL(div_table_inv),r0
-	bt/s LOCAL(div_by_1_neg)
-	mov.l @(r0,r1),r1	/* (delay slot) r1 = normalized inverse */
-	mova LOCAL(div_table_clz),r0
-	dmulu.l r1,r4	/* mach:macl = inverse * dividend */
-	mov.b @(r0,r5),r1	/* r1 = shift count from div_table_clz */
-	mov.l @r15+,r5	/* restore r5 */
-	sts mach,r0	/* high word of the product */
-	/* clrt */
-	addc r4,r0	/* add the dividend for the inverse's implicit leading 1 */
-	mov.l @r15+,r4	/* restore r4 */
-	rotcr r0	/* bring the carry back in as the top bit */
-	shld r1,r0	/* shift by the table count */
-	rts
-	neg r0,r0	/* (delay slot) negate the quotient */
-
-LOCAL(pos_divisor):
-	mov.l r5,@-r15	/* save r5 */
-	bt/s LOCAL(pos_result)	/* dividend >= 0 as well */
-	cmp/hi r1,r5	/* (delay slot) T = divisor > 128 */
-	neg r4,r4	/* dividend < 0: negate it; the quotient will be negated */
-LOCAL(neg_result):
-	extu.w r5,r0
-	bf LOCAL(div_le128_neg)	/* divisor magnitude <= 128 */
-	cmp/eq r5,r0	/* T = divisor fits in 16 bits */
-	mov r4,r0
-	shlr8 r0	/* r0 = dividend >> 8 */
-	bf/s LOCAL(div_ge64k_neg)
-	cmp/hi r0,r5	/* (delay slot) T = divisor > dividend >> 8 */
-	div0u
-	mov.l LOCAL(zero_l),r1	/* r1 = 0 */
-	shll16 r5
-	div1 r5,r0
-	mov.l r1,@-r15	/* push a zeroed scratch word used to assemble quotient bytes */
-	.rept 7
-	div1 r5,r0
-	.endr
-	mov.b r0,@(L_MSWLSB,r15)	/* park a quotient byte in the scratch word */
-	xtrct r4,r0
-	swap.w r0,r0
-	.rept 8
-	div1 r5,r0
-	.endr
-	mov.b r0,@(L_LSWMSB,r15)	/* park the next quotient byte */
-LOCAL(div_ge64k_neg_end):
-	.rept 8
-	div1 r5,r0
-	.endr
-	mov.l @r15+,r4 ! zero-extension and swap using LS unit.
-	extu.b r0,r1
-	mov.l @r15+,r5	/* restore r5 */
-	or r4,r1	/* merge the bytes parked in the scratch word */
-LOCAL(div_r8_neg_end):
-	mov.l @r15+,r4	/* restore r4 */
-	rotcl r1	/* shift in the final quotient bit */
-	rts
-	neg r1,r0	/* (delay slot) r0 = negated quotient */
-
-LOCAL(div_ge64k_neg):	/* T comes from cmp/hi r0,r5 in the branch's delay slot.  */
-	bt/s LOCAL(div_r8_neg)	/* divisor > dividend >> 8, so the quotient is below 256 */
-	div0u	/* (delay slot) */
-	shll8 r5
-	mov.l LOCAL(zero_l),r1	/* r1 = 0 */
-	.rept 6
-	div1 r5,r0
-	.endr
-	mov.l r1,@-r15	/* push a zeroed scratch word used to assemble quotient bytes */
-	div1 r5,r0
-	mov.w LOCAL(m256_w),r1	/* r1 = 0xff00, sign-extended by mov.w */
-	div1 r5,r0
-	mov.b r0,@(L_LSWMSB,r15)	/* park a quotient byte in the scratch word */
-	xor r4,r0
-	and r1,r0
-	bra LOCAL(div_ge64k_neg_end)
-	xor r4,r0	/* (delay slot) xor/and/xor: low byte of r0 replaced by that of r4 */
-
-LOCAL(c128_w):	/* Literal loaded with mov.w by both entry points.  */
-	.word 128
-
-LOCAL(div_r8_neg):
-	clrt
-	shll16 r4
-	mov r4,r1
-	shll8 r1
-	mov r5,r4	/* copy the divisor to r4 so r5 can be restored before the last step */
-	.rept 7
-	rotcl r1; div1 r5,r0
-	.endr
-	mov.l @r15+,r5	/* restore r5 */
-	rotcl r1
-	bra LOCAL(div_r8_neg_end)
-	div1 r4,r0	/* (delay slot) last step uses the divisor copy in r4 */
-
-LOCAL(m256_w):	/* Byte mask literal loaded with mov.w.  */
-	.word 0xff00
-/* This table has been generated by divtab-sh4.c.  */
-	.balign 4
-LOCAL(div_table_clz):
-	.byte	0
-	.byte	1
-	.byte	0
-	.byte	-1
-	.byte	-1
-	.byte	-2
-	.byte	-2
-	.byte	-2
-	.byte	-2
-	.byte	-3
-	.byte	-3
-	.byte	-3
-	.byte	-3
-	.byte	-3
-	.byte	-3
-	.byte	-3
-	.byte	-3
-	.byte	-4
-	.byte	-4
-	.byte	-4
-	.byte	-4
-	.byte	-4
-	.byte	-4
-	.byte	-4
-	.byte	-4
-	.byte	-4
-	.byte	-4
-	.byte	-4
-	.byte	-4
-	.byte	-4
-	.byte	-4
-	.byte	-4
-	.byte	-4
-	.byte	-5
-	.byte	-5
-	.byte	-5
-	.byte	-5
-	.byte	-5
-	.byte	-5
-	.byte	-5
-	.byte	-5
-	.byte	-5
-	.byte	-5
-	.byte	-5
-	.byte	-5
-	.byte	-5
-	.byte	-5
-	.byte	-5
-	.byte	-5
-	.byte	-5
-	.byte	-5
-	.byte	-5
-	.byte	-5
-	.byte	-5
-	.byte	-5
-	.byte	-5
-	.byte	-5
-	.byte	-5
-	.byte	-5
-	.byte	-5
-	.byte	-5
-	.byte	-5
-	.byte	-5
-	.byte	-5
-	.byte	-5
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-	.byte	-6
-/* Lookup table translating positive divisor to index into table of
-   normalized inverse.  N.B. the '0' entry is also the last entry of the
- previous table, and causes an unaligned access for division by zero.  */
-LOCAL(div_table_ix):
-	.byte	-6
-	.byte	-128
-	.byte	-128
-	.byte	0
-	.byte	-128
-	.byte	-64
-	.byte	0
-	.byte	64
-	.byte	-128
-	.byte	-96
-	.byte	-64
-	.byte	-32
-	.byte	0
-	.byte	32
-	.byte	64
-	.byte	96
-	.byte	-128
-	.byte	-112
-	.byte	-96
-	.byte	-80
-	.byte	-64
-	.byte	-48
-	.byte	-32
-	.byte	-16
-	.byte	0
-	.byte	16
-	.byte	32
-	.byte	48
-	.byte	64
-	.byte	80
-	.byte	96
-	.byte	112
-	.byte	-128
-	.byte	-120
-	.byte	-112
-	.byte	-104
-	.byte	-96
-	.byte	-88
-	.byte	-80
-	.byte	-72
-	.byte	-64
-	.byte	-56
-	.byte	-48
-	.byte	-40
-	.byte	-32
-	.byte	-24
-	.byte	-16
-	.byte	-8
-	.byte	0
-	.byte	8
-	.byte	16
-	.byte	24
-	.byte	32
-	.byte	40
-	.byte	48
-	.byte	56
-	.byte	64
-	.byte	72
-	.byte	80
-	.byte	88
-	.byte	96
-	.byte	104
-	.byte	112
-	.byte	120
-	.byte	-128
-	.byte	-124
-	.byte	-120
-	.byte	-116
-	.byte	-112
-	.byte	-108
-	.byte	-104
-	.byte	-100
-	.byte	-96
-	.byte	-92
-	.byte	-88
-	.byte	-84
-	.byte	-80
-	.byte	-76
-	.byte	-72
-	.byte	-68
-	.byte	-64
-	.byte	-60
-	.byte	-56
-	.byte	-52
-	.byte	-48
-	.byte	-44
-	.byte	-40
-	.byte	-36
-	.byte	-32
-	.byte	-28
-	.byte	-24
-	.byte	-20
-	.byte	-16
-	.byte	-12
-	.byte	-8
-	.byte	-4
-	.byte	0
-	.byte	4
-	.byte	8
-	.byte	12
-	.byte	16
-	.byte	20
-	.byte	24
-	.byte	28
-	.byte	32
-	.byte	36
-	.byte	40
-	.byte	44
-	.byte	48
-	.byte	52
-	.byte	56
-	.byte	60
-	.byte	64
-	.byte	68
-	.byte	72
-	.byte	76
-	.byte	80
-	.byte	84
-	.byte	88
-	.byte	92
-	.byte	96
-	.byte	100
-	.byte	104
-	.byte	108
-	.byte	112
-	.byte	116
-	.byte	120
-	.byte	124
-	.byte	-128
-/* 1/64 .. 1/127, normalized.  There is an implicit leading 1 in bit 32.  */
-	.balign 4
-LOCAL(zero_l):
-	.long	0x0
-	.long	0xF81F81F9
-	.long	0xF07C1F08
-	.long	0xE9131AC0
-	.long	0xE1E1E1E2
-	.long	0xDAE6076C
-	.long	0xD41D41D5
-	.long	0xCD856891
-	.long	0xC71C71C8
-	.long	0xC0E07039
-	.long	0xBACF914D
-	.long	0xB4E81B4F
-	.long	0xAF286BCB
-	.long	0xA98EF607
-	.long	0xA41A41A5
-	.long	0x9EC8E952
-	.long	0x9999999A
-	.long	0x948B0FCE
-	.long	0x8F9C18FA
-	.long	0x8ACB90F7
-	.long	0x86186187
-	.long	0x81818182
-	.long	0x7D05F418
-	.long	0x78A4C818
-	.long	0x745D1746
-	.long	0x702E05C1
-	.long	0x6C16C16D
-	.long	0x68168169
-	.long	0x642C8591
-	.long	0x60581606
-	.long	0x5C9882BA
-	.long	0x58ED2309
-LOCAL(div_table_inv):
-	.long	0x55555556
-	.long	0x51D07EAF
-	.long	0x4E5E0A73
-	.long	0x4AFD6A06
-	.long	0x47AE147B
-	.long	0x446F8657
-	.long	0x41414142
-	.long	0x3E22CBCF
-	.long	0x3B13B13C
-	.long	0x38138139
-	.long	0x3521CFB3
-	.long	0x323E34A3
-	.long	0x2F684BDB
-	.long	0x2C9FB4D9
-	.long	0x29E4129F
-	.long	0x27350B89
-	.long	0x24924925
-	.long	0x21FB7813
-	.long	0x1F7047DD
-	.long	0x1CF06ADB
-	.long	0x1A7B9612
-	.long	0x18118119
-	.long	0x15B1E5F8
-	.long	0x135C8114
-	.long	0x11111112
-	.long	0xECF56BF
-	.long	0xC9714FC
-	.long	0xA6810A7
-	.long	0x8421085
-	.long	0x624DD30
-	.long	0x4104105
-	.long	0x2040811
-	/* maximum error: 0.987342 scaled: 0.921875*/
-
-	ENDFUNC(GLOBAL(sdivsi3_i4i))
-#endif /* SH3 / SH4 */
-
-#endif /* L_div_table */
-
-#ifdef L_udiv_qrnnd_16
-#if !__SHMEDIA__
-	HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16))
-	/* Out: r0 = rn, r1 = qn.  In: r0 = n1, r4 = n0, r5 = d, r6 = d1.  Scratch: r2 = __m.  */
-	/* n1 < d, but n1 might be larger than d1.  */
-	.global GLOBAL(udiv_qrnnd_16)
-	.balign 8
-GLOBAL(udiv_qrnnd_16):
-	div0u
-	cmp/hi r6,r0	/* T = n1 > d1 (unsigned) */
-	bt .Lots	/* handled out of line below */
-	.rept 16
-	div1 r6,r0 /* one division step per repetition */
-	.endr
-	extu.w r0,r1	/* r1 = low 16 bits of r0 */
-	bt 0f
-	add r6,r0
-0:	rotcl r1	/* shift T into the low end of r1 */
-	mulu.w r1,r5	/* macl = r1 * low 16 bits of d */
-	xtrct r4,r0
-	swap.w r0,r0
-	sts macl,r2	/* r2 = __m */
-	cmp/hs r2,r0	/* T = r0 >= __m (unsigned) */
-	sub r2,r0
-	bt 0f	/* no underflow: done */
-	addc r5,r0	/* add d back; T = carry out */
-	add #-1,r1	/* and decrement the quotient */
-	bt 0f	/* carry: one correction was enough */
-1:	add #-1,r1	/* second correction step */
-	rts
-	add r5,r0	/* (delay slot) */
-	.balign 8
-.Lots:
-	sub r5,r0
-	swap.w r4,r1
-	xtrct r0,r1
-	clrt
-	mov r1,r0
-	addc r5,r0	/* r0 += d; T = carry out */
-	mov #-1,r1
-	SL1(bf, 1b,
-	shlr16 r1)
-0:	rts
-	nop	/* (delay slot) */
-	ENDFUNC(GLOBAL(udiv_qrnnd_16))
-#endif /* !__SHMEDIA__ */
-#endif /* L_udiv_qrnnd_16 */
diff --git a/gcc/config/sh/lib1funcs.h b/gcc/config/sh/lib1funcs.h
deleted file mode 100644
index af4b41c..0000000
--- a/gcc/config/sh/lib1funcs.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
-   2004, 2005, 2006, 2009
-   Free Software Foundation, Inc.
-
-This file is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 3, or (at your option) any
-later version.
-
-This file is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-#ifdef __ELF__ /* ELF: assembler-local labels, symbol types, sizes and visibility.  */
-#define LOCAL(X)	.L_##X /* file-local label */
-#define FUNC(X)		.type X,@function /* mark X as a function symbol */
-#define HIDDEN_FUNC(X)	FUNC(X); .hidden X /* function symbol with hidden visibility */
-#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y); .hidden GLOBAL(X) /* hidden alias X for Y */
-#define ENDFUNC0(X)	.Lfe_##X: .size X,.Lfe_##X-X /* end label, then size = end - start */
-#define ENDFUNC(X)	ENDFUNC0(X) /* extra level so that X is macro-expanded first */
-#else /* non-ELF: plain labels, no type/size/visibility directives */
-#define LOCAL(X)	L_##X
-#define FUNC(X)
-#define HIDDEN_FUNC(X)
-#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y)
-#define ENDFUNC(X)
-#endif
-
-#define	CONCAT(A,B)	A##B
-#define	GLOBAL0(U,X)	CONCAT(U,__##X) /* user label prefix, then "__", then X */
-#define	GLOBAL(X)	GLOBAL0(__USER_LABEL_PREFIX__,X) /* external name of libgcc routine X */
-
-#define ALIAS(X,Y)	.global GLOBAL(X); .set GLOBAL(X),GLOBAL(Y) /* export X as another name for Y */
-
-#if defined __SH2A__ && defined __FMOVD_ENABLED__
-#undef  FMOVD_WORKS
-#define FMOVD_WORKS
-#endif
-
-#ifdef __LITTLE_ENDIAN__ /* DRn0/DRn1 name the two fr halves of a register pair; the order flips with endianness.  */
-#define DR00 fr1
-#define DR01 fr0
-#define DR20 fr3
-#define DR21 fr2
-#define DR40 fr5
-#define DR41 fr4
-#else /* !__LITTLE_ENDIAN__ */
-#define DR00 fr0
-#define DR01 fr1
-#define DR20 fr2
-#define DR21 fr3
-#define DR40 fr4
-#define DR41 fr5
-#endif /* !__LITTLE_ENDIAN__ */
-
-#ifdef __sh1__ /* SH1: emit the slot instruction first, then a plain branch.  */
-#define SL(branch, dest, in_slot, in_slot_arg2) \
-	in_slot, in_slot_arg2; branch dest
-#define SL1(branch, dest, in_slot) \
-	in_slot; branch dest
-#else /* ! __sh1__ */
-#define SL(branch, dest, in_slot, in_slot_arg2) \
-	branch##.s dest; in_slot, in_slot_arg2
-#define SL1(branch, dest, in_slot) \
-	branch##/s dest; in_slot
-#endif /* !__sh1__ */
diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h
index 1e65480..cc26e05 100644
--- a/gcc/config/sh/sh.h
+++ b/gcc/config/sh/sh.h
@@ -1983,7 +1983,7 @@ struct sh_args {
    that the native compiler puts too large (> 32) immediate shift counts
    into a register and shifts by the register, letting the SH decide what
    to do instead of doing that itself.  */
-/* ??? The library routines in lib1funcs.asm truncate the shift count.
+/* ??? The library routines in lib1funcs.S truncate the shift count.
    However, the SH3 has hardware shifts that do not truncate exactly as gcc
    expects - the sign bit is significant - so it appears that we need to
    leave this zero for correct SH3 code.  */
diff --git a/gcc/config/sh/t-linux b/gcc/config/sh/t-linux
index a5c7116..2304fb1 100644
--- a/gcc/config/sh/t-linux
+++ b/gcc/config/sh/t-linux
@@ -1,5 +1,3 @@
-LIB1ASMFUNCS_CACHE = _ic_invalidate _ic_invalidate_array
-
 LIB2FUNCS_EXTRA= $(srcdir)/config/sh/linux-atomic.asm
 
 MULTILIB_DIRNAMES= 
diff --git a/gcc/config/sh/t-netbsd b/gcc/config/sh/t-netbsd
index de172d3..dea1c47 100644
--- a/gcc/config/sh/t-netbsd
+++ b/gcc/config/sh/t-netbsd
@@ -17,6 +17,5 @@
 # <http://www.gnu.org/licenses/>.
 
 TARGET_LIBGCC2_CFLAGS = -fpic -mieee
-LIB1ASMFUNCS_CACHE = _ic_invalidate
 
 LIB2FUNCS_EXTRA=
diff --git a/gcc/config/sh/t-sh b/gcc/config/sh/t-sh
index 6eaf784..56ea83e 100644
--- a/gcc/config/sh/t-sh
+++ b/gcc/config/sh/t-sh
@@ -22,13 +22,6 @@ sh-c.o: $(srcdir)/config/sh/sh-c.c \
 	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
 		$(srcdir)/config/sh/sh-c.c
 
-LIB1ASMSRC = sh/lib1funcs.asm
-LIB1ASMFUNCS = _ashiftrt _ashiftrt_n _ashiftlt _lshiftrt _movmem \
-  _movmem_i4 _mulsi3 _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \
-  _div_table _udiv_qrnnd_16 \
-  $(LIB1ASMFUNCS_CACHE)
-LIB1ASMFUNCS_CACHE = _ic_invalidate _ic_invalidate_array
-
 TARGET_LIBGCC2_CFLAGS = -mieee
 
 DEFAULT_ENDIAN = $(word 1,$(TM_ENDIAN_CONFIG))
diff --git a/gcc/config/sh/t-sh64 b/gcc/config/sh/t-sh64
index d88f929..3bd9205 100644
--- a/gcc/config/sh/t-sh64
+++ b/gcc/config/sh/t-sh64
@@ -1,4 +1,4 @@
-# Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
+# Copyright (C) 2002, 2004, 2005, 2011 Free Software Foundation, Inc.
 #
 # This file is part of GCC.
 #
@@ -16,13 +16,6 @@
 # along with GCC; see the file COPYING3.  If not see
 # <http://www.gnu.org/licenses/>.
 
-LIB1ASMFUNCS = \
-  _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \
-  _shcompact_call_trampoline _shcompact_return_trampoline \
-  _shcompact_incoming_args _ic_invalidate _nested_trampoline \
-  _push_pop_shmedia_regs \
-  _udivdi3 _divdi3 _umoddi3 _moddi3 _div_table
-
 MULTILIB_CPU_DIRS= $(ML_sh1) $(ML_sh2e) $(ML_sh2) $(ML_sh3e) $(ML_sh3) $(ML_sh4_nofpu) $(ML_sh4_single_only) $(ML_sh4_single) $(ML_sh4) $(ML_sh5_32media:m5-32media/=media32) $(ML_sh5_32media_nofpu:m5-32media-nofpu/=nofpu/media32) $(ML_sh5_compact:m5-compact/=compact) $(ML_sh5_compact_nofpu:m5-compact-nofpu/=nofpu/compact) $(ML_sh5_64media:m5-64media/=media64) $(ML_sh5_64media_nofpu:m5-64media-nofpu/=nofpu/media64)
 
 MULTILIB_RAW_DIRNAMES= $(MULTILIB_ENDIAN:/mb= mb) $(MULTILIB_CPU_DIRS:/=)