From a31df303476b91404062ec3bac44e484efec7fee Mon Sep 17 00:00:00 2001 From: Joern Rennecke Date: Sun, 7 Jul 2002 00:27:20 +0000 Subject: * libc/machine/sh/Makefile.am (lib_a_SOURCES): Make strcmp.S unconditional. * libc/machine/sh/Makefile.in: Regenerate. * libc/machine/sh/asm.h (DELAYED_BRANCHES, SL): Also for __SH5__ . * strcmp.S (strcmp): Add SHmedia variant. Use different registers for SHcompact. --- newlib/libc/machine/sh/Makefile.am | 4 - newlib/libc/machine/sh/Makefile.in | 12 +-- newlib/libc/machine/sh/asm.h | 3 +- newlib/libc/machine/sh/strcmp.S | 197 +++++++++++++++++++++++++++++++++---- 4 files changed, 184 insertions(+), 32 deletions(-) (limited to 'newlib/libc') diff --git a/newlib/libc/machine/sh/Makefile.am b/newlib/libc/machine/sh/Makefile.am index d2d4966..dd4048b 100644 --- a/newlib/libc/machine/sh/Makefile.am +++ b/newlib/libc/machine/sh/Makefile.am @@ -6,11 +6,7 @@ INCLUDES = $(NEWLIB_CFLAGS) $(CROSS_CFLAGS) $(TARGET_CFLAGS) noinst_LIBRARIES = lib.a -if SH64 -lib_a_SOURCES = memcpy.S memset.S setjmp.S strcpy.S strlen.S -else lib_a_SOURCES = memcpy.S memset.S setjmp.S strcpy.S strlen.S strcmp.S -endif memcpy.o: asm.h memset.o: asm.h diff --git a/newlib/libc/machine/sh/Makefile.in b/newlib/libc/machine/sh/Makefile.in index 93caa07..d6dc919 100644 --- a/newlib/libc/machine/sh/Makefile.in +++ b/newlib/libc/machine/sh/Makefile.in @@ -1,6 +1,6 @@ -# Makefile.in generated automatically by automake 1.4 from Makefile.am +# Makefile.in generated automatically by automake 1.4-p5 from Makefile.am -# Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc. +# Copyright (C) 1994, 1995-8, 1999, 2001 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. @@ -88,8 +88,8 @@ AUTOMAKE_OPTIONS = cygnus INCLUDES = $(NEWLIB_CFLAGS) $(CROSS_CFLAGS) $(TARGET_CFLAGS) noinst_LIBRARIES = lib.a -@SH64_TRUE@lib_a_SOURCES = @SH64_TRUE@memcpy.S memset.S setjmp.S strcpy.S strlen.S -@SH64_FALSE@lib_a_SOURCES = @SH64_FALSE@memcpy.S memset.S setjmp.S strcpy.S strlen.S strcmp.S + +lib_a_SOURCES = memcpy.S memset.S setjmp.S strcpy.S strlen.S strcmp.S ACLOCAL_AMFLAGS = -I ../../.. CONFIG_STATUS_DEPENDENCIES = $(newlib_basedir)/configure.host @@ -103,9 +103,7 @@ DEFS = @DEFS@ -I. -I$(srcdir) CPPFLAGS = @CPPFLAGS@ LIBS = @LIBS@ lib_a_LIBADD = -@SH64_TRUE@lib_a_OBJECTS = memcpy.o memset.o setjmp.o strcpy.o strlen.o -@SH64_FALSE@lib_a_OBJECTS = memcpy.o memset.o setjmp.o strcpy.o \ -@SH64_FALSE@strlen.o strcmp.o +lib_a_OBJECTS = memcpy.o memset.o setjmp.o strcpy.o strlen.o strcmp.o CFLAGS = @CFLAGS@ COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) CCLD = $(CC) diff --git a/newlib/libc/machine/sh/asm.h b/newlib/libc/machine/sh/asm.h index 2f809af..5b85c50 100644 --- a/newlib/libc/machine/sh/asm.h +++ b/newlib/libc/machine/sh/asm.h @@ -23,7 +23,8 @@ _ENTRY(_C_LABEL(name)) #if (defined (__sh2__) || defined (__sh3__) || defined (__SH3E__) \ - || defined (__SH4_SINGLE__) || defined (__SH4__)) || defined(__SH4_SINGLE_ONLY__) + || defined (__SH4_SINGLE__) || defined (__SH4__)) \ + || defined (__SH4_SINGLE_ONLY__) || defined (__SH5__) #define DELAYED_BRANCHES #define SL(branch, dest, in_slot, in_slot_arg2) \ branch##.s dest; in_slot, in_slot_arg2 diff --git a/newlib/libc/machine/sh/strcmp.S b/newlib/libc/machine/sh/strcmp.S index a112b13..850c82b 100644 --- a/newlib/libc/machine/sh/strcmp.S +++ b/newlib/libc/machine/sh/strcmp.S @@ -1,42 +1,198 @@ -! Entry: r4: destination -! r5: source -! Exit: r0: result -! r1-r2,r4-r5: clobbered +! SH5 code Copyright 2002 SuperH Ltd. #include "asm.h" ENTRY(strcmp) - mov r4,r0 - or r5,r0 + +#if __SHMEDIA__ + ld.ub r2,0,r4 + pt/l quickret0,tr0 + ld.ub r3,0,r5 + ptabs r18,tr2 + beqi/u r4,0,tr0 + ld.ub r2,1,r6 + bne/u r4,r5,tr0 + pt/l quickret1,tr1 + ld.ub r3,1,r7 + beqi/u r6,0,tr1 + ld.ub r2,2,r4 + bne/u r6,r7,tr1 + ld.ub r3,2,r5 + beqi/u r4,0,tr0 + ld.ub r2,3,r6 + bne/u r4,r5,tr0 + ld.ub r3,3,r7 + beqi/u r6,0,tr1 + ld.ub r2,4,r4 + bne/u r6,r7,tr1 + ld.ub r3,4,r5 + beqi/u r4,0,tr0 + ld.ub r2,5,r6 + bne/u r4,r5,tr0 + ld.ub r3,5,r7 + beqi/u r6,0,tr1 + ld.ub r2,6,r4 + bne/u r6,r7,tr1 + ld.ub r3,6,r5 + beqi/u r4,0,tr0 + ld.ub r2,7,r6 + bne/u r4,r5,tr0 + ld.ub r3,7,r7 + beqi/u r6,0,tr1 + sub r3,r2,r3 + bne/u r6,r7,tr1 + + andi r2,-8,r2 + add r3,r2,r3 + ldlo.q r3,8,r23 + pt r23_zero,tr0 + shlli r3,3,r22 + sub r63,r22,r20 + movi 0x101,r6 + mperm.w r6,r63,r6 + SHLO r6,r22,r7 + msubs.ub r7,r23,r8 + pt loop,tr1 + bnei/u r8,0,tr0 // r23_zero + pt found_zero,tr0 + ori r3,7,r3 + addi r3,9,r3 + sub r3,r2,r3 + bne/l r7,r6,tr1 // loop + /* The strings are aligned to each other. */ + pt al_loop,tr1 + pt al_found_zero,tr0 + addi r3,-8,r3 +al_loop: + ld.q r2,8,r4 + ldx.q r2,r3,r5 + addi r2,8,r2 + mcmpeq.b r63,r4,r8 + pt cmp_quad,tr3 + bnei/u r8,0,tr0 // al_found_zero + beq/l r4,r5,tr1 // al_loop + blink tr3,r63 // cmp_quad + + .balign 8 +quickret0: + sub r4,r5,r2 + blink tr2,r63 +quickret1: + sub r6,r7,r2 + blink tr2,r63 + +loop: + ld.q r2,8,r4 + ldx.q r2,r3,r19 + addi r2,8,r2 + msubs.ub r6,r4,r8 + mcmpeq.b r63,r19,r9 + SHHI r19,r20,r21 + or r21,r23,r5 + SHLO r19,r22,r23 + bne/u r8,r9,tr0 // found_zero + beq/l r4,r5,tr1 // loop +cmp_quad: +#ifdef __LITTLE_ENDIAN__ + byterev r4,r4 + byterev r5,r5 +#endif + cmpgtu r4,r5,r6 + cmpgtu r5,r4,r7 + sub r6,r7,r2 + blink tr2,r63 +found_zero: + pt zero_now,tr0 + mcmpeq.b r63,r5,r7 + pt cmp_quad,tr1 + bne/u r8,r7,tr0 // zero_now + bne/u r4,r5,tr1 // cmp_quad + SHLO r9,r22,r8 +r23_zero: + ld.q r2,8,r4 + add r23,r63,r5 +zero_now: +al_found_zero: +/* We konw that one of the values has at lest one zero, and r8 holds + an 0x01 or 0xff mask for every zero found in one of the operands. + If both operands have the first zero in the same place, this mask + allows us to truncate the comparison to the valid bytes in the + strings. If the first zero is in different places, it doesn't + matter if some invalid bytes are included, since the comparison + of the zero with the non-zero will determine the outcome. */ +#ifdef __LITTLE_ENDIAN__ + shlli r8,8,r8 + addi r8,-1,r9 + andc r9,r8,r8 + and r8,r4,r4 + and r8,r5,r5 +#else + shlri r8,1,r8 + nsb r8,r8 + addi r8,8,r8 + andi r8,56,r8 + sub r63,r8,r8 + shlrd r4,r8,r4 + shlrd r5,r8,r5 +#endif +#ifdef __LITTLE_ENDIAN__ + byterev r4,r4 + byterev r5,r5 +#endif + cmpgtu r4,r5,r6 + cmpgtu r5,r4,r7 + sub r6,r7,r2 + blink tr2,r63 + +#else /* ! __SHMEDIA__, i.e. SH 1..4 / SHcompact */ + +#ifdef __SH5__ +#define STR1 r2 +#define STR2 r3 +#define RESULT r2 +#define TMP r4 +#else +! Entry: r4: string1 +! r5: string2 +! Exit: r0: result +! r1-r2,r4-r5: clobbered +#define STR1 r4 +#define STR2 r5 +#define RESULT r0 +#define TMP r2 +#endif /* __SH5__ */ + + mov STR1,r0 + or STR2,r0 tst #3,r0 bf L_setup_char_loop mov #0,r0 #ifdef DELAYED_BRANCHES - mov.l @r4+,r1 + mov.l @STR1+,r1 .align 2 Longword_loop: - mov.l @r5+,r2 + mov.l @STR2+,TMP cmp/str r0,r1 bt Longword_loop_end - cmp/eq r1,r2 + cmp/eq r1,TMP bt.s Longword_loop - mov.l @r4+,r1 - add #-4, r4 + mov.l @STR1+,r1 + add #-4, STR1 Longword_loop_end: - add #-4, r4 - add #-4, r5 + add #-4, STR1 + add #-4, STR2 L_setup_char_loop: - mov.b @r4+,r0 + mov.b @STR1+,r0 .align 2 L_char_loop: - mov.b @r5+,r1 + mov.b @STR2+,r1 tst r0,r0 bt L_return cmp/eq r0,r1 bt.s L_char_loop - mov.b @r4+,r0 - add #-2,r4 - mov.b @r4,r0 + mov.b @STR1+,r0 + add #-2,STR1 + mov.b @STR1,r0 #else /* ! DELAYED_BRANCHES */ .align 2 Longword_loop: @@ -60,7 +216,8 @@ L_char_loop: bt L_char_loop #endif L_return: - extu.b r0,r0 + extu.b r0,RESULT extu.b r1,r1 rts - sub r1,r0 + sub r1,RESULT +#endif /* ! __SHMEDIA__ */ -- cgit v1.1