aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog19
-rw-r--r--locale/programs/stringtrans.c2
-rw-r--r--posix/glob.h7
-rw-r--r--socket/sys/socket.h (renamed from sysdeps/unix/sysv/linux/sys/socket.h)18
-rw-r--r--stdio-common/printf-parse.h2
-rw-r--r--stdio-common/printf-prs.c7
-rw-r--r--stdio-common/vfprintf.c7
-rw-r--r--sysdeps/generic/machine-gmon.h2
-rw-r--r--sysdeps/generic/socketbits.h (renamed from sysdeps/generic/sys/socket.h)147
-rw-r--r--sysdeps/mach/hurd/connect.c5
-rw-r--r--sysdeps/mach/hurd/send.c5
-rw-r--r--sysdeps/unix/inet/syscalls.list4
-rw-r--r--sysdeps/unix/sysv/linux/configure10
-rw-r--r--sysdeps/unix/sysv/linux/socketbits.h2
-rw-r--r--wcsmbs/btowc.c10
-rw-r--r--wcsmbs/mbrlen.c3
-rw-r--r--wcsmbs/mbrtowc.c111
-rw-r--r--wcsmbs/mbsinit.c18
-rw-r--r--wcsmbs/mbsrtowcs.c114
-rw-r--r--wcsmbs/wchar.h20
-rw-r--r--wcsmbs/wcrtomb.c62
-rw-r--r--wcsmbs/wcsrtombs.c95
-rw-r--r--wcsmbs/wctob.c9
23 files changed, 396 insertions, 283 deletions
diff --git a/ChangeLog b/ChangeLog
index a619747..bd2e083 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,24 @@
Tue Jul 9 09:37:55 1996 Roland McGrath <roland@delasyd.gnu.ai.mit.edu>
+ * posix/glob.h (__glob_opendir_hook, __glob_readdir_hook,
+ __glob_closedir_hook): Remove decls.
+
+ * sysdeps/generic/machine-gmon.h: Declare mcount_internal.
+
+ * sysdeps/unix/inet/syscalls.list: Define __ names with weak aliases
+ for send and connect syscalls.
+
+ * socket/sys/socket.h: New file, taken from non-sysdep parts of
+ linux/sys/socket.h; break sysdeps parts out into socketbits.h.
+ Declare __ names for send and connect.
+ * sysdeps/generic/socketbits.h: New file.
+ * sysdeps/unix/sysv/linux/socketbits.h: New file.
+ * sysdeps/unix/sysv/linux/sys/socket.h: File removed.
+ * sysdeps/generic/sys/socket.h: File removed.
+
+ * sysdeps/mach/hurd/connect.c: Define __ name and weak alias.
+ * sysdeps/mach/hurd/send.c: Likewise.
+
* sysdeps/mach/libc-lock.h: New file.
* sysdeps/unix/readdir.c: Do locking.
* sysdeps/unix/seekdir.c: Likewise.
diff --git a/locale/programs/stringtrans.c b/locale/programs/stringtrans.c
index bff5aa4..10b04fa 100644
--- a/locale/programs/stringtrans.c
+++ b/locale/programs/stringtrans.c
@@ -1,6 +1,6 @@
/* Copyright (C) 1996 Free Software Foundation, Inc.
This file is part of the GNU C Library.
-COntributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
+Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
diff --git a/posix/glob.h b/posix/glob.h
index 73fd018..8607e9f 100644
--- a/posix/glob.h
+++ b/posix/glob.h
@@ -100,13 +100,6 @@ extern int glob __P ((const char *__pattern, int __flags,
extern void globfree __P ((glob_t *__pglob));
-#if !defined (_POSIX_C_SOURCE) || _POSIX_C_SOURCE < 2 || defined (_GNU_SOURCE)
-/* If they are not NULL, `glob' uses these functions to read directories. */
-extern __ptr_t (*__glob_opendir_hook) __P ((const char *__directory));
-extern const char *(*__glob_readdir_hook) __P ((__ptr_t __stream));
-extern void (*__glob_closedir_hook) __P ((__ptr_t __stream));
-#endif
-
#ifdef __cplusplus
}
#endif
diff --git a/sysdeps/unix/sysv/linux/sys/socket.h b/socket/sys/socket.h
index a680d5f..172c897 100644
--- a/sysdeps/unix/sysv/linux/sys/socket.h
+++ b/socket/sys/socket.h
@@ -1,4 +1,5 @@
-/* Copyright (C) 1991, 92, 94, 95, 96 Free Software Foundation, Inc.
+/* Declarations of socket constants, types, and functions.
+Copyright (C) 1991, 92, 94, 95, 96 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -27,8 +28,11 @@ __BEGIN_DECLS
#include <stddef.h>
-/* Get Linux specific constants and data types. */
-#include <linux/socket.h>
+/* This operating system-specific header file defines the SOCK_*, PF_*,
+ AF_*, MSG_*, SOL_*, and SO_* constants, and the `struct sockaddr',
+ `struct msghdr', and `struct linger' types. */
+#include <socketbits.h>
+
/* This is the type we use for generic socket address arguments.
@@ -90,6 +94,8 @@ extern int getsockname __P ((int __fd, __SOCKADDR_ARG __addr,
For connectionless socket types, just set the default address to send to
and the only address from which to accept transmissions.
Return 0 on success, -1 for errors. */
+extern int __connect __P ((int __fd,
+ __CONST_SOCKADDR_ARG __addr, size_t __len));
extern int connect __P ((int __fd,
__CONST_SOCKADDR_ARG __addr, size_t __len));
@@ -100,6 +106,7 @@ extern int getpeername __P ((int __fd, __SOCKADDR_ARG __addr,
/* Send N bytes of BUF to socket FD. Returns the number sent or -1. */
+extern int __send __P ((int __fd, __ptr_t __buf, size_t __n, int __flags));
extern int send __P ((int __fd, __ptr_t __buf, size_t __n, int __flags));
/* Read N bytes into BUF from socket FD.
@@ -164,6 +171,11 @@ extern int accept __P ((int __fd, __SOCKADDR_ARG __addr,
extern int shutdown __P ((int __fd, int __how));
+/* FDTYPE is S_IFSOCK or another S_IF* macro defined in <sys/stat.h>;
+ returns 1 if FD is open on an object of the indicated type, 0 if not,
+ or -1 for errors (setting errno). */
+extern int isfdtype __P ((int __fd, int __fdtype));
+
__END_DECLS
#endif /* sys/socket.h */
diff --git a/stdio-common/printf-parse.h b/stdio-common/printf-parse.h
index a7960e6..9a5cfba 100644
--- a/stdio-common/printf-parse.h
+++ b/stdio-common/printf-parse.h
@@ -97,6 +97,8 @@ find_spec (const char *format, mbstate_t *ps)
{
int len;
+ /* Remove any hints of a wrong encoding. */
+ ps->count = 0;
if (isascii (*format) || (len = mbrlen (format, MB_CUR_MAX, ps)) <= 0)
++format;
else
diff --git a/stdio-common/printf-prs.c b/stdio-common/printf-prs.c
index d0756de..a15be55 100644
--- a/stdio-common/printf-prs.c
+++ b/stdio-common/printf-prs.c
@@ -81,7 +81,6 @@ parse_printf_format (fmt, n, argtypes)
nargs = 0;
max_ref_arg = 0;
- mbstate = 0;
/* Search for format specifications. */
for (fmt = find_spec (fmt, &mbstate); *fmt != '\0'; fmt = spec.next_fmt)
@@ -90,14 +89,14 @@ parse_printf_format (fmt, n, argtypes)
nargs += parse_one_spec (fmt, nargs, &spec, &max_ref_arg, &mbstate);
/* If the width is determined by an argument this is an int. */
- if (spec.width_arg != -1 && spec.width_arg < n)
+ if (spec.width_arg != -1 && (size_t) spec.width_arg < n)
argtypes[spec.width_arg] = PA_INT;
/* If the precision is determined by an argument this is an int. */
- if (spec.prec_arg != -1 && spec.prec_arg < n)
+ if (spec.prec_arg != -1 && (size_t) spec.prec_arg < n)
argtypes[spec.prec_arg] = PA_INT;
- if (spec.data_arg < n)
+ if ((size_t) spec.data_arg < n)
switch (spec.ndata_args)
{
case 0: /* No arguments. */
diff --git a/stdio-common/vfprintf.c b/stdio-common/vfprintf.c
index 8031b99..3fa53a6 100644
--- a/stdio-common/vfprintf.c
+++ b/stdio-common/vfprintf.c
@@ -735,16 +735,14 @@ vfprintf (FILE *s, const CHAR_T *format, va_list ap)
else \
{ \
const wchar_t *s2 = (const wchar_t *) string; \
- mbstate_t mbstate = 0; \
+ mbstate_t mbstate; \
\
- len = wcsrtombs (NULL, &s2, prec != -1 ? prec : UINT_MAX, \
- &mbstate); \
+ len = wcsrtombs (NULL, &s2, 0, &mbstate); \
if (len == (size_t) -1) \
/* Illegal wide-character string. */ \
return -1; \
\
s2 = (const wchar_t *) string; \
- mbstate = 0; \
string = alloca (len + 1); \
(void) wcsrtombs (string, &s2, prec != -1 ? prec : UINT_MAX, \
&mbstate); \
@@ -841,7 +839,6 @@ vfprintf (FILE *s, const CHAR_T *format, va_list ap)
/* Initialize local variables. */
done = 0;
grouping = (const char *) -1;
- mbstate = 0;
ap_save = ap;
nspecs_done = 0;
diff --git a/sysdeps/generic/machine-gmon.h b/sysdeps/generic/machine-gmon.h
index 80ee97f..115962a 100644
--- a/sysdeps/generic/machine-gmon.h
+++ b/sysdeps/generic/machine-gmon.h
@@ -38,6 +38,8 @@ void _mcount (void);
weak_alias (_mcount, mcount)
#endif
+static void mcount_internal (u_long frompc, u_long selfpc);
+
#define _MCOUNT_DECL(frompc, selfpc) \
static inline void mcount_internal (frompc, selfpc)
diff --git a/sysdeps/generic/sys/socket.h b/sysdeps/generic/socketbits.h
index 6ee3ebe..770f011 100644
--- a/sysdeps/generic/sys/socket.h
+++ b/sysdeps/generic/socketbits.h
@@ -1,4 +1,5 @@
-/* Copyright (C) 1991, 92, 94, 95, 96 Free Software Foundation, Inc.
+/* System-specific socket constants and types. Generic/4.3 BSD version.
+Copyright (C) 1991, 92, 94, 95, 96 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,9 +17,9 @@ License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
-#ifndef _SYS_SOCKET_H
+#ifndef _SOCKETBITS_H
-#define _SYS_SOCKET_H 1
+#define _SOCKETBITS_H 1
#include <features.h>
__BEGIN_DECLS
@@ -115,75 +116,6 @@ struct sockaddr
char sa_data[14]; /* Address data. */
};
-/* This is the type we use for generic socket address arguments.
-
- With GCC 2.7 and later, the funky union causes redeclarations or uses with
- any of the listed types to be allowed without complaint. */
-#if (!defined (__GNUC__) || __GNUC__ < 2 || \
- (__GNUC__ == 2 && __GNUC_MINOR__ < 7))
-#define __SOCKADDR_ARG struct sockaddr *
-#define __CONST_SOCKADDR_ARG __const struct sockaddr *
-#else
-/* Add more `struct sockaddr_AF' types here as necessary.
- These are all the ones I found on NetBSD and Linux. */
-#define __SOCKADDR_ALLTYPES \
- __SOCKADDR_ONETYPE (sockaddr) \
- __SOCKADDR_ONETYPE (sockaddr_at) \
- __SOCKADDR_ONETYPE (sockaddr_ax25) \
- __SOCKADDR_ONETYPE (sockaddr_dl) \
- __SOCKADDR_ONETYPE (sockaddr_eon) \
- __SOCKADDR_ONETYPE (sockaddr_in) \
- __SOCKADDR_ONETYPE (sockaddr_in6) \
- __SOCKADDR_ONETYPE (sockaddr_inarp) \
- __SOCKADDR_ONETYPE (sockaddr_ipx) \
- __SOCKADDR_ONETYPE (sockaddr_iso) \
- __SOCKADDR_ONETYPE (sockaddr_ns) \
- __SOCKADDR_ONETYPE (sockaddr_un) \
- __SOCKADDR_ONETYPE (sockaddr_x25)
-
-#define __SOCKADDR_ONETYPE(type) struct type *__##type##__;
-typedef union { __SOCKADDR_ALLTYPES
- } __SOCKADDR_ARG __attribute__ ((__transparent_union__));
-#undef __SOCKADDR_ONETYPE
-#define __SOCKADDR_ONETYPE(type) __const struct type *__##type##__;
-typedef union { __SOCKADDR_ALLTYPES
- } __CONST_SOCKADDR_ARG __attribute__ ((__transparent_union__));
-#undef __SOCKADDR_ONETYPE
-#endif
-
-
-/* Create a new socket of type TYPE in domain DOMAIN, using
- protocol PROTOCOL. If PROTOCOL is zero, one is chosen automatically.
- Returns a file descriptor for the new socket, or -1 for errors. */
-extern int socket __P ((int __domain, enum __socket_type __type,
- int __protocol));
-
-/* Create two new sockets, of type TYPE in domain DOMAIN and using
- protocol PROTOCOL, which are connected to each other, and put file
- descriptors for them in FDS[0] and FDS[1]. If PROTOCOL is zero,
- one will be chosen automatically. Returns 0 on success, -1 for errors. */
-extern int socketpair __P ((int __domain, enum __socket_type __type,
- int __protocol, int __fds[2]));
-
-/* Give the socket FD the local address ADDR (which is LEN bytes long). */
-extern int bind __P ((int __fd, __CONST_SOCKADDR_ARG __addr, size_t __len));
-
-/* Put the local address of FD into *ADDR and its length in *LEN. */
-extern int getsockname __P ((int __fd, __SOCKADDR_ARG __addr,
- size_t *__len));
-
-/* Open a connection on socket FD to peer at ADDR (which LEN bytes long).
- For connectionless socket types, just set the default address to send to
- and the only address from which to accept transmissions.
- Return 0 on success, -1 for errors. */
-extern int connect __P ((int __fd,
- __CONST_SOCKADDR_ARG __addr, size_t __len));
-
-/* Put the address of the peer connected to socket FD into *ADDR
- (which is *LEN bytes long), and its actual length into *LEN. */
-extern int getpeername __P ((int __fd, __SOCKADDR_ARG __addr,
- size_t *__len));
-
/* Bits in the FLAGS argument to `send', `recv', et al. */
enum
@@ -198,26 +130,6 @@ enum
MSG_DONTWAIT = 0x80, /* This message should be nonblocking. */
};
-/* Send N bytes of BUF to socket FD. Returns the number sent or -1. */
-extern int send __P ((int __fd, __ptr_t __buf, size_t __n, int __flags));
-
-/* Read N bytes into BUF from socket FD.
- Returns the number read or -1 for errors. */
-extern int recv __P ((int __fd, __ptr_t __buf, size_t __n, int __flags));
-
-/* Send N bytes of BUF on socket FD to peer at address ADDR (which is
- ADDR_LEN bytes long). Returns the number sent, or -1 for errors. */
-extern int sendto __P ((int __fd, __ptr_t __buf, size_t __n, int __flags,
- __CONST_SOCKADDR_ARG __addr, size_t __addr_len));
-
-/* Read N bytes into BUF through socket FD.
- If ADDR is not NULL, fill in *ADDR_LEN bytes of it with tha address of
- the sender, and store the actual size of the address in *ADDR_LEN.
- Returns the number of bytes read or -1 for errors. */
-extern int recvfrom __P ((int __fd, __ptr_t __buf, size_t __n, int __flags,
- __SOCKADDR_ARG __addr, size_t *__addr_len));
-
-
/* Structure describing messages sent by
`sendmsg' and received by `recvmsg'. */
@@ -233,15 +145,6 @@ struct msghdr
size_t msg_accrightslen; /* Length of access rights information. */
};
-/* Send a message described MESSAGE on socket FD.
- Returns the number of bytes sent, or -1 for errors. */
-extern int sendmsg __P ((int __fd, __const struct msghdr *__message,
- int __flags));
-
-/* Receive a message as described by MESSAGE from socket FD.
- Returns the number of bytes read or -1 for errors. */
-extern int recvmsg __P ((int __fd, struct msghdr *__message, int __flags));
-
/* Protocol number used to manipulate socket-level options
with `getsockopt' and `setsockopt'. */
@@ -285,46 +188,6 @@ struct linger
int l_linger; /* Time to linger. */
};
-
-/* Put the current value for socket FD's option OPTNAME at protocol level LEVEL
- into OPTVAL (which is *OPTLEN bytes long), and set *OPTLEN to the value's
- actual length. Returns 0 on success, -1 for errors. */
-extern int getsockopt __P ((int __fd, int __level, int __optname,
- __ptr_t __optval, size_t *__optlen));
-
-/* Set socket FD's option OPTNAME at protocol level LEVEL
- to *OPTVAL (which is OPTLEN bytes long).
- Returns 0 on success, -1 for errors. */
-extern int setsockopt __P ((int __fd, int __level, int __optname,
- __ptr_t __optval, size_t __optlen));
-
-
-/* Prepare to accept connections on socket FD.
- N connection requests will be queued before further requests are refused.
- Returns 0 on success, -1 for errors. */
-extern int listen __P ((int __fd, unsigned int __n));
-
-/* Await a connection on socket FD.
- When a connection arrives, open a new socket to communicate with it,
- set *ADDR (which is *ADDR_LEN bytes long) to the address of the connecting
- peer and *ADDR_LEN to the address's actual length, and return the
- new socket's descriptor, or -1 for errors. */
-extern int accept __P ((int __fd, __SOCKADDR_ARG __addr,
- size_t *__addr_len));
-
-/* Shut down all or part of the connection open on socket FD.
- HOW determines what to shut down:
- 0 = No more receptions;
- 1 = No more transmissions;
- 2 = No more receptions or transmissions.
- Returns 0 on success, -1 for errors. */
-extern int shutdown __P ((int __fd, int __how));
-
-
-/* Determine whether FILDES if the property identified by the value if
- FDTYPE. */
-extern int isfdtype __P ((int __fildes, int __fdtype));
-
__END_DECLS
-#endif /* sys/socket.h */
+#endif /* socketbits.h */
diff --git a/sysdeps/mach/hurd/connect.c b/sysdeps/mach/hurd/connect.c
index 0ecf736..f55a2ae 100644
--- a/sysdeps/mach/hurd/connect.c
+++ b/sysdeps/mach/hurd/connect.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+/* Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -30,7 +30,7 @@ Cambridge, MA 02139, USA. */
and the only address from which to accept transmissions.
Return 0 on success, -1 for errors. */
int
-DEFUN(connect, (fd, addr, len),
+DEFUN(__connect, (fd, addr, len),
int fd AND const struct sockaddr_un *addr AND size_t len)
{
error_t err;
@@ -72,3 +72,4 @@ DEFUN(connect, (fd, addr, len),
return err ? __hurd_dfail (fd, err) : 0;
}
+weak_alias (__connect, connect)
diff --git a/sysdeps/mach/hurd/send.c b/sysdeps/mach/hurd/send.c
index 153ee93..17fa663 100644
--- a/sysdeps/mach/hurd/send.c
+++ b/sysdeps/mach/hurd/send.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1994 Free Software Foundation, Inc.
+/* Copyright (C) 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -25,7 +25,7 @@ Cambridge, MA 02139, USA. */
/* Send N bytes of BUF to socket FD. Returns the number sent or -1. */
int
-DEFUN(send, (fd, buf, n, flags),
+DEFUN(__send, (fd, buf, n, flags),
int fd AND PTR buf AND size_t n AND int flags)
{
error_t err;
@@ -38,3 +38,4 @@ DEFUN(send, (fd, buf, n, flags),
return err ? __hurd_dfail (fd, err) : wrote;
}
+weak_alias (__send, send)
diff --git a/sysdeps/unix/inet/syscalls.list b/sysdeps/unix/inet/syscalls.list
index 3b6ac59..661a843 100644
--- a/sysdeps/unix/inet/syscalls.list
+++ b/sysdeps/unix/inet/syscalls.list
@@ -2,7 +2,7 @@
accept - accept 3 accept
bind - bind 3 bind
-connect - connect 3 connect
+__connect - connect 3 connect connect
gethostid - gethostid 0 gethostid
gethostname - gethostname 2 __gethostname gethostname
getpeername - getpeername 3 getpeername
@@ -12,7 +12,7 @@ listen - listen 2 listen
recv - recv 4 recv
recvfrom - recvfrom 6 recvfrom
recvmsg - recvmsg 3 recvmsg
-send - send 4 send
+__send - send 4 send send
sendmsg - sendmsg 3 sendmsg
sendto - sendto 6 sendto
sethostid - sethostid 1 sethostid
diff --git a/sysdeps/unix/sysv/linux/configure b/sysdeps/unix/sysv/linux/configure
index 7dbb0f9..fd72c09 100644
--- a/sysdeps/unix/sysv/linux/configure
+++ b/sysdeps/unix/sysv/linux/configure
@@ -8,7 +8,7 @@ test $stdio = default && stdio=libio
inhibit_glue=yes
echo $ac_n "checking installed Linux kernel header files""... $ac_c" 1>&6
-if eval "test \"`echo '$''{'libc_cv_linux'+set}'`\" = set"; then
+if eval "test \"`echo '$''{'libc_cv_linux201'+set}'`\" = set"; then
echo $ac_n "(cached) $ac_c" 1>&6
else
cat > conftest.$ac_ext <<EOF
@@ -24,17 +24,17 @@ eat flaming death
EOF
if { (eval echo configure:26: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
rm -rf conftest*
- libc_cv_linux='2.0.1 or later'
+ libc_cv_linux201='2.0.1 or later'
else
rm -rf conftest*
- libc_cv_linux='TOO OLD!'
+ libc_cv_linux201='TOO OLD!'
fi
rm -f conftest*
fi
-echo "$ac_t""$libc_cv_linux" 1>&6
-if test "$libc_cv_linux" != '2.0.1 or later'; then
+echo "$ac_t""$libc_cv_linux201" 1>&6
+if test "$libc_cv_linux201" != '2.0.1 or later'; then
{ echo "configure: error: GNU libc requires kernel header files from Linux 2.0.1
or later to be installed before configuring. The kernel header files
are found usually in /usr/include/asm and /usr/include/linux; make sure
diff --git a/sysdeps/unix/sysv/linux/socketbits.h b/sysdeps/unix/sysv/linux/socketbits.h
new file mode 100644
index 0000000..1c7cee0
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/socketbits.h
@@ -0,0 +1,2 @@
+/* Get Linux specific constants and data types from the kernel header. */
+#include <linux/socket.h>
diff --git a/wcsmbs/btowc.c b/wcsmbs/btowc.c
index 062be7e..2f13cc7 100644
--- a/wcsmbs/btowc.c
+++ b/wcsmbs/btowc.c
@@ -21,16 +21,14 @@ Boston, MA 02111-1307, USA. */
#include <wchar.h>
+/* We use UTF8 encoding for multibyte strings and therefore a valid
+ one byte multibyte string can only have a value from 0 to 0x7f. */
wint_t
btowc (c)
int c;
{
- /*************************************************************\
- |* This is no complete implementation. While the multi-byte *|
- |* character handling is not finished this will do. *|
- \*************************************************************/
- if (WEOF != (wint_t) EOF)
+ if (WEOF != (wint_t) EOF || c < 0 || c > 0x7f)
return WEOF;
else
- return c;
+ return (wint_t) c;
}
diff --git a/wcsmbs/mbrlen.c b/wcsmbs/mbrlen.c
index a50631e..c5a2711 100644
--- a/wcsmbs/mbrlen.c
+++ b/wcsmbs/mbrlen.c
@@ -26,10 +26,11 @@ static mbstate_t internal;
size_t
-mbrlen (s, n, ps)
+__mbrlen (s, n, ps)
const char *s;
size_t n;
mbstate_t *ps;
{
return mbrtowc (NULL, s, n, ps ?: &internal);
}
+weak_alias (__mbrlen, mbrlen)
diff --git a/wcsmbs/mbrtowc.c b/wcsmbs/mbrtowc.c
index 2c4b077..9e70a0b 100644
--- a/wcsmbs/mbrtowc.c
+++ b/wcsmbs/mbrtowc.c
@@ -1,6 +1,6 @@
/* Copyright (C) 1996 Free Software Foundation, Inc.
This file is part of the GNU C Library.
-Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>
+Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
@@ -17,50 +17,115 @@ License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
+#include <errno.h>
#include <wchar.h>
+#ifndef EILSEQ
+#define EILSEQ EINVAL
+#endif
+
static mbstate_t internal;
size_t
-mbrtowc (pwc, s, n, ps)
- wchar_t *pwc;
- const char *s;
- size_t n;
- mbstate_t *ps;
+mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
{
wchar_t to_wide;
+ size_t used = 0;
if (ps == NULL)
ps = &internal;
- /*************************************************************\
- |* This is no complete implementation. While the multi-byte *|
- |* character handling is not finished this will do. *|
- \*************************************************************/
-
if (s == NULL)
{
+ /* See first paragraph of description in 7.16.6.3.2. */
pwc = NULL;
s = "";
n = 1;
}
- if (n == 0)
- return (size_t) -2;
+ if (n > 0)
+ {
+ if (ps->count == 0)
+ {
+ unsigned char byte = (unsigned char) *s++;
+ ++used;
- /* For now. */
- to_wide = (wchar_t) *s;
+ /* We must look for a possible first byte of a UTF8 sequence. */
+ if (byte < 0x80)
+ {
+ /* One byte sequence. */
+ if (pwc != NULL)
+ *pwc = (wchar_t) byte;
+ return byte ? used : 0;
+ }
- if (pwc != NULL)
- *pwc = to_wide;
+ if ((byte & 0xc0) == 0x80 || (byte & 0xfe) == 0xfe)
+ {
+ /* Oh, oh. An encoding error. */
+ errno = EILSEQ;
+ return (size_t) -1;
+ }
- if (pwc == L'\0')
- {
- *ps = 0; /* This is required. */
- return 0;
+ if ((byte & 0xe0) == 0xc0)
+ {
+ /* We expect two bytes. */
+ ps->count = 1;
+ ps->value = byte & 0x1f;
+ }
+ else if ((byte & 0xf0) == 0xe0)
+ {
+ /* We expect three bytes. */
+ ps->count = 2;
+ ps->value = byte & 0x0f;
+ }
+ else if ((byte & 0xf8) == 0xf0)
+ {
+ /* We expect four bytes. */
+ ps->count = 3;
+ ps->value = byte & 0x07;
+ }
+ else if ((byte & 0xfc) == 0xf8)
+ {
+ /* We expect five bytes. */
+ ps->count = 4;
+ ps->value = byte & 0x03;
+ }
+ else
+ {
+ /* We expect six bytes. */
+ ps->count = 5;
+ ps->value = byte & 0x01;
+ }
+ }
+
+ /* We know we have to handle a multibyte character and there are
+ some more bytes to read. */
+ while (used < n)
+ {
+ /* The second to sixth bytes must be of the form 10xxxxxx. */
+ unsigned char byte = (unsigned char) *s++;
+ ++used;
+
+ if ((byte & 0xc0) != 0x80)
+ {
+ /* Oh, oh. An encoding error. */
+ errno = EILSEQ;
+ return (size_t) -1;
+ }
+
+ ps->value <<= 6;
+ ps->value |= byte & 0x3f;
+
+ if (--ps->count == 0)
+ {
+ /* The character is finished. */
+ if (pwc != NULL)
+ *pwc = (wchar_t) ps->value;
+ return ps->value ? used : 0;
+ }
+ }
}
- /* Return code (size_t)-1 cannot happend for now. */
- return 1;
+ return (size_t) -2;
}
diff --git a/wcsmbs/mbsinit.c b/wcsmbs/mbsinit.c
index efbfd09..f56ce20 100644
--- a/wcsmbs/mbsinit.c
+++ b/wcsmbs/mbsinit.c
@@ -1,6 +1,6 @@
/* Copyright (C) 1996 Free Software Foundation, Inc.
This file is part of the GNU C Library.
-Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>
+Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
@@ -20,15 +20,17 @@ Boston, MA 02111-1307, USA. */
#include <string.h>
#include <wchar.h>
-
+/* In GNU libc the conversion functions can only convert between the
+ fixed wide character representation and the multibyte
+ representation of the same character set. Since we use ISO 10646
+ in UCS4 encoding for wide characters the best solution for
+ multibyte characters is the UTF8 encoding. I.e., the only state
+ information is a counter of the bytes still expected and the
+ value collected so far. In particular, we don't have different shift
+ states. */
int
mbsinit (ps)
const mbstate_t *ps;
{
- /*************************************************************\
- |* This is no complete implementation. While the multi-byte *|
- |* character handling is not finished this will do. *|
- \*************************************************************/
-
- return ps == NULL || *ps == 0;
+ return ps == NULL || ps->count == 0;
}
diff --git a/wcsmbs/mbsrtowcs.c b/wcsmbs/mbsrtowcs.c
index dc026b7..712b199 100644
--- a/wcsmbs/mbsrtowcs.c
+++ b/wcsmbs/mbsrtowcs.c
@@ -1,6 +1,6 @@
/* Copyright (C) 1996 Free Software Foundation, Inc.
This file is part of the GNU C Library.
-Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>
+Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
@@ -17,9 +17,16 @@ License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
+#include <errno.h>
#include <wchar.h>
+#ifndef EILSEQ
+#define EILSEQ EINVAL
+#endif
+
+/* We don't need the state really because we don't have shift states
+ to maintain between calls to this function. */
static mbstate_t internal;
size_t
@@ -29,35 +36,102 @@ mbsrtowcs (dst, src, len, ps)
size_t len;
mbstate_t *ps;
{
- size_t result = 0;
+ size_t written = 0;
+ const char *run = *src;
if (ps == NULL)
ps = &internal;
- /*************************************************************\
- |* This is no complete implementation. While the multi-byte *|
- |* character handling is not finished this will do. *|
- \*************************************************************/
+ if (dst == NULL)
+ /* The LEN parameter has to be ignored if we don't actually write
+ anything. */
+ len = ~0;
- while (len > 0 && **src != '\0')
+ /* Copy all words. */
+ while (written < len)
{
- /* For now there is no possibly illegal MB char sequence. */
- if (dst != NULL)
- dst[result] = (wchar_t) **src;
- ++result;
- ++(*src);
- --len;
- }
+ wchar_t value;
+ size_t count;
+ unsigned char byte = *run++;
- if (len > 0)
- {
+ /* We expect a start of a new multibyte character. */
+ if (byte < 0x80)
+ {
+ /* One byte sequence. */
+ count = 0;
+ value = byte;
+ }
+ else if ((byte & 0xe0) == 0xc0)
+ {
+ count = 1;
+ value = byte & 0x1f;
+ }
+ else if ((byte & 0xf0) == 0xe0)
+ {
+ /* We expect three bytes. */
+ count = 2;
+ value = byte & 0x0f;
+ }
+ else if ((byte & 0xf8) == 0xf0)
+ {
+ /* We expect four bytes. */
+ count = 3;
+ value = byte & 0x07;
+ }
+ else if ((byte & 0xfc) == 0xf8)
+ {
+ /* We expect five bytes. */
+ count = 4;
+ value = byte & 0x03;
+ }
+ else if ((byte & 0xfe) == 0xfc)
+ {
+ /* We expect six bytes. */
+ count = 5;
+ value = byte & 0x01;
+ }
+ else
+ {
+ /* This is an illegal encoding. */
+ errno = EILSEQ;
+ return (size_t) -1;
+ }
+
+ /* Read the possible remaining bytes. */
+ while (count-- > 0)
+ {
+ byte = *run++;
+
+ if ((byte & 0xc0) != 0x80)
+ {
+ /* This is an illegal encoding. */
+ errno = EILSEQ;
+ return (size_t) -1;
+ }
+
+ value <<= 6;
+ value |= byte & 0x3f;
+ }
+
+ /* Store value if required. */
if (dst != NULL)
+ *dst++ = value;
+
+ /* The whole sequence is read. Check whether end of string is
+ reached. */
+ if (value == L'\0')
{
- dst[result] = L'\0';
- *ps = 0;
+ /* Found the end of the string. */
+ *src = NULL;
+ return written;
}
- *src = NULL;
+
+ /* Increment counter of produced words. */
+ ++written;
}
- return result;
+ /* Store address of next byte to process. */
+ *src = run;
+
+ return written;
}
diff --git a/wcsmbs/wchar.h b/wcsmbs/wchar.h
index cc821b8..806bafa 100644
--- a/wcsmbs/wchar.h
+++ b/wcsmbs/wchar.h
@@ -48,7 +48,11 @@ typedef unsigned int wint_t;
/* Conversion state information. */
-typedef int mbstate_t; /* FIXME */
+typedef struct
+{
+ int count; /* Number of bytes needed for the current character. */
+ wint_t value; /* Value so far. */
+} mbstate_t;
#define WCHAR_MIN ((wchar_t) 0)
#define WCHAR_MAX (~WCHAR_MIN)
@@ -145,9 +149,6 @@ extern int wctob __P ((wint_t __c));
state. */
extern int mbsinit __P ((__const mbstate_t *__ps));
-/* Return number of bytes in multibyte character pointed to by S. */
-extern size_t mbrlen __P ((__const char *__s, size_t __n, mbstate_t *ps));
-
/* Write wide character representation of multibyte character pointed
to by S to PWC. */
extern size_t mbrtowc __P ((wchar_t *__pwc, __const char *__s, size_t __n,
@@ -156,6 +157,17 @@ extern size_t mbrtowc __P ((wchar_t *__pwc, __const char *__s, size_t __n,
/* Write multibyte representation of wide character WC to S. */
extern size_t wcrtomb __P ((char *__s, wchar_t __wc, mbstate_t *__ps));
+/* Return number of bytes in multibyte character pointed to by S. */
+extern size_t __mbrlen __P ((__const char *__s, size_t __n, mbstate_t *__ps));
+extern size_t mbrlen __P ((__const char *__s, size_t __n, mbstate_t *__ps));
+
+#if defined (__OPTIMIZE__) \
+ && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 7))
+/* Define inline function as optimization. */
+extern __inline size_t mbrlen (__const char *s, size_t n, mbstate_t *ps)
+{ return ps != NULL ? mbrtowc (NULL, s, n, ps) : __mbrlen (s, n, NULL); }
+#endif
+
/* Write wide character representation of multibyte chracter string SRC
to DST. */
extern size_t mbsrtowcs __P ((wchar_t *__dst, __const char **__src,
diff --git a/wcsmbs/wcrtomb.c b/wcsmbs/wcrtomb.c
index 9069fb1..eb007a6 100644
--- a/wcsmbs/wcrtomb.c
+++ b/wcsmbs/wcrtomb.c
@@ -1,6 +1,6 @@
/* Copyright (C) 1996 Free Software Foundation, Inc.
This file is part of the GNU C Library.
-Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>
+Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
@@ -24,46 +24,68 @@ Boston, MA 02111-1307, USA. */
#define EILSEQ EINVAL
#endif
+static const wchar_t encoding_mask[] =
+{
+ ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff
+};
+
+static const unsigned char encoding_byte[] =
+{
+ 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
+};
+/* The state is not used for this UTF8 encoding. */
static mbstate_t internal;
size_t
-wcrtomb (s, wc, ps)
- char *s;
- wchar_t wc;
- mbstate_t *ps;
+wcrtomb (char *s, wchar_t wc, mbstate_t *ps)
{
char fake[1];
+ size_t written = 0;
if (ps == NULL)
ps = &internal;
- /*************************************************************\
- |* This is no complete implementation. While the multi-byte *|
- |* character handling is not finished this will do. *|
- \*************************************************************/
-
if (s == NULL)
{
s = fake;
wc = L'\0';
}
- if (wc == L'\0')
+ /* Store the UTF8 representation of WC. */
+ if (wc < 0 || wc > 0x7fffffff)
{
- /* FIXME Write any shift sequence to get to *PS == NULL. */
- *ps = 0;
- *s = '\0';
+ /* This is not a valid ISO 10646 character. */
+ errno = EILSEQ;
+ return (size_t) -1;
+ }
+
+ if (wc < 0x80)
+ {
+ /* It's a one byte sequence. */
+ if (s != NULL)
+ *s = (char) wc;
return 1;
}
- /* FIXME For now we don't handle real multi-byte encodings. */
- if ((wc & ~0xff) != 0)
+ for (written = 2; written < 6; ++written)
+ if ((wc & encoding_mask[written - 2]) == 0)
+ break;
+
+ if (s != NULL)
{
- errno = EILSEQ;
- return (size_t) -1;
+ size_t cnt = written;
+ s[0] = encoding_byte[cnt - 2];
+
+ --cnt;
+ do
+ {
+ s[cnt] = 0x80 | (wc & 0x3f);
+ wc >>= 6;
+ }
+ while (--cnt > 0);
+ s[0] |= wc;
}
- *s = (char) wc;
- return 1;
+ return written;
}
diff --git a/wcsmbs/wcsrtombs.c b/wcsmbs/wcsrtombs.c
index 9f10009..99ca6ac 100644
--- a/wcsmbs/wcsrtombs.c
+++ b/wcsmbs/wcsrtombs.c
@@ -1,6 +1,6 @@
/* Copyright (C) 1996 Free Software Foundation, Inc.
This file is part of the GNU C Library.
-Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>
+Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
@@ -25,6 +25,18 @@ Boston, MA 02111-1307, USA. */
#endif
+static const wchar_t encoding_mask[] =
+{
+ ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff
+};
+
+static const unsigned char encoding_byte[] =
+{
+ 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
+};
+
+/* We don't need the state really because we don't have shift states
+ to maintain between calls to this function. */
static mbstate_t internal;
size_t
@@ -34,40 +46,79 @@ wcsrtombs (dst, src, len, ps)
size_t len;
mbstate_t *ps;
{
- size_t result = 0;
+ size_t written = 0;
+ const wchar_t *run = *src;
if (ps == NULL)
ps = &internal;
- /*************************************************************\
- |* This is no complete implementation. While the multi-byte *|
- |* character handling is not finished this will do. *|
- \*************************************************************/
+ if (dst == NULL)
+ /* The LEN parameter has to be ignored if we don't actually write
+ anything. */
+ len = ~0;
- while (len > 0 && **src != L'\0')
+ while (written < len)
{
- if ((**src & ~0xff) != 0)
+ wchar_t wc = *run++;
+
+ if (wc < 0 || wc > 0x7fffffff)
{
+ /* This is not a valid ISO 10646 character. */
errno = EILSEQ;
return (size_t) -1;
}
- if (dst != NULL)
- dst[result] = (char) **src;
- ++result;
- ++(*src);
- --len;
- }
-
- if (len > 0)
- {
- if (dst != NULL)
+ if (wc == L'\0')
+ {
+ /* Found the end. */
+ if (dst != NULL)
+ *dst = '\0';
+ *src = NULL;
+ return written;
+ }
+ else if (wc < 0x80)
{
- dst[result] = '\0';
- *ps = 0;
+ /* It's a one-byte sequence. */
+ if (dst != NULL)
+ *dst++ = (char) wc;
+ ++written;
+ }
+ else
+ {
+ size_t step;
+
+ for (step = 2; step < 6; ++step)
+ if ((wc & encoding_mask[step - 2]) == 0)
+ break;
+
+ if (written + step >= len)
+ /* Too long. */
+ break;
+
+ if (dst != NULL)
+ {
+ size_t cnt = step;
+
+ dst[0] = encoding_byte[cnt - 2];
+
+ --cnt;
+ do
+ {
+ dst[cnt] = 0x80 | (wc & 0x3f);
+ wc >>= 6;
+ }
+ while (--cnt > 0);
+ dst[0] |= wc;
+
+ dst += step;
+ }
+
+ written += step;
}
- *src = NULL;
}
- return result;
+ /* Store position of first unprocessed word. */
+ *src = run;
+
+ return written;
}
diff --git a/wcsmbs/wctob.c b/wcsmbs/wctob.c
index c27bd6b..f541a2e 100644
--- a/wcsmbs/wctob.c
+++ b/wcsmbs/wctob.c
@@ -21,14 +21,11 @@ Boston, MA 02111-1307, USA. */
#include <wchar.h>
+/* We use UTF8 encoding for multibyte strings and therefore a valid
+ one byte multibyte string can only have a value from 0 to 0x7f. */
int
wctob (c)
wint_t c;
{
- /*************************************************************\
- |* This is no complete implementation. While the multi-byte *|
- |* character handling is not finished this will do. *|
- \*************************************************************/
-
- return (c & ~0xff) == 0 ? c : EOF;
+ return (c >= 0 && c <= 0x7f) ? c : EOF;
}