aboutsummaryrefslogtreecommitdiff
path: root/iconv
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2001-02-05 05:57:24 +0000
committerUlrich Drepper <drepper@redhat.com>2001-02-05 05:57:24 +0000
commit93693c4d820dac2f218e144f5126a5a761f1cfbf (patch)
tree7371d326f24a3c9d73fed75e7792133d0d654a50 /iconv
parent377c725f8e2dba91af36af27206e6deb85cf6e6f (diff)
downloadglibc-93693c4d820dac2f218e144f5126a5a761f1cfbf.zip
glibc-93693c4d820dac2f218e144f5126a5a761f1cfbf.tar.gz
glibc-93693c4d820dac2f218e144f5126a5a761f1cfbf.tar.bz2
Update.
2001-02-04 Ulrich Drepper <drepper@redhat.com> * iconv/Makefile (iconv_prog-modules): Define. Add vpath to find files in locale/programs. Add CFLAGS definition to allow compiling localedef files. * iconv/dummy-repertoire.c: New file. * iconv/iconv_charmap.c: New file. * iconv/iconv_prog.h: New file. * iconv/iconv_prog.c: Make verbose and omit_invalid global. (main): If parameter for -f and -t contain slashes try first to resolve the strings as filenames of charmap files. Use them for conversion in this case. * iconvdata/run-iconv-test.sh: If charmaps exist also run tests with iconv getting charmap names as parameters. * locale/programs/linereader.c (lr_token): Take extra parameters verbose and pass it to get_string. (get_string): Take extra parameters verbose. * locale/programs/charmap.c (parse_charmap): Take extra parameters verbose and be_quiet. Change all callers of lr_token and parse_charmap. * locale/programs/charmap.h: Likewise. * locale/programs/ld-address.c: Likewise. * locale/programs/ld-collate.c: Likewise. * locale/programs/ld-ctype.c: Likewise. * locale/programs/ld-identification.c: Likewise. * locale/programs/ld-measurement.c: Likewise. * locale/programs/ld-messages.c: Likewise. * locale/programs/ld-monetary.c: Likewise. * locale/programs/ld-name.c: Likewise. * locale/programs/ld-numeric.c: Likewise. * locale/programs/ld-paper.c: Likewise. * locale/programs/ld-telephone.c: Likewise. * locale/programs/ld-time.c: Likewise. * locale/programs/linereader.c: Likewise. * locale/programs/linereader.h: Likewise. * locale/programs/localedef.c: Likewise. * locale/programs/locfile.c: Likewise. * locale/programs/locfile.h: Likewise. * locale/programs/repertoire.c: Likewise.
Diffstat (limited to 'iconv')
-rw-r--r--iconv/Makefile14
-rw-r--r--iconv/dummy-repertoire.c37
-rw-r--r--iconv/iconv_charmap.c563
-rw-r--r--iconv/iconv_prog.c202
-rw-r--r--iconv/iconv_prog.h42
5 files changed, 773 insertions, 85 deletions
diff --git a/iconv/Makefile b/iconv/Makefile
index b6c4f23..e92eb0b 100644
--- a/iconv/Makefile
+++ b/iconv/Makefile
@@ -34,9 +34,19 @@ CFLAGS-gconv_db.c = -DSTATIC_GCONV
CFLAGS-gconv_simple.c = -DSTATIC_GCONV
endif
+vpath %.c ../locale/programs
+
+iconv_prog-modules = iconv_charmap charmap charmap-dir linereader \
+ dummy-repertoire simple-hash xstrdup xmalloc
+CFLAGS-iconv_prog.c = -I../locale/programs
+CFLAGS-iconv_charmap.c = -I../locale/programs
+CFLAGS-dummy-repertoire.c = -I../locale/programs
+CFLAGS-charmap.c = -DCHARMAP_PATH='"$(i18ndir)/charmaps"' \
+ -DDEFAULT_CHARMAP=null_pointer
+
tests = tst-iconv1 tst-iconv2 tst-iconv3
-distribute = gconv_builtin.h gconv_int.h loop.c skeleton.c
+distribute = gconv_builtin.h gconv_int.h loop.c skeleton.c iconv_prog.h
others = iconv_prog
install-others = $(inst_bindir)/iconv
@@ -47,3 +57,5 @@ include ../Rules
$(inst_bindir)/iconv: $(objpfx)iconv_prog $(+force)
$(do-install-program)
+
+$(objpfx)iconv_prog: $(iconv_prog-modules:%=$(objpfx)%.o)
diff --git a/iconv/dummy-repertoire.c b/iconv/dummy-repertoire.c
new file mode 100644
index 0000000..d3b455e
--- /dev/null
+++ b/iconv/dummy-repertoire.c
@@ -0,0 +1,37 @@
+/* Copyright (C) 2001 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@redhat.com>, 2001.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+/* For iconv we don't have to handle repertoire maps. Provide dummy
+ definitions to allow the use of linereader.c unchanged. */
+#include <repertoire.h>
+
+
+/* Stub for the repertoire lookup by symbolic NAME (LEN bytes long).
+   iconv does not handle repertoire maps, so no name is ever resolved
+   and the "illegal character" marker is reported for every request.  */
+uint32_t
+repertoire_find_value (const struct repertoire_t *repertoire, const char *name,
+		       size_t len)
+{
+  /* The arguments are intentionally ignored.  */
+  (void) repertoire;
+  (void) name;
+  (void) len;
+
+  return ILLEGAL_CHAR_VALUE;
+}
+
+
+/* Stub for the reverse repertoire lookup of the character UCS.  iconv
+   does not handle repertoire maps, so there is never a symbolic name to
+   return and the result is always NULL.  */
+const char *
+repertoire_find_symbol (const struct repertoire_t *repertoire, uint32_t ucs)
+{
+  /* The arguments are intentionally ignored.  */
+  (void) repertoire;
+  (void) ucs;
+
+  return NULL;
+}
diff --git a/iconv/iconv_charmap.c b/iconv/iconv_charmap.c
new file mode 100644
index 0000000..4b72b1b
--- /dev/null
+++ b/iconv/iconv_charmap.c
@@ -0,0 +1,563 @@
+/* Convert using charmaps and possibly iconv().
+ Copyright (C) 2001 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@redhat.com>, 2001.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include <assert.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <iconv.h>
+#include <libintl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+#include "iconv_prog.h"
+
+
+/* Prototypes for a few program-wide used functions. */
+extern void *xmalloc (size_t __n);
+extern void *xcalloc (size_t __n, size_t __s);
+
+
+/* One node of the conversion table. The table is a tree indexed by
+ successive input bytes: each node has one slot per possible byte
+ value. */
+struct convtable
+{
+ /* Bit set with one flag per byte value, accessed through is_term,
+ set_term and clear_term (eight flags are packed into each element).
+ A set flag means the input sequence ends with this byte. */
+ int term[256 / 8];
+ /* Payload for each byte value: `out' is valid when the flag for the
+ byte is set, otherwise `sub' points to the node for the next input
+ byte, or is NULL if no known sequence continues with this byte. */
+ union
+ {
+ struct convtable *sub;
+ struct charseq *out;
+ } val[256];
+};
+
+
+/* Return a new, zero-filled conversion table node.  The memory comes
+   from xcalloc.  */
+static inline struct convtable *
+allocate_table (void)
+{
+  struct convtable *node = xcalloc (1, sizeof (struct convtable));
+
+  return node;
+}
+
+
+/* Test the terminal flag of byte value IDX in TBL.  The result is the
+   raw masked bit: zero if the flag is clear, nonzero (not necessarily 1)
+   if it is set.  */
+static inline int
+is_term (struct convtable *tbl, unsigned int idx)
+{
+  const int mask = 1 << (idx % 8);
+
+  return tbl->term[idx / 8] & mask;
+}
+
+
+/* Clear the terminal flag of byte value IDX in TBL.  */
+static inline void
+clear_term (struct convtable *tbl, unsigned int idx)
+{
+  int *slot = &tbl->term[idx / 8];
+
+  *slot &= ~(1 << (idx % 8));
+}
+
+
+/* Set the terminal flag of byte value IDX in TBL.  */
+static inline void
+set_term (struct convtable *tbl, unsigned int idx)
+{
+  int *slot = &tbl->term[idx / 8];
+
+  *slot |= 1 << (idx % 8);
+}
+
+
+/* Generate the conversion table. */
+static struct convtable *use_from_charmap (struct charmap_t *from_charmap,
+ const char *to_code);
+static struct convtable *use_to_charmap (const char *from_code,
+ struct charmap_t *to_charmap);
+static struct convtable *use_both_charmaps (struct charmap_t *from_charmap,
+ struct charmap_t *to_charmap);
+
+/* Prototypes for the functions doing the actual work. */
+static int process_block (struct convtable *tbl, char *addr, size_t len,
+ FILE *output);
+static int process_fd (struct convtable *tbl, int fd, FILE *output);
+static int process_file (struct convtable *tbl, FILE *input, FILE *output);
+
+
+/* Convert the inputs named by ARGV[REMAINING] up to ARGV[ARGC - 1], or
+ standard input if REMAINING equals ARGC, using a table built from
+ FROM_CHARMAP and/or TO_CHARMAP, and write the result to OUTPUT.
+ FROM_CODE and TO_CODE are the character set names handed to iconv()
+ for whichever side has no charmap. At least one of the two charmaps
+ must be non-NULL. Returns EXIT_SUCCESS, or EXIT_FAILURE if the table
+ could not be built or an input could not be opened or converted. */
+int
+charmap_conversion (const char *from_code, struct charmap_t *from_charmap,
+ const char *to_code, struct charmap_t *to_charmap,
+ int argc, int remaining, char *argv[], FILE *output)
+{
+ struct convtable *cvtbl;
+ int status = EXIT_SUCCESS;
+
+ /* We have three different cases to handle:
+
+ - both, from_charmap and to_charmap, are available. This means we
+ can assume that the symbolic names match and use them to create
+ the mapping.
+
+ - only from_charmap is available. In this case we can only hope that
+ the symbolic names used are of the <Uxxxx> form in which case we
+ can use a UCS4->"to_code" iconv() conversion for the second step.
+
+ - only to_charmap is available. This is similar, only that we would
+ use iconv() for the "from_code"->UCS4 conversion.
+
+ We first create a table which maps input bytes into output bytes.
+ Once this is done we can handle all three of the cases above
+ equally. */
+ if (from_charmap != NULL)
+ {
+ if (to_charmap == NULL)
+ cvtbl = use_from_charmap (from_charmap, to_code);
+ else
+ cvtbl = use_both_charmaps (from_charmap, to_charmap);
+ }
+ else
+ {
+ /* The caller must supply at least one charmap. */
+ assert (to_charmap != NULL);
+ cvtbl = use_to_charmap (from_code, to_charmap);
+ }
+
+ /* If we couldn't generate a table stop now. */
+ if (cvtbl == NULL)
+ return EXIT_FAILURE;
+
+ /* We can now start the conversion. */
+ if (remaining == argc)
+ {
+ /* No file arguments: convert standard input. */
+ if (process_file (cvtbl, stdin, output) != 0)
+ status = EXIT_FAILURE;
+ }
+ else
+ do
+ {
+ struct stat st;
+ char *addr;
+ int fd;
+
+ if (verbose)
+ printf ("%s:\n", argv[remaining]);
+ /* A file name of `-' stands for standard input. */
+ if (strcmp (argv[remaining], "-") == 0)
+ fd = 0;
+ else
+ {
+ fd = open (argv[remaining], O_RDONLY);
+
+ if (fd == -1)
+ {
+ error (0, errno, _("cannot open input file `%s'"),
+ argv[remaining]);
+ status = EXIT_FAILURE;
+ /* In a do-while loop `continue' jumps to the loop
+ condition, so the next argument is still picked up. */
+ continue;
+ }
+ }
+
+#ifdef _POSIX_MAPPED_FILES
+ /* We have possibilities for reading the input file. First try
+ to mmap() it since this will provide the fastest solution. */
+ if (fstat (fd, &st) == 0
+ && ((addr = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE,
+ fd, 0)) != MAP_FAILED))
+ {
+ /* Yes, we can use mmap(). The descriptor is not needed
+ anymore. A failing close() terminates the program. */
+ if (close (fd) != 0)
+ error (EXIT_FAILURE, errno,
+ _("error while closing input `%s'"), argv[remaining]);
+
+ if (process_block (cvtbl, addr, st.st_size, output) < 0)
+ {
+ /* Something went wrong. */
+ status = EXIT_FAILURE;
+
+ /* We don't need the input data anymore. */
+ munmap ((void *) addr, st.st_size);
+
+ /* We cannot go on with producing output since it might
+ lead to problem because the last output might leave
+ the output stream in an undefined state. */
+ break;
+ }
+
+ /* We don't need the input data anymore. */
+ munmap ((void *) addr, st.st_size);
+ }
+ else
+#endif /* _POSIX_MAPPED_FILES */
+ {
+ /* Read the file in pieces. This is also the fallback when
+ fstat() or mmap() failed above. */
+ if (process_fd (cvtbl, fd, output) != 0)
+ {
+ /* Something went wrong. */
+ status = EXIT_FAILURE;
+
+ /* We don't need the input file anymore. */
+ close (fd);
+
+ /* We cannot go on with producing output since it might
+ lead to problem because the last output might leave
+ the output stream in an undefined state. */
+ break;
+ }
+
+ /* Now close the file. */
+ close (fd);
+ }
+ }
+ while (++remaining < argc);
+
+ /* All done. */
+ return status;
+}
+
+
+/* Record in the table rooted at TBL that the input byte sequence IN is
+   to be converted to OUT.  Intermediate nodes are created as needed.
+   An existing shorter definition which is a prefix of IN is dropped in
+   favour of the longer sequence, while IN itself is ignored if it is
+   already defined or is the prefix of a longer known sequence.  */
+static void
+add_bytes (struct convtable *tbl, struct charseq *in, struct charseq *out)
+{
+  const unsigned char *seq = (const unsigned char *) in->bytes;
+  unsigned int last;
+  int pos;
+
+  assert (in->nbytes > 0);
+
+  /* Walk (and if necessary build) the path for all bytes but the last.  */
+  for (pos = 0; pos + 1 < in->nbytes; ++pos)
+    {
+      unsigned int lead = seq[pos];
+
+      if (is_term (tbl, lead) || tbl->val[lead].sub == NULL)
+	{
+	  /* Either nothing is known for this byte yet or a shorter
+	     sequence ends here.  The shorter definition is silently
+	     discarded since it is a prefix of the one being added.  */
+	  clear_term (tbl, lead);
+	  tbl->val[lead].sub =
+	    (struct convtable *) xcalloc (1, sizeof (struct convtable));
+	}
+
+      tbl = tbl->val[lead].sub;
+    }
+
+  last = seq[pos];
+
+  /* Leave the slot alone if it is already in use, be it by an earlier
+     definition or as the prefix of a longer sequence.  */
+  if (is_term (tbl, last) || tbl->val[last].sub != NULL)
+    return;
+
+  set_term (tbl, last);
+  tbl->val[last].out = out;
+}
+
+
+/* Build the conversion table for the case that only FROM_CHARMAP is
+   available.  Every entry of the charmap which has a known UCS4 value
+   is pushed through an iconv() conversion from "WCHAR_T" to TO_CODE and
+   the bytes produced become the output for the entry's input bytes.
+   Returns NULL if the iconv() conversion cannot be opened.  */
+static struct convtable *
+use_from_charmap (struct charmap_t *from_charmap, const char *to_code)
+{
+  struct convtable *table;
+  iconv_t conv;
+  void *iter = NULL;
+  const void *key;
+  size_t keylen;
+  void *data;
+
+  conv = iconv_open (to_code, "WCHAR_T");
+  if (conv == (iconv_t) -1)
+    /* Without the converter there is nothing we can do.  */
+    return NULL;
+
+  table = allocate_table ();
+
+  while (iterate_table (&from_charmap->char_table, &iter, &key, &keylen,
+			&data) >= 0)
+    {
+      struct charseq *in = (struct charseq *) data;
+      wchar_t wc[1];
+      unsigned char converted[64];
+      char *src;
+      size_t srcleft;
+      char *dst;
+      size_t dstleft;
+
+      /* Entries without UCS4 value cannot be handled by iconv().  */
+      if (in->ucs4 == UNINITIALIZED_CHAR_VALUE)
+	continue;
+
+      wc[0] = in->ucs4;
+      src = (char *) wc;
+      srcleft = sizeof (wc);
+      dst = (char *) converted;
+      dstleft = sizeof (converted);
+
+      /* The return value does not matter, only whether output was
+	 produced.  */
+      (void) iconv (conv, &src, &srcleft, &dst, &dstleft);
+
+      if (dst != (char *) converted)
+	{
+	  /* The character exists in TO_CODE.  Store the bytes.  */
+	  struct charseq *newp;
+	  size_t produced = sizeof (converted) - dstleft;
+
+	  assert ((char *) converted + produced == dst);
+
+	  newp = (struct charseq *) xmalloc (sizeof (struct charseq)
+					     + produced);
+	  newp->name = in->name;
+	  newp->ucs4 = in->ucs4;
+	  newp->nbytes = produced;
+	  memcpy (newp->bytes, converted, produced);
+
+	  add_bytes (table, in, newp);
+	}
+
+      /* Reset the converter so no state leaks into the next entry.  */
+      (void) iconv (conv, NULL, NULL, NULL, NULL);
+    }
+
+  iconv_close (conv);
+
+  return table;
+}
+
+
+/* Build the conversion table for the case that only TO_CHARMAP is
+   available.  Every entry of the charmap which has a known UCS4 value
+   is converted with iconv() from "WCHAR_T" to FROM_CODE; the bytes
+   produced are the input sequence which maps to the charmap entry.
+   Returns NULL if the iconv() conversion cannot be opened.  */
+static struct convtable *
+use_to_charmap (const char *from_code, struct charmap_t *to_charmap)
+{
+  /* We iterate over all entries in the to_charmap and for those which
+     have a known UCS4 representation we use an iconv() call to determine
+     the mapping to the from_code charset.  */
+  struct convtable *rettbl;
+  iconv_t cd;
+  void *ptr = NULL;
+  const void *key;
+  size_t keylen;
+  void *data;
+
+  /* Note that the conversion we use here is the reverse direction.  Without
+     exhaustive search we cannot figure out which input yields the UCS4
+     character we are looking for.  Therefore we determine it the other
+     way round.  */
+  cd = iconv_open (from_code, "WCHAR_T");
+  if (cd == (iconv_t) -1)
+    /* We cannot do anything.  */
+    return NULL;
+
+  rettbl = allocate_table ();
+
+  while (iterate_table (&to_charmap->char_table, &ptr, &key, &keylen, &data)
+	 >= 0)
+    {
+      struct charseq *out = (struct charseq *) data;
+
+      if (out->ucs4 != UNINITIALIZED_CHAR_VALUE)
+	{
+	  /* There is a chance.  Try the iconv module.  */
+	  wchar_t inbuf[1] = { out->ucs4 };
+	  unsigned char outbuf[64];
+	  char *inptr = (char *) inbuf;
+	  size_t inlen = sizeof (inbuf);
+	  char *outptr = (char *) outbuf;
+	  size_t outlen = sizeof (outbuf);
+
+	  (void) iconv (cd, &inptr, &inlen, &outptr, &outlen);
+
+	  if (outptr != (char *) outbuf)
+	    {
+	      /* We got some output.  Good, use it.  */
+	      struct charseq *newp;
+
+	      outlen = sizeof (outbuf) - outlen;
+	      assert ((char *) outbuf + outlen == outptr);
+
+	      newp = (struct charseq *) xmalloc (sizeof (struct charseq)
+						 + outlen);
+	      newp->name = out->name;
+	      newp->ucs4 = out->ucs4;
+	      newp->nbytes = outlen;
+	      memcpy (newp->bytes, outbuf, outlen);
+
+	      add_bytes (rettbl, newp, out);
+
+	      /* The temporary sequence only served as the key for the
+		 table walk; add_bytes keeps no reference to it, so
+		 release it instead of leaking one block per entry.  */
+	      free (newp);
+	    }
+
+	  /* Clear any possible state left behind.  */
+	  (void) iconv (cd, NULL, NULL, NULL, NULL);
+	}
+    }
+
+  iconv_close (cd);
+
+  return rettbl;
+}
+
+
+/* Build the conversion table for the case that both charmaps are
+   available.  Each entry of FROM_CHARMAP is looked up by its key in
+   TO_CHARMAP; if a matching entry exists the input bytes of the former
+   are mapped to the latter.  Entries without counterpart are skipped.  */
+static struct convtable *
+use_both_charmaps (struct charmap_t *from_charmap,
+		   struct charmap_t *to_charmap)
+{
+  struct convtable *table = allocate_table ();
+  void *iter = NULL;
+  const void *key;
+  size_t keylen;
+  void *data;
+
+  for (;;)
+    {
+      struct charseq *target;
+
+      if (iterate_table (&from_charmap->char_table, &iter, &key, &keylen,
+			 &data) < 0)
+	/* No more entries.  */
+	break;
+
+      target = charmap_find_value (to_charmap, key, keylen);
+      if (target == NULL)
+	/* The name is not known in the target charmap.  */
+	continue;
+
+      add_bytes (table, (struct charseq *) data, target);
+    }
+
+  return table;
+}
+
+
+/* Convert the LEN bytes starting at ADDR using the table TBL and write
+   the resulting bytes to OUTPUT.  Returns 0 on success.  Returns -1
+   after printing a diagnostic if the input ends in the middle of a
+   sequence, or if an unknown sequence is found and `omit_invalid' is
+   not set.  With `omit_invalid' set the first byte of an unknown
+   sequence is dropped and matching restarts at the following byte.  */
+static int
+process_block (struct convtable *tbl, char *addr, size_t len, FILE *output)
+{
+  /* Offset in the buffer of the byte currently examined.  */
+  size_t n = 0;
+
+  while (n < len)
+    {
+      struct convtable *cur = tbl;
+      unsigned char *curp = (unsigned char *) addr;
+      unsigned int byte = *curp;
+      int cnt;
+      struct charseq *out;
+
+      while (! is_term (cur, byte))
+	if (cur->val[byte].sub == NULL)
+	  {
+	    /* This is an invalid sequence.  Skip the first byte if we are
+	       ignoring errors.  Otherwise punt.  */
+	    if (! omit_invalid)
+	      {
+		error (0, 0, _("illegal input sequence at position %Zd"), n);
+		return -1;
+	      }
+
+	    /* Move back to the start of the failed sequence ...  */
+	    n -= curp - (unsigned char *) addr;
+
+	    /* ... and step over its first byte.  The bound must be
+	       checked before the next byte is fetched, otherwise we
+	       would read one byte beyond the end of the buffer.  */
+	    ++addr;
+	    if (++n >= len)
+	      /* All converted.  */
+	      return 0;
+
+	    curp = (unsigned char *) addr;
+	    byte = *curp;
+
+	    cur = tbl;
+	  }
+	else
+	  {
+	    cur = cur->val[byte].sub;
+
+	    if (++n >= len)
+	      {
+		error (0, 0, _("\
+incomplete character or shift sequence at end of buffer"));
+		return -1;
+	      }
+
+	    byte = *++curp;
+	  }
+
+      /* We found a final byte.  Write the output bytes.  */
+      out = cur->val[byte].out;
+      for (cnt = 0; cnt < out->nbytes; ++cnt)
+	fputc_unlocked (out->bytes[cnt], output);
+
+      /* Continue behind the sequence just converted.  */
+      addr = (char *) curp + 1;
+      ++n;
+    }
+
+  return 0;
+}
+
+
+/* Read everything available from the descriptor FD into a buffer and
+   convert it with the table TBL, writing the result to OUTPUT.  The
+   buffer is static: it is kept and reused by later calls and only ever
+   grows.  Returns the result of process_block, or -1 after printing a
+   diagnostic if reading fails or the buffer cannot be enlarged.  */
+static int
+process_fd (struct convtable *tbl, int fd, FILE *output)
+{
+  /* We have a problem with reading from a descriptor since we must not
+     hand the converter an incomplete character or shift sequence at
+     the end of the buffer.  Since we have to deal with arbitrary
+     encodings we must read the whole text in a buffer and process it
+     in one step.  */
+  static char *inbuf = NULL;
+  static size_t maxlen = 0;
+  /* Start filling at the beginning of the buffer left behind by an
+     earlier call (if any).  Starting at NULL would make read() fail
+     for every input after the first one.  */
+  char *inptr = inbuf;
+  size_t actlen = 0;
+
+  /* First fill whatever buffer space already exists.  */
+  while (actlen < maxlen)
+    {
+      ssize_t n = read (fd, inptr, maxlen - actlen);
+
+      if (n == 0)
+	/* No more text to read.  */
+	break;
+
+      if (n == -1)
+	{
+	  /* Error while reading.  */
+	  error (0, errno, _("error while reading the input"));
+	  return -1;
+	}
+
+      inptr += n;
+      actlen += n;
+    }
+
+  /* The buffer is completely filled (or does not exist yet), so there
+     might be more input.  Enlarge it until end of file is seen.  */
+  if (actlen == maxlen)
+    while (1)
+      {
+	ssize_t n;
+	char *newbuf;
+
+	/* Increase the buffer.  Use a temporary so the old buffer and
+	   its recorded size stay valid if the allocation fails.  */
+	newbuf = realloc (inbuf, maxlen + 32768);
+	if (newbuf == NULL)
+	  {
+	    error (0, errno, _("unable to allocate buffer for input"));
+	    return -1;
+	  }
+	inbuf = newbuf;
+	maxlen += 32768;
+	inptr = inbuf + actlen;
+
+	do
+	  {
+	    n = read (fd, inptr, maxlen - actlen);
+
+	    if (n == 0)
+	      /* No more text to read.  */
+	      break;
+
+	    if (n == -1)
+	      {
+		/* Error while reading.  */
+		error (0, errno, _("error while reading the input"));
+		return -1;
+	      }
+
+	    inptr += n;
+	    actlen += n;
+	  }
+	while (actlen < maxlen);
+
+	if (n == 0)
+	  /* Break again so we leave both loops.  */
+	  break;
+      }
+
+  /* Now we have all the input in the buffer.  Process it in one run.  */
+  return process_block (tbl, inbuf, actlen, output);
+}
+
+
+/* Convert the content of the stream INPUT with the table TBL and write
+   the result to OUTPUT.  The work is done on the descriptor underlying
+   the stream; the return value is that of process_fd.  */
+static int
+process_file (struct convtable *tbl, FILE *input, FILE *output)
+{
+  /* Bypassing the stream is safe because this function is only used
+     for `stdin' and nothing has been read from it so far.  */
+  const int fd = fileno (input);
+
+  return process_fd (tbl, fd, output);
+}
diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c
index 6b9930e..24521c0 100644
--- a/iconv/iconv_prog.c
+++ b/iconv/iconv_prog.c
@@ -36,7 +36,9 @@
#ifdef _POSIX_MAPPED_FILES
# include <sys/mman.h>
#endif
+#include <charmap.h>
#include <gconv_int.h>
+#include "iconv_prog.h"
/* Get libc version number. */
#include "../version.h"
@@ -94,13 +96,13 @@ static const char *to_code;
static const char *output_file;
/* Nonzero if verbose ouput is wanted. */
-static int verbose;
+int verbose;
/* Nonzero if list of all coded character sets is wanted. */
static int list;
/* If nonzero omit invalid character from output. */
-static int omit_invalid;
+int omit_invalid;
/* Prototypes for the functions doing the actual work. */
static int process_block (iconv_t cd, char *addr, size_t len, FILE *output);
@@ -117,6 +119,8 @@ main (int argc, char *argv[])
FILE *output;
iconv_t cd;
const char *orig_to_code;
+ struct charmap_t *from_charmap = NULL;
+ struct charmap_t *to_charmap = NULL;
/* Set locale via LC_ALL. */
setlocale (LC_ALL, "");
@@ -179,18 +183,23 @@ main (int argc, char *argv[])
to_code = newp;
}
- /* Let's see whether we have these coded character sets. */
- cd = iconv_open (to_code, from_code);
- if (cd == (iconv_t) -1)
- {
- if (errno == EINVAL)
- error (EXIT_FAILURE, 0,
- _("conversion from `%s' to `%s' not supported"),
- from_code, orig_to_code);
- else
- error (EXIT_FAILURE, errno,
- _("failed to start conversion processing"));
- }
+ /* POSIX 1003.2b introduces a silly thing: the arguments to -t and -f
+ can be file names of charmaps. In this case iconv will have to read
+ those charmaps and use them to do the conversion. But there are
+ holes in the specification. There is nothing said that if -f is a
+ charmap filename that -t must be, too. And vice versa. There is
+ also no word about the symbolic names used. What if they don't
+ match? */
+ if (strchr (from_code, '/') != NULL)
+ /* The from-name might be a charmap file name. Try reading the
+ file. */
+ from_charmap = charmap_read (from_code, /*0, 1*/1, 0, 0);
+
+ if (strchr (orig_to_code, '/') != NULL)
+ /* The to-name might be a charmap file name. Try reading the
+ file. */
+ to_charmap = charmap_read (orig_to_code, /*0, 1,*/1,0, 0);
+
/* Determine output file. */
if (output_file != NULL && strcmp (output_file, "-") != 0)
@@ -202,92 +211,117 @@ main (int argc, char *argv[])
else
output = stdout;
- /* Now process the remaining files. Write them to stdout or the file
- specified with the `-o' parameter. If we have no file given as
- the parameter process all from stdin. */
- if (remaining == argc)
- {
- if (process_file (cd, stdin, output) != 0)
- status = EXIT_FAILURE;
- }
+ /* At this point we have to handle two cases. The first one is
+ where a charmap is used for the from- or to-charset, or both. We
+ handle this special since it is very different from the sane way of
+ doing things. The other case allows converting using the iconv()
+ function. */
+ if (from_charmap != NULL || to_charmap != NULL)
+ /* Construct the conversion table and do the conversion. */
+ status = charmap_conversion (from_code, from_charmap, to_code, to_charmap,
+ argc, remaining, argv, output);
else
- do
- {
- struct stat st;
- char *addr;
- int fd;
-
+ {
+ /* Let's see whether we have these coded character sets. */
+ cd = iconv_open (to_code, from_code);
+ if (cd == (iconv_t) -1)
+ {
+ if (errno == EINVAL)
+ error (EXIT_FAILURE, 0,
+ _("conversion from `%s' to `%s' not supported"),
+ from_code, orig_to_code);
+ else
+ error (EXIT_FAILURE, errno,
+ _("failed to start conversion processing"));
+ }
- if (verbose)
- printf ("%s:\n", argv[remaining]);
- if (strcmp (argv[remaining], "-") == 0)
- fd = 0;
- else
+ /* Now process the remaining files. Write them to stdout or the file
+ specified with the `-o' parameter. If we have no file given as
+ the parameter process all from stdin. */
+ if (remaining == argc)
+ {
+ if (process_file (cd, stdin, output) != 0)
+ status = EXIT_FAILURE;
+ }
+ else
+ do
{
- fd = open (argv[remaining], O_RDONLY);
-
- if (fd == -1)
+ struct stat st;
+ char *addr;
+ int fd;
+
+ if (verbose)
+ printf ("%s:\n", argv[remaining]);
+ if (strcmp (argv[remaining], "-") == 0)
+ fd = 0;
+ else
{
- error (0, errno, _("cannot open input file `%s'"),
- argv[remaining]);
- status = EXIT_FAILURE;
- continue;
+ fd = open (argv[remaining], O_RDONLY);
+
+ if (fd == -1)
+ {
+ error (0, errno, _("cannot open input file `%s'"),
+ argv[remaining]);
+ status = EXIT_FAILURE;
+ continue;
+ }
}
- }
#ifdef _POSIX_MAPPED_FILES
- /* We have possibilities for reading the input file. First try
- to mmap() it since this will provide the fastest solution. */
- if (fstat (fd, &st) == 0
- && ((addr = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0))
- != MAP_FAILED))
- {
- /* Yes, we can use mmap(). The descriptor is not needed
- anymore. */
- if (close (fd) != 0)
- error (EXIT_FAILURE, errno, _("error while closing input `%s'"),
- argv[remaining]);
-
- if (process_block (cd, addr, st.st_size, output) < 0)
+ /* We have possibilities for reading the input file. First try
+ to mmap() it since this will provide the fastest solution. */
+ if (fstat (fd, &st) == 0
+ && ((addr = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE,
+ fd, 0)) != MAP_FAILED))
{
- /* Something went wrong. */
- status = EXIT_FAILURE;
+ /* Yes, we can use mmap(). The descriptor is not needed
+ anymore. */
+ if (close (fd) != 0)
+ error (EXIT_FAILURE, errno,
+ _("error while closing input `%s'"),
+ argv[remaining]);
+
+ if (process_block (cd, addr, st.st_size, output) < 0)
+ {
+ /* Something went wrong. */
+ status = EXIT_FAILURE;
+
+ /* We don't need the input data anymore. */
+ munmap ((void *) addr, st.st_size);
+
+ /* We cannot go on with producing output since it might
+ lead to problem because the last output might leave
+ the output stream in an undefined state. */
+ break;
+ }
/* We don't need the input data anymore. */
munmap ((void *) addr, st.st_size);
-
- /* We cannot go on with producing output since it might
- lead to problem because the last output might leave
- the output stream in an undefined state. */
- break;
}
-
- /* We don't need the input data anymore. */
- munmap ((void *) addr, st.st_size);
- }
- else
+ else
#endif /* _POSIX_MAPPED_FILES */
- {
- /* Read the file in pieces. */
- if (process_fd (cd, fd, output) != 0)
{
- /* Something went wrong. */
- status = EXIT_FAILURE;
-
- /* We don't need the input file anymore. */
+ /* Read the file in pieces. */
+ if (process_fd (cd, fd, output) != 0)
+ {
+ /* Something went wrong. */
+ status = EXIT_FAILURE;
+
+ /* We don't need the input file anymore. */
+ close (fd);
+
+ /* We cannot go on with producing output since it might
+ lead to problem because the last output might leave
+ the output stream in an undefined state. */
+ break;
+ }
+
+ /* Now close the file. */
close (fd);
-
- /* We cannot go on with producing output since it might
- lead to problem because the last output might leave
- the output stream in an undefined state. */
- break;
}
-
- /* Now close the file. */
- close (fd);
}
- }
- while (++remaining < argc);
+ while (++remaining < argc);
+ }
/* Close the output file now. */
if (fclose (output))
@@ -402,7 +436,7 @@ conversion stopped due to problem in writing the output"));
character sets we have to flush the state now. */
outptr = outbuf;
outlen = OUTBUF_SIZE;
- n = iconv (cd, NULL, NULL, &outptr, &outlen);
+ (void) iconv (cd, NULL, NULL, &outptr, &outlen);
if (outptr != outbuf)
{
diff --git a/iconv/iconv_prog.h b/iconv/iconv_prog.h
new file mode 100644
index 0000000..dbb4a0b
--- /dev/null
+++ b/iconv/iconv_prog.h
@@ -0,0 +1,42 @@
+/* Copyright (C) 2001 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@redhat.com>, 2001.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#ifndef _ICONV_PROG_H
+#define _ICONV_PROG_H 1
+
+#include <stdio.h>
+#include <charmap.h>
+
+
+/* Nonzero if verbose output is wanted. Defined in iconv_prog.c. */
+extern int verbose;
+
+/* If nonzero omit invalid character from output. Defined in
+ iconv_prog.c. */
+extern int omit_invalid;
+
+/* Perform the conversion using a charmap or two. FROM_CHARMAP and
+ TO_CHARMAP are the parsed charmaps; either one, but not both, may be
+ NULL, in which case the corresponding FROM_CODE or TO_CODE name is
+ used with iconv() instead. The inputs are the arguments
+ ARGV[REMAINING] up to ARGV[ARGC - 1], or standard input if REMAINING
+ equals ARGC. The converted text is written to OUTPUT. The result
+ is EXIT_SUCCESS or EXIT_FAILURE. */
+extern int charmap_conversion (const char *from_code,
+ struct charmap_t *from_charmap,
+ const char *to_code,
+ struct charmap_t *to_charmap,
+ int argc, int remaining, char *argv[],
+ FILE *output);
+
+
+#endif /* iconv_prog.h */