aboutsummaryrefslogtreecommitdiff
path: root/iconv
diff options
context:
space:
mode:
Diffstat (limited to 'iconv')
-rw-r--r--iconv/Makefile40
-rw-r--r--iconv/gconv_int.h30
-rw-r--r--iconv/gconv_simple.c18
-rw-r--r--iconv/gconv_trans.c4
-rw-r--r--iconv/iconv_prog.c547
-rw-r--r--iconv/loop.c5
-rw-r--r--iconv/tst-iconv-sticky-input-error.c135
-rw-r--r--iconv/tst-iconv_prog-buffer.sh306
-rw-r--r--iconv/tst-translit-locale10
-rw-r--r--iconv/tst-translit-mchar.c48
-rw-r--r--iconv/tst-translit-mchar.sh51
11 files changed, 976 insertions, 218 deletions
diff --git a/iconv/Makefile b/iconv/Makefile
index 63afc85..de9d964 100644
--- a/iconv/Makefile
+++ b/iconv/Makefile
@@ -57,6 +57,14 @@ tests = \
tst-iconv-opt \
# tests
+test-srcs := \
+ tst-translit-mchar \
+ # test-srcs
+
+tests-internal = \
+ tst-iconv-sticky-input-error \
+ # tests-internal
+
others = iconv_prog iconvconfig
install-others-programs = $(inst_bindir)/iconv
install-sbin = iconvconfig
@@ -72,7 +80,13 @@ include $(patsubst %,$(..)libof-iterator.mk,$(cpp-srcs-left))
ifeq ($(run-built-tests),yes)
xtests-special += $(objpfx)test-iconvconfig.out
-tests-special += $(objpfx)tst-iconv_prog.out
+tests-special += \
+ $(objpfx)tst-iconv_prog-buffer-large.out \
+ $(objpfx)tst-iconv_prog-buffer-tiny.out \
+ $(objpfx)tst-iconv_prog-buffer.out \
+ $(objpfx)tst-iconv_prog.out \
+ $(objpfx)tst-translit-mchar.out \
+ # tests-special
endif
# Make a copy of the file because gconv module names are constructed
@@ -92,6 +106,8 @@ $(objpfx)tst-gconv-init-failure.out: \
$(objpfx)gconv-modules $(objpfx)tst-gconv-init-failure-mod.so
endif
+generated-dirs += tst-translit
+
include ../Rules
ifeq ($(run-built-tests),yes)
@@ -126,3 +142,25 @@ $(objpfx)tst-iconv_prog.out: tst-iconv_prog.sh $(objpfx)iconv_prog
$(BASH) $< $(common-objdir) '$(test-wrapper-env)' \
'$(run-program-env)' > $@; \
$(evaluate-test)
+
+$(objpfx)tst-translit-mchar.out: tst-translit-mchar.sh \
+ $(objpfx)tst-translit-mchar \
+ tst-translit-locale
+ $(SHELL) $< $(common-objpfx) '$(run-program-prefix-before-env)' \
+ '$(run-program-env)' '$(run-program-prefix-after-env)' \
+ $< > $@; \
+ $(evaluate-test)
+
+$(objpfx)tst-iconv_prog-buffer.out: \
+ tst-iconv_prog-buffer.sh $(objpfx)iconv_prog
+ $(BASH) $< $(common-objdir) '$(run-program-prefix)' > $@; \
+ $(evaluate-test)
+$(objpfx)tst-iconv_prog-buffer-tiny.out: \
+ tst-iconv_prog-buffer.sh $(objpfx)iconv_prog
+ $(BASH) $< $(common-objdir) '$(run-program-prefix)' \
+ '--buffer-size=1' > $@; \
+ $(evaluate-test)
+$(objpfx)tst-iconv_prog-buffer-large.out: \
+ tst-iconv_prog-buffer.sh $(objpfx)iconv_prog
+ $(BASH) $< $(common-objdir) '$(run-program-prefix)' '' '22' > $@; \
+ $(evaluate-test)
diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h
index 9fece3e..cd452d9 100644
--- a/iconv/gconv_int.h
+++ b/iconv/gconv_int.h
@@ -331,4 +331,34 @@ extern wint_t __gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c);
__END_DECLS
+/* Internal extensions for <gconv.h>. */
+
+/* Internal flags for __flags in struct __gconv_step_data. Overlaps
+ with flags for __gconv_open. */
+enum
+ {
+ /* The conversion encountered an illegal input character at one
+ point. */
+ __GCONV_ENCOUNTERED_ILLEGAL_INPUT = 1U << 30,
+ };
+
+/* Mark *STEP_DATA as having seen illegal input, and return
+ __GCONV_ILLEGAL_INPUT. */
+static inline int
+__gconv_mark_illegal_input (struct __gconv_step_data *step_data)
+{
+ step_data->__flags |= __GCONV_ENCOUNTERED_ILLEGAL_INPUT;
+ return __GCONV_ILLEGAL_INPUT;
+}
+
+/* Returns true if any of the conversion steps encountered illegal input. */
+static _Bool __attribute__ ((unused))
+__gconv_has_illegal_input (__gconv_t cd)
+{
+ for (size_t i = 0; i < cd->__nsteps; ++i)
+ if (cd->__data[i].__flags & __GCONV_ENCOUNTERED_ILLEGAL_INPUT)
+ return true;
+ return false;
+}
+
#endif /* gconv_int.h */
diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c
index 257be2f..f22002c 100644
--- a/iconv/gconv_simple.c
+++ b/iconv/gconv_simple.c
@@ -207,7 +207,7 @@ ucs4_internal_loop (struct __gconv_step *step,
UCS4 does not allow such values. */
if (irreversible == NULL)
/* We are transliterating, don't try to correct anything. */
- return __GCONV_ILLEGAL_INPUT;
+ return __gconv_mark_illegal_input (step_data);
if (flags & __GCONV_IGNORE_ERRORS)
{
@@ -218,7 +218,7 @@ ucs4_internal_loop (struct __gconv_step *step,
*inptrp = inptr;
*outptrp = outptr;
- return __GCONV_ILLEGAL_INPUT;
+ return __gconv_mark_illegal_input (step_data);
}
put32 (outptr, inval);
@@ -276,7 +276,7 @@ ucs4_internal_loop_single (struct __gconv_step *step,
if (!(flags & __GCONV_IGNORE_ERRORS))
{
*inptrp -= cnt - (state->__count & 7);
- return __GCONV_ILLEGAL_INPUT;
+ return __gconv_mark_illegal_input (step_data);
}
}
else
@@ -453,7 +453,7 @@ ucs4le_internal_loop (struct __gconv_step *step,
UCS4 does not allow such values. */
if (irreversible == NULL)
/* We are transliterating, don't try to correct anything. */
- return __GCONV_ILLEGAL_INPUT;
+ return __gconv_mark_illegal_input (step_data);
if (flags & __GCONV_IGNORE_ERRORS)
{
@@ -464,7 +464,7 @@ ucs4le_internal_loop (struct __gconv_step *step,
*inptrp = inptr;
*outptrp = outptr;
- return __GCONV_ILLEGAL_INPUT;
+ return __gconv_mark_illegal_input (step_data);
}
put32 (outptr, inval);
@@ -523,7 +523,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
represent the result. This is a genuine bug in the input since
UCS4 does not allow such values. */
if (!(flags & __GCONV_IGNORE_ERRORS))
- return __GCONV_ILLEGAL_INPUT;
+ return __gconv_mark_illegal_input (step_data);
}
else
{
@@ -969,7 +969,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
surrogates pass through, attackers could make a security \
hole exploit by synthesizing any desired plane 1-16 \
character. */ \
- result = __GCONV_ILLEGAL_INPUT; \
+ result = __gconv_mark_illegal_input (step_data); \
if (! ignore_errors_p ()) \
break; \
inptr += 4; \
@@ -1012,7 +1012,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
them. (Catching this here is not security relevant.) */ \
if (! ignore_errors_p ()) \
{ \
- result = __GCONV_ILLEGAL_INPUT; \
+ result = __gconv_mark_illegal_input (step_data); \
break; \
} \
inptr += 2; \
@@ -1061,7 +1061,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
character. */ \
if (! ignore_errors_p ()) \
{ \
- result = __GCONV_ILLEGAL_INPUT; \
+ result = __gconv_mark_illegal_input (step_data); \
break; \
} \
inptr += 4; \
diff --git a/iconv/gconv_trans.c b/iconv/gconv_trans.c
index 08b7a3f..54c4f3a 100644
--- a/iconv/gconv_trans.c
+++ b/iconv/gconv_trans.c
@@ -150,7 +150,7 @@ __gconv_transliterate (struct __gconv_step *step,
/* Nothing found, continue searching. */
}
- else if (cnt > 0)
+ else if (cnt > 0 && winbuf + cnt == winbufend)
/* This means that the input buffer contents matches a prefix of
an entry. Since we cannot match it unless we get more input,
we will tell the caller about it. */
@@ -232,6 +232,6 @@ __gconv_transliterate (struct __gconv_step *step,
}
/* Haven't found a match. */
- return __GCONV_ILLEGAL_INPUT;
+ return __gconv_mark_illegal_input (step_data);
}
libc_hidden_def (__gconv_transliterate)
diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c
index a765b1a..a2f1d34 100644
--- a/iconv/iconv_prog.c
+++ b/iconv/iconv_prog.c
@@ -31,9 +31,6 @@
#include <string.h>
#include <unistd.h>
#include <libintl.h>
-#ifdef _POSIX_MAPPED_FILES
-# include <sys/mman.h>
-#endif
#include <charmap.h>
#include <gconv_int.h>
#include "iconv_prog.h"
@@ -50,7 +47,11 @@
static void print_version (FILE *stream, struct argp_state *state);
void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version;
-#define OPT_VERBOSE 1000
+enum
+ {
+ OPT_VERBOSE = 1000,
+ OPT_BUFFER_SIZE,
+ };
#define OPT_LIST 'l'
/* Definitions of arguments for argp functions. */
@@ -66,6 +67,10 @@ static const struct argp_option options[] =
{ "output", 'o', N_("FILE"), 0, N_("output file") },
{ "silent", 's', NULL, 0, N_("suppress warnings") },
{ "verbose", OPT_VERBOSE, NULL, 0, N_("print progress information") },
+ /* This is an internal option intended for testing only. Very small
+ buffers do not work with all character sets. */
+ { "buffer-size", OPT_BUFFER_SIZE, N_("BYTE-COUNT"), OPTION_HIDDEN,
+ N_("size of in-memory scratch buffer") },
{ NULL, 0, NULL, 0, NULL }
};
@@ -103,13 +108,21 @@ static int list;
/* If nonzero omit invalid character from output. */
int omit_invalid;
+/* Current index in argv (after command line processing) with the
+ input file name. */
+static int current_input_file_index;
+
+/* Size of the temporary, in-memory buffer. Exceeding it needs
+ spooling to disk in a temporary file. Controlled by --buffer_size. */
+static size_t output_buffer_size = 1024 * 1024;
+
/* Prototypes for the functions doing the actual work. */
-static int process_block (iconv_t cd, char *addr, size_t len, FILE **output,
- const char *output_file);
-static int process_fd (iconv_t cd, int fd, FILE **output,
- const char *output_file);
-static int process_file (iconv_t cd, FILE *input, FILE **output,
- const char *output_file);
+static void prepare_output_file (char **argv);
+static void close_output_file (__gconv_t cd, int status);
+static int process_block (iconv_t cd, char **addr, size_t *len,
+ off64_t file_offset, bool *incomplete);
+static int process_fd (iconv_t cd, int fd);
+static int process_file (iconv_t cd, FILE *input);
static void print_known_names (void);
@@ -117,7 +130,6 @@ int
main (int argc, char *argv[])
{
int status = EXIT_SUCCESS;
- int remaining;
__gconv_t cd;
struct charmap_t *from_charmap = NULL;
struct charmap_t *to_charmap = NULL;
@@ -129,7 +141,7 @@ main (int argc, char *argv[])
textdomain (_libc_intl_domainname);
/* Parse and process arguments. */
- argp_parse (&argp, argc, argv, 0, &remaining, NULL);
+ argp_parse (&argp, argc, argv, 0, &current_input_file_index, NULL);
/* List all coded character sets if wanted. */
if (list)
@@ -164,7 +176,8 @@ main (int argc, char *argv[])
if (from_charmap != NULL || to_charmap != NULL)
/* Construct the conversion table and do the conversion. */
status = charmap_conversion (from_code, from_charmap, to_code, to_charmap,
- argc, remaining, argv, output_file);
+ argc, current_input_file_index, argv,
+ output_file);
else
{
struct gconv_spec conv_spec;
@@ -238,83 +251,45 @@ conversions from `%s' and to `%s' are not supported"),
_("failed to start conversion processing"));
}
- /* The output file. Will be opened when we are ready to produce
- output. */
- FILE *output = NULL;
+ prepare_output_file (argv);
/* Now process the remaining files. Write them to stdout or the file
specified with the `-o' parameter. If we have no file given as
the parameter process all from stdin. */
- if (remaining == argc)
+ if (current_input_file_index == argc)
{
- if (process_file (cd, stdin, &output, output_file) != 0)
+ if (process_file (cd, stdin) != 0)
status = EXIT_FAILURE;
}
else
do
{
-#ifdef _POSIX_MAPPED_FILES
- struct stat64 st;
- char *addr;
-#endif
int fd, ret;
if (verbose)
- fprintf (stderr, "%s:\n", argv[remaining]);
- if (strcmp (argv[remaining], "-") == 0)
- fd = 0;
+ fprintf (stderr, "%s:\n", argv[current_input_file_index]);
+ if (strcmp (argv[current_input_file_index], "-") == 0)
+ fd = STDIN_FILENO;
else
{
- fd = open (argv[remaining], O_RDONLY);
+ fd = open (argv[current_input_file_index], O_RDONLY);
if (fd == -1)
{
error (0, errno, _("cannot open input file `%s'"),
- argv[remaining]);
+ argv[current_input_file_index]);
status = EXIT_FAILURE;
continue;
}
}
-#ifdef _POSIX_MAPPED_FILES
- /* We have possibilities for reading the input file. First try
- to mmap() it since this will provide the fastest solution. */
- if (fstat64 (fd, &st) == 0
- && ((addr = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE,
- fd, 0)) != MAP_FAILED))
- {
- /* Yes, we can use mmap(). The descriptor is not needed
- anymore. */
- if (close (fd) != 0)
- error (EXIT_FAILURE, errno,
- _("error while closing input `%s'"),
- argv[remaining]);
-
- ret = process_block (cd, addr, st.st_size, &output,
- output_file);
-
- /* We don't need the input data anymore. */
- munmap ((void *) addr, st.st_size);
-
- if (ret != 0)
- {
- status = EXIT_FAILURE;
-
- if (ret < 0)
- /* We cannot go on with producing output since it might
- lead to problem because the last output might leave
- the output stream in an undefined state. */
- break;
- }
- }
- else
-#endif /* _POSIX_MAPPED_FILES */
{
/* Read the file in pieces. */
- ret = process_fd (cd, fd, &output, output_file);
+ ret = process_fd (cd, fd);
/* Now close the file. */
- close (fd);
+ if (fd != STDIN_FILENO)
+ close (fd);
if (ret != 0)
{
@@ -329,11 +304,15 @@ conversions from `%s' and to `%s' are not supported"),
}
}
}
- while (++remaining < argc);
+ while (++current_input_file_index < argc);
+
+ /* Ensure that iconv -c still exits with failure if iconv (the
+ function) has failed with E2BIG instead of EILSEQ. */
+ if (__gconv_has_illegal_input (cd))
+ status = EXIT_FAILURE;
/* Close the output file now. */
- if (output != NULL && fclose (output))
- error (EXIT_FAILURE, errno, _("error while closing output file"));
+ close_output_file (cd, status);
}
return status;
@@ -363,6 +342,14 @@ parse_opt (int key, char *arg, struct argp_state *state)
/* Omit invalid characters from output. */
omit_invalid = 1;
break;
+ case OPT_BUFFER_SIZE:
+ {
+ int i = atoi (arg);
+ if (i <= 0)
+ error (EXIT_FAILURE, 0, _("invalid buffer size: %s"), arg);
+ output_buffer_size = i;
+ }
+ break;
case OPT_VERBOSE:
verbose = 1;
break;
@@ -409,115 +396,299 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
}
+/* Command line index of the last input file that overlaps with the
+ output file. Zero means no temporary file is ever required. */
+static int last_overlapping_file_index;
-static int
-write_output (const char *outbuf, const char *outptr, FILE **output,
- const char *output_file)
+/* This is set to true if the output is written to a temporary file. */
+static bool output_using_temporary_file;
+
+/* This is the file descriptor that will be used by write_output. */
+static int output_fd = -1;
+
+/* Pointers at the start and end of the fixed-size output buffer. */
+static char *output_buffer_start;
+
+/* Current write position in the output buffer. */
+static char *output_buffer_current;
+
+/* Remaining bytes after output_buffer_current in the output buffer. */
+static size_t output_buffer_remaining;
+
+
+/* Reduce the buffer size when writing directly to the output file, to
+ reduce cache utilization. */
+static size_t copy_buffer_size = BUFSIZ;
+
+static void
+output_error (void)
{
- /* We have something to write out. */
- int errno_save = errno;
+ error (EXIT_FAILURE, errno, _("cannot open output file"));
+}
- if (*output == NULL)
+static void
+input_error (const char *path)
+{
+ error (0, errno, _("cannot open input file `%s'"), path);
+}
+
+/* Opens output_file for writing, truncating it. */
+static void
+open_output_direct (void)
+{
+ output_fd = open64 (output_file, O_WRONLY | O_CREAT | O_TRUNC, 0777);
+ if (output_fd < 0)
+ output_error ();
+}
+
+static void
+prepare_output_file (char **argv)
+{
+ if (copy_buffer_size > output_buffer_size)
+ copy_buffer_size = output_buffer_size;
+
+ if (output_file == NULL || strcmp (output_file, "-") == 0)
{
- /* Determine output file. */
- if (output_file != NULL && strcmp (output_file, "-") != 0)
+ /* No buffering is required when writing to standard output
+ because input overlap is expected to be solved externally. */
+ output_fd = STDOUT_FILENO;
+ output_buffer_size = copy_buffer_size;
+ }
+ else
+ {
+ /* If iconv creates the output file, no overlap is possible. */
+ output_fd = open64 (output_file, O_WRONLY | O_CREAT | O_EXCL, 0777);
+ if (output_fd >= 0)
+ output_buffer_size = copy_buffer_size;
+ else
{
- *output = fopen (output_file, "w");
- if (*output == NULL)
- error (EXIT_FAILURE, errno, _("cannot open output file"));
+ /* Otherwise, check if any of the input files overlap with the
+ output file. */
+ struct statx st;
+ if (statx (AT_FDCWD, output_file, 0, STATX_INO | STATX_MODE, &st)
+ != 0)
+ output_error ();
+ uint32_t out_dev_minor = st.stx_dev_minor;
+ uint32_t out_dev_major = st.stx_dev_major;
+ uint64_t out_ino = st.stx_ino;
+
+ int idx = current_input_file_index;
+ while (true)
+ {
+ /* Special case: no input files means standard input. */
+ if (argv[idx] == NULL && idx != current_input_file_index)
+ break;
+
+ int ret;
+ if (argv[idx] == NULL || strcmp (argv[idx], "-") == 0)
+ ret = statx (STDIN_FILENO, "", AT_EMPTY_PATH, STATX_INO, &st);
+ else
+ ret = statx (AT_FDCWD, argv[idx], 0, STATX_INO, &st);
+ if (ret != 0)
+ {
+ input_error (argv[idx]);
+ exit (EXIT_FAILURE);
+ }
+ if (out_dev_minor == st.stx_dev_minor
+ && out_dev_major == st.stx_dev_major
+ && out_ino == st.stx_ino)
+ {
+ if (argv[idx] == NULL)
+ /* Corner case: index of NULL would be larger than
+ idx while converting, triggering a switch away
+ from the temporary file. */
+ last_overlapping_file_index = INT_MAX;
+ else
+ last_overlapping_file_index = idx;
+ }
+
+ if (argv[idx] == NULL)
+ break;
+ ++idx;
+ }
+
+ /* If there is no overlap, avoid using a temporary file. */
+ if (last_overlapping_file_index == 0)
+ {
+ open_output_direct ();
+ output_buffer_size = copy_buffer_size;
+ }
}
- else
- *output = stdout;
}
- if (fwrite (outbuf, 1, outptr - outbuf, *output) < (size_t) (outptr - outbuf)
- || ferror (*output))
+ output_buffer_start = malloc (output_buffer_size);
+ if (output_buffer_start == NULL)
+ output_error ();
+ output_buffer_current = output_buffer_start;
+ output_buffer_remaining = output_buffer_size;
+}
+
+/* Write out the range [first, last), terminating the process on write
+ error. */
+static void
+write_fully (int fd, const char *first, const char *last)
+{
+ while (first < last)
{
- /* Error occurred while printing the result. */
- error (0, 0, _("\
+ ssize_t ret = write (fd, first, last - first);
+ if (ret == 0)
+ {
+ errno = ENOSPC;
+ output_error ();
+ }
+ if (ret < 0)
+ error (EXIT_FAILURE, errno, _("\
conversion stopped due to problem in writing the output"));
- return -1;
+ first += ret;
+ }
+}
+
+static void
+flush_output (void)
+{
+ bool temporary_file_not_needed
+ = current_input_file_index > last_overlapping_file_index;
+ if (output_fd < 0)
+ {
+ if (temporary_file_not_needed)
+ open_output_direct ();
+ else
+ {
+ /* Create an anonymous temporary file. */
+ FILE *fp = tmpfile ();
+ if (fp == NULL)
+ output_error ();
+ output_fd = dup (fileno (fp));
+ if (output_fd < 0)
+ output_error ();
+ fclose (fp);
+ output_using_temporary_file = true;
+ }
+ /* Either way, no longer use a memory-only staging buffer. */
+ output_buffer_size = copy_buffer_size;
}
+ else if (output_using_temporary_file && temporary_file_not_needed)
+ {
+ /* The temporary file is no longer needed. Switch to direct
+ output, replacing output_fd. */
+ int temp_fd = output_fd;
+ open_output_direct ();
+
+ /* Copy over the data spooled to the temporary file. */
+ if (lseek (temp_fd, 0, SEEK_SET) < 0)
+ output_error ();
+ while (true)
+ {
+ char buf[BUFSIZ];
+ ssize_t ret = read (temp_fd, buf, sizeof (buf));
+ if (ret < 0)
+ output_error ();
+ if (ret == 0)
+ break;
+ write_fully (output_fd, buf, buf + ret);
+ }
+ close (temp_fd);
- errno = errno_save;
+ /* No longer using a temporary file from now on. */
+ output_using_temporary_file = false;
+ output_buffer_size = copy_buffer_size;
+ }
- return 0;
+ write_fully (output_fd, output_buffer_start, output_buffer_current);
+ output_buffer_current = output_buffer_start;
+ output_buffer_remaining = output_buffer_size;
}
+static void
+close_output_file (__gconv_t cd, int status)
+{
+ /* Do not perform a flush if a temporary file or the in-memory
+ buffer is in use and there was an error. It would clobber the
+ overlapping input file. */
+ if (status != EXIT_SUCCESS && !omit_invalid &&
+ (output_using_temporary_file || output_fd < 0))
+ return;
+
+ /* All the input text is processed. For state-dependent character
+ sets we have to flush the state now.
+
+ The current_input_file_index variable is now larger than
+ last_overlapping_file_index, so the flush_output calls switch
+ away from the temporary file. */
+ size_t n = iconv (cd, NULL, NULL,
+ &output_buffer_current, &output_buffer_remaining);
+ if (n == (size_t) -1 && errno == E2BIG)
+ {
+ /* Try again if the state flush exceeded the buffer space. */
+ flush_output ();
+ n = iconv (cd, NULL, NULL,
+ &output_buffer_current, &output_buffer_remaining);
+ }
+ int saved_errno = errno;
+ flush_output ();
+ if (n == (size_t) -1 && !omit_invalid)
+ {
+ errno = saved_errno;
+ output_error ();
+ }
+
+ if (output_fd == STDOUT_FILENO)
+ {
+ /* Close standard output in safe manner, to report certain
+ ENOSPC errors. */
+ output_fd = dup (output_fd);
+ if (output_fd < 0)
+ output_error ();
+ }
+ if (close (output_fd) < 0)
+ output_error ();
+}
+/* CD is the iconv handle. Input processing starts at *ADDR, and
+ consumes upto *LEN bytes. *ADDR and *LEN are updated. FILE_OFFSET
+ is the file offset of the data initially at ADDR. *INCOMPLETE is
+ set to true if conversion stops due to an incomplete input
+ sequence. */
static int
-process_block (iconv_t cd, char *addr, size_t len, FILE **output,
- const char *output_file)
+process_block (iconv_t cd, char **addr, size_t *len, off64_t file_offset,
+ bool *incomplete)
{
-#define OUTBUF_SIZE 32768
- const char *start = addr;
- char outbuf[OUTBUF_SIZE];
- char *outptr;
- size_t outlen;
+ const char *start = *addr;
size_t n;
int ret = 0;
- while (len > 0)
+ while (*len > 0)
{
- outptr = outbuf;
- outlen = OUTBUF_SIZE;
- n = iconv (cd, &addr, &len, &outptr, &outlen);
+ n = iconv (cd, addr, len,
+ &output_buffer_current, &output_buffer_remaining);
if (n == (size_t) -1 && omit_invalid && errno == EILSEQ)
{
ret = 1;
- if (len == 0)
+ if (*len == 0)
n = 0;
else
errno = E2BIG;
}
- if (outptr != outbuf)
- {
- ret = write_output (outbuf, outptr, output, output_file);
- if (ret != 0)
- break;
- }
-
if (n != (size_t) -1)
- {
- /* All the input test is processed. For state-dependent
- character sets we have to flush the state now. */
- outptr = outbuf;
- outlen = OUTBUF_SIZE;
- n = iconv (cd, NULL, NULL, &outptr, &outlen);
-
- if (outptr != outbuf)
- {
- ret = write_output (outbuf, outptr, output, output_file);
- if (ret != 0)
- break;
- }
-
- if (n != (size_t) -1)
- break;
-
- if (omit_invalid && errno == EILSEQ)
- {
- ret = 1;
- break;
- }
- }
+ break;
- if (errno != E2BIG)
+ if (errno == E2BIG)
+ flush_output ();
+ else
{
/* iconv() ran into a problem. */
switch (errno)
{
case EILSEQ:
if (! omit_invalid)
- error (0, 0, _("illegal input sequence at position %ld"),
- (long int) (addr - start));
+ error (0, 0, _("illegal input sequence at position %lld"),
+ (long long int) (file_offset + (*addr - start)));
break;
case EINVAL:
- error (0, 0, _("\
-incomplete character or shift sequence at end of buffer"));
- break;
+ *incomplete = true;
+ return ret;
case EBADF:
error (0, 0, _("internal error (illegal descriptor)"));
break;
@@ -535,90 +706,60 @@ incomplete character or shift sequence at end of buffer"));
static int
-process_fd (iconv_t cd, int fd, FILE **output, const char *output_file)
+process_fd (iconv_t cd, int fd)
{
- /* we have a problem with reading from a descriptor since we must not
- provide the iconv() function an incomplete character or shift
- sequence at the end of the buffer. Since we have to deal with
- arbitrary encodings we must read the whole text in a buffer and
- process it in one step. */
- static char *inbuf = NULL;
- static size_t maxlen = 0;
- char *inptr = NULL;
- size_t actlen = 0;
-
- while (actlen < maxlen)
+ char inbuf[BUFSIZ];
+ char *inbuf_end = inbuf + sizeof (inbuf);
+ size_t inbuf_used = 0;
+ off64_t file_offset = 0;
+ int status = 0;
+ bool incomplete = false;
+
+ while (true)
{
- ssize_t n = read (fd, inptr, maxlen - actlen);
-
- if (n == 0)
- /* No more text to read. */
- break;
-
- if (n == -1)
+ char *p = inbuf + inbuf_used;
+ ssize_t read_ret = read (fd, p, inbuf_end - p);
+ if (read_ret == 0)
+ {
+ /* On EOF, check if the previous iconv invocation saw an
+ incomplete sequence. */
+ if (incomplete)
+ {
+ error (0, 0, _("\
+incomplete character or shift sequence at end of buffer"));
+ return 1;
+ }
+ return 0;
+ }
+ if (read_ret < 0)
{
- /* Error while reading. */
error (0, errno, _("error while reading the input"));
return -1;
}
-
- inptr += n;
- actlen += n;
+ inbuf_used += read_ret;
+ incomplete = false;
+ p = inbuf;
+ int ret = process_block (cd, &p, &inbuf_used, file_offset, &incomplete);
+ if (ret != 0)
+ {
+ status = ret;
+ if (ret < 0)
+ break;
+ }
+ /* The next loop iteration consumes the leftover bytes. */
+ memmove (inbuf, p, inbuf_used);
+ file_offset += read_ret - inbuf_used;
}
-
- if (actlen == maxlen)
- while (1)
- {
- ssize_t n;
- char *new_inbuf;
-
- /* Increase the buffer. */
- new_inbuf = (char *) realloc (inbuf, maxlen + 32768);
- if (new_inbuf == NULL)
- {
- error (0, errno, _("unable to allocate buffer for input"));
- return -1;
- }
- inbuf = new_inbuf;
- maxlen += 32768;
- inptr = inbuf + actlen;
-
- do
- {
- n = read (fd, inptr, maxlen - actlen);
-
- if (n == 0)
- /* No more text to read. */
- break;
-
- if (n == -1)
- {
- /* Error while reading. */
- error (0, errno, _("error while reading the input"));
- return -1;
- }
-
- inptr += n;
- actlen += n;
- }
- while (actlen < maxlen);
-
- if (n == 0)
- /* Break again so we leave both loops. */
- break;
- }
-
- /* Now we have all the input in the buffer. Process it in one run. */
- return process_block (cd, inbuf, actlen, output, output_file);
+ return status;
}
static int
-process_file (iconv_t cd, FILE *input, FILE **output, const char *output_file)
+process_file (iconv_t cd, FILE *input)
{
/* This should be safe since we use this function only for `stdin' and
we haven't read anything so far. */
- return process_fd (cd, fileno (input), output, output_file);
+ return process_fd (cd, fileno (input));
}
diff --git a/iconv/loop.c b/iconv/loop.c
index 5340daf..199fb28 100644
--- a/iconv/loop.c
+++ b/iconv/loop.c
@@ -123,8 +123,7 @@
`continue' must reach certain points. */
#define STANDARD_FROM_LOOP_ERR_HANDLER(Incr) \
{ \
- result = __GCONV_ILLEGAL_INPUT; \
- \
+ result = __gconv_mark_illegal_input (step_data); \
if (! ignore_errors_p ()) \
break; \
\
@@ -142,7 +141,7 @@
points. */
#define STANDARD_TO_LOOP_ERR_HANDLER(Incr) \
{ \
- result = __GCONV_ILLEGAL_INPUT; \
+ result = __gconv_mark_illegal_input (step_data); \
\
if (irreversible == NULL) \
/* This means we are in call from __gconv_transliterate. In this \
diff --git a/iconv/tst-iconv-sticky-input-error.c b/iconv/tst-iconv-sticky-input-error.c
new file mode 100644
index 0000000..34a245f
--- /dev/null
+++ b/iconv/tst-iconv-sticky-input-error.c
@@ -0,0 +1,135 @@
+/* Test __GCONV_ENCOUNTERED_ILLEGAL_INPUT, as used by iconv -c (bug 32046).
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+
+#include <array_length.h>
+#include <errno.h>
+#include <gconv_int.h>
+#include <iconv.h>
+#include <stdbool.h>
+#include <support/check.h>
+#include <support/support.h>
+#include <support/test-driver.h>
+#include <stdio.h>
+
+/* FROM is the input character set, TO the output character set. If
+ IGNORE is true, the iconv descriptor is set up in the same way as
+ iconv -c would. INPUT is the input string, EXPECTED_OUTPUT the
+ output. OUTPUT_LIMIT is a byte count, specifying how many input
+ bytes are passed to the iconv function on each invocation. */
+static void
+one_direction (const char *from, const char *to, bool ignore,
+ const char *input, const char *expected_output,
+ size_t output_limit)
+{
+ if (test_verbose)
+ {
+ char *quoted_input = support_quote_string (input);
+ char *quoted_output = support_quote_string (expected_output);
+ printf ("info: testing from=\"%s\" to=\"%s\" ignore=%d input=\"%s\""
+ " expected_output=\"%s\" output_limit=%zu\n",
+ from, to, (int) ignore, quoted_input,
+ quoted_output, output_limit);
+ free (quoted_output);
+ free (quoted_input);
+ }
+
+ __gconv_t cd;
+ if (ignore)
+ {
+ struct gconv_spec conv_spec;
+ TEST_VERIFY_EXIT (__gconv_create_spec (&conv_spec, from, to)
+ == &conv_spec);
+ conv_spec.ignore = true;
+ cd = (iconv_t) -1;
+ TEST_COMPARE (__gconv_open (&conv_spec, &cd, 0), __GCONV_OK);
+ __gconv_destroy_spec (&conv_spec);
+ }
+ else
+ cd = iconv_open (to, from);
+ TEST_VERIFY_EXIT (cd != (iconv_t) -1);
+
+ char *input_ptr = (char *) input;
+ size_t input_len = strlen (input);
+ char output_buf[20];
+ char *output_ptr = output_buf;
+ size_t output_len;
+ do
+ {
+ output_len = array_end (output_buf) - output_ptr;
+ if (output_len > output_limit)
+ /* Limit the buffer size as requested by the caller. */
+ output_len = output_limit;
+ TEST_VERIFY_EXIT (output_len > 0);
+ if (input_len == 0)
+ /* Trigger final flush. */
+ input_ptr = NULL;
+ char *old_input_ptr = input_ptr;
+ size_t ret = iconv (cd, &input_ptr, &input_len,
+ &output_ptr, &output_len);
+ if (ret == (size_t) -1)
+ {
+ if (errno != EILSEQ)
+ TEST_COMPARE (errno, E2BIG);
+ }
+
+ if (input_ptr == old_input_ptr)
+ /* Avoid endless loop if stuck on an invalid input character. */
+ break;
+ }
+ while (input_ptr != NULL);
+
+ /* Test the sticky illegal input bit. */
+ TEST_VERIFY (__gconv_has_illegal_input (cd));
+
+ TEST_COMPARE_BLOB (expected_output, strlen (expected_output),
+ output_buf, output_ptr - output_buf);
+
+ TEST_COMPARE (iconv_close (cd), 0);
+}
+
+static int
+do_test (void)
+{
+ static const char charsets[][14] =
+ {
+ "ASCII",
+ "ASCII//IGNORE",
+ "UTF-8",
+ "UTF-8//IGNORE",
+ };
+
+ for (size_t from_idx = 0; from_idx < array_length (charsets); ++from_idx)
+ for (size_t to_idx = 0; to_idx < array_length (charsets); ++to_idx)
+ for (int do_ignore = 0; do_ignore < 2; ++do_ignore)
+ for (int limit = 1; limit < 5; ++limit)
+ for (int skip = 0; skip < 3; ++skip)
+ {
+ const char *expected_output;
+ if (do_ignore || strstr (charsets[to_idx], "//IGNORE") != NULL)
+ expected_output = "ABXY" + skip;
+ else
+ expected_output = "AB" + skip;
+ one_direction (charsets[from_idx], charsets[to_idx], do_ignore,
+ "AB\xffXY" + skip, expected_output, limit);
+ }
+
+ return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/iconv/tst-iconv_prog-buffer.sh b/iconv/tst-iconv_prog-buffer.sh
new file mode 100644
index 0000000..23098ac
--- /dev/null
+++ b/iconv/tst-iconv_prog-buffer.sh
@@ -0,0 +1,306 @@
+#!/bin/bash
+# Test for iconv (the program) buffer management.
+# Copyright (C) 2024 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <https://www.gnu.org/licenses/>.
+
+# Arguments:
+# root of the build tree ($(objpfx-common))
+# test command wrapper (for running on the board/with new ld.so)
+# extra flags to pass to iconv
+# number of times to double the input files in size (default: 0)
+
+exec 2>&1
+set -e
+
+exec {logfd}>&1
+
+codir=$1
+test_program_prefix="$2"
+
+# Use internal converters to avoid issues with module loading.
+iconv_args="-f ASCII -t UTF-8 $3"
+
+file_size_doublings=${4-0}
+
+failure=false
+
+tmp=`mktemp -d`
+trap 'rm -rf "$tmp"' 0
+echo ABC > "$tmp/abc"
+echo DEF > "$tmp/def"
+echo GGG > "$tmp/ggg"
+echo HH > "$tmp/hh"
+echo XY > "$tmp/xy"
+echo ZT > "$tmp/zt"
+echo OUT > "$tmp/out-template"
+: > "$tmp/empty"
+printf '\xff' > "$tmp/0xff"
+
+# Length should be a prime number, to help with buffer alignment testing.
+printf '\xc3\xa4\xe2\x80\x94\xe2\x80\x94\xc3\xa4\n' > "$tmp/utf8-sequence"
+
+# Double all files to produce larger buffers.
+for p in "$tmp"/* ; do
+ i=0
+ while test $i -lt $file_size_doublings; do
+ cat "$p" "$p" > "$tmp/scratch"
+ mv "$tmp/scratch" "$p"
+ i=$(($i + 1))
+ done
+done
+
+cat "$tmp/xy" "$tmp/0xff" "$tmp/zt" > "$tmp/0xff-wrapped"
+
+run_iconv () {
+ local c=0
+ if test "${FUNCNAME[2]}" = main; then
+ c=1
+ fi
+ echo "${BASH_SOURCE[$c]}:${BASH_LINENO[$c]}: iconv $iconv_args $@" >&$logfd
+ $test_program_prefix $codir/iconv/iconv_prog $iconv_args "$@"
+}
+
+check_out_expected () {
+ if ! cmp -s "$tmp/out" "$tmp/expected" ; then
+ echo "error: iconv output difference" >&$logfd
+ echo "*** expected ***" >&$logfd
+ cat "$tmp/expected" >&$logfd
+ echo "*** actual ***" >&$logfd
+ cat "$tmp/out" >&$logfd
+ failure=true
+ fi
+}
+
+expect_files () {
+ local f
+ ! test -z "$1"
+ cp "$tmp/$1" "$tmp/expected"
+ shift
+ for f in "$@" ; do
+ cat "$tmp/$f" >> "$tmp/expected"
+ done
+ check_out_expected
+}
+
+check_out () {
+ cat > "$tmp/expected"
+ check_out_expected
+}
+
+expect_exit () {
+ local expected=$1
+ shift
+ # Prevent failure for stopping the script.
+ if "$@" ; then
+ actual=$?
+ else
+ actual=$?
+ fi
+ if test "$actual" -ne "$expected"; then
+ echo "error: expected exit status $expected, not $actual" >&$logfd
+ exit 1
+ fi
+}
+
+ignore_failure () {
+ set +e
+ "$@"
+ status=$?
+ set -e
+}
+
+# Concatentation test.
+run_iconv -o "$tmp/out" "$tmp/abc" "$tmp/def"
+expect_files abc def
+
+# Single-file in-place conversion.
+run_iconv -o "$tmp/out" "$tmp/out"
+expect_files abc def
+
+# Multiple input files with in-place conversion.
+
+run_iconv -o "$tmp/out" "$tmp/out" "$tmp/abc"
+expect_files abc def abc
+
+run_iconv -o "$tmp/out" "$tmp/ggg" "$tmp/out"
+expect_files ggg abc def abc
+
+run_iconv -o "$tmp/out" "$tmp/hh" "$tmp/out" "$tmp/hh"
+expect_files hh ggg abc def abc hh
+
+cp "$tmp/out-template" "$tmp/out"
+run_iconv -o "$tmp/out" "$tmp/ggg" "$tmp/out" "$tmp/out" "$tmp/ggg"
+expect_files ggg out-template out-template ggg
+
+cp "$tmp/out-template" "$tmp/out"
+run_iconv -o "$tmp/out" "$tmp/ggg" "$tmp/out" "$tmp/hh" "$tmp/out" "$tmp/ggg"
+expect_files ggg out-template hh out-template ggg
+
+# Empty output should truncate the output file if exists.
+
+cp "$tmp/out-template" "$tmp/out"
+run_iconv -o "$tmp/out" </dev/null
+expect_files empty
+
+cp "$tmp/out-template" "$tmp/out"
+run_iconv -o "$tmp/out" - </dev/null
+expect_files empty
+
+cp "$tmp/out-template" "$tmp/out"
+run_iconv -o "$tmp/out" /dev/null
+expect_files empty
+
+cp "$tmp/out-template" "$tmp/out"
+expect_exit 1 run_iconv -c -o "$tmp/out" "$tmp/0xff"
+expect_files empty
+
+# But not if we are writing to standard output.
+
+cp "$tmp/out-template" "$tmp/out"
+run_iconv </dev/null >>"$tmp/out"
+expect_files out-template
+
+cp "$tmp/out-template" "$tmp/out"
+run_iconv - </dev/null >>"$tmp/out"
+expect_files out-template
+
+cp "$tmp/out-template" "$tmp/out"
+run_iconv /dev/null >>"$tmp/out"
+expect_files out-template
+
+# Conversion errors should avoid clobbering an existing file if
+# it is also an input file.
+
+cp "$tmp/0xff" "$tmp/out"
+expect_exit 1 run_iconv -o "$tmp/out" "$tmp/out"
+expect_files 0xff
+
+cp "$tmp/0xff" "$tmp/out"
+expect_exit 1 run_iconv -o "$tmp/out" < "$tmp/out"
+expect_files 0xff
+
+cp "$tmp/0xff" "$tmp/out"
+expect_exit 1 run_iconv -o "$tmp/out" - < "$tmp/out"
+expect_files 0xff
+
+cp "$tmp/0xff-wrapped" "$tmp/out"
+expect_exit 1 run_iconv -o "$tmp/out" "$tmp/out"
+expect_files 0xff-wrapped
+
+cp "$tmp/0xff-wrapped" "$tmp/out"
+expect_exit 1 run_iconv -o "$tmp/out" < "$tmp/out"
+expect_files 0xff-wrapped
+
+cp "$tmp/0xff-wrapped" "$tmp/out"
+expect_exit 1 run_iconv -o "$tmp/out" - < "$tmp/out"
+expect_files 0xff-wrapped
+
+cp "$tmp/0xff-wrapped" "$tmp/out"
+expect_exit 1 run_iconv -o "$tmp/out" "$tmp/abc" "$tmp/out"
+expect_files 0xff-wrapped
+
+cp "$tmp/0xff-wrapped" "$tmp/out"
+expect_exit 1 run_iconv -o "$tmp/out" "$tmp/abc" - < "$tmp/out"
+expect_files 0xff-wrapped
+
+# If errors are ignored, the file should be overwritten.
+
+cp "$tmp/0xff-wrapped" "$tmp/out"
+expect_exit 1 run_iconv -c -o "$tmp/out" "$tmp/out"
+expect_files xy zt
+
+cp "$tmp/0xff" "$tmp/out"
+expect_exit 1 run_iconv -c -o "$tmp/out" "$tmp/abc" "$tmp/out" "$tmp/def"
+expect_files abc def
+
+cp "$tmp/out-template" "$tmp/out"
+expect_exit 1 \
+ run_iconv -c -o "$tmp/out" "$tmp/abc" "$tmp/0xff" "$tmp/def" 2>"$tmp/err"
+! test -s "$tmp/err"
+expect_files abc def
+
+cp "$tmp/out-template" "$tmp/out"
+expect_exit 1 run_iconv -c -o "$tmp/out" \
+ "$tmp/abc" "$tmp/0xff-wrapped" "$tmp/def" 2>"$tmp/err"
+! test -s "$tmp/err"
+expect_files abc xy zt def
+
+cp "$tmp/0xff-wrapped" "$tmp/out"
+expect_exit 1 run_iconv -c -o "$tmp/out" "$tmp/out" "$tmp/abc" "$tmp/out" "$tmp/def"
+expect_files xy zt abc xy zt def
+
+cp "$tmp/0xff-wrapped" "$tmp/out"
+expect_exit 1 run_iconv -o "$tmp/out" \
+ "$tmp/out" "$tmp/abc" "$tmp/out" "$tmp/def"
+expect_files 0xff-wrapped
+
+cp "$tmp/0xff-wrapped" "$tmp/out"
+expect_exit 1 run_iconv -c -o "$tmp/out" \
+ "$tmp/abc" "$tmp/out" "$tmp/def" "$tmp/out"
+expect_files abc xy zt def xy zt
+
+# If the file does not exist yet, it should not be created on error.
+
+rm "$tmp/out"
+expect_exit 1 run_iconv -o "$tmp/out" "$tmp/0xff"
+! test -e "$tmp/out"
+
+expect_exit 1 run_iconv -o "$tmp/out" < "$tmp/0xff"
+! test -e "$tmp/out"
+
+expect_exit 1 run_iconv -o "$tmp/out" "$tmp/abc" "$tmp/0xff" "$tmp/def"
+! test -e "$tmp/out"
+
+expect_exit 1 run_iconv -o "$tmp/out" "$tmp/abc" - < "$tmp/0xff" "$tmp/def"
+! test -e "$tmp/out"
+
+# Listing standard input multiple times should not fail (bug 32050).
+
+run_iconv -o "$tmp/out" "$tmp/xy" - - "$tmp/zt" < "$tmp/abc"
+expect_files xy abc zt
+
+# NB: Extra iconv args are ignored after this point. Actual
+# multi-byte conversion does not work with tiny buffers.
+iconv_args="-f UTF-8 -t ASCII"
+
+printf 'x\n\xc3' > "$tmp/incomplete"
+expect_exit 1 run_iconv -o "$tmp/out" "$tmp/incomplete"
+check_out <<EOF
+x
+EOF
+
+# Test buffering behavior if the buffer ends with an incomplete
+# multi-byte sequence.
+prefix=""
+prefix_length=0
+while test $prefix_length -lt 12; do
+ echo "info: testing prefix length $prefix_length" 2>&$logfd
+ printf "%s" "$prefix" > "$tmp/prefix"
+ cat "$tmp/prefix" "$tmp/utf8-sequence" > "$tmp/tmp"
+ iconv_args="-f UTF-8 -t UCS-4"
+ run_iconv -o "$tmp/out1" "$tmp/tmp"
+ iconv_args="-f UCS-4 -t UTF-8"
+ run_iconv -o "$tmp/out" "$tmp/out1"
+ expect_files prefix utf8-sequence
+
+ prefix="$prefix@"
+ prefix_length=$(($prefix_length + 1))
+done
+
+if $failure ; then
+ exit 1
+fi
diff --git a/iconv/tst-translit-locale b/iconv/tst-translit-locale
new file mode 100644
index 0000000..712b086
--- /dev/null
+++ b/iconv/tst-translit-locale
@@ -0,0 +1,10 @@
+# Test multi-character transliteration rule
+
+LC_CTYPE
+copy "POSIX"
+
+translit_start
+"ÄÄ" "AA"
+translit_end
+
+END LC_CTYPE
diff --git a/iconv/tst-translit-mchar.c b/iconv/tst-translit-mchar.c
new file mode 100644
index 0000000..7d432ea
--- /dev/null
+++ b/iconv/tst-translit-mchar.c
@@ -0,0 +1,48 @@
+/* Test multi-character transliterations.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <locale.h>
+#include <iconv.h>
+#include <support/support.h>
+#include <support/check.h>
+
+static int
+do_test (void)
+{
+ iconv_t cd;
+ /* An input sequence that shares a common prefix with a transliteration
+ rule. */
+ char input[] = "ÄÅ";
+ char *inptr = input;
+ char outbuf[10];
+ char *outptr = outbuf;
+ size_t inlen = sizeof (input), outlen = sizeof (outbuf);
+ size_t n;
+
+ xsetlocale (LC_CTYPE, "tst-translit");
+
+ cd = iconv_open ("ASCII//TRANSLIT", "UTF-8");
+ TEST_VERIFY (cd != (iconv_t) -1);
+
+ /* This call used to loop infinitely. */
+ n = iconv (cd, &inptr, &inlen, &outptr, &outlen);
+ TEST_VERIFY (iconv_close (cd) == 0);
+ return n == 0;
+}
+
+#include <support/test-driver.c>
diff --git a/iconv/tst-translit-mchar.sh b/iconv/tst-translit-mchar.sh
new file mode 100644
index 0000000..ab7a7f8
--- /dev/null
+++ b/iconv/tst-translit-mchar.sh
@@ -0,0 +1,51 @@
+#!/bin/sh
+# Testing of multi-character transliterations
+# Copyright (C) 2024 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <https://www.gnu.org/licenses/>.
+
+set -e
+
+common_objpfx=$1
+run_program_prefix_before_env=$2
+run_program_env=$3
+run_program_prefix_after_env=$4
+
+# Generate data files.
+# The locale only defines the LC_CTYPE category, so we expect a failure
+# due to warnings.
+ret=0
+${run_program_prefix_before_env} \
+${run_program_env} \
+I18NPATH=../localedata \
+${run_program_prefix_after_env} ${common_objpfx}locale/localedef \
+--quiet -i tst-translit-locale -f UTF-8 ${common_objpfx}iconv/tst-translit || ret=$?
+if [ $ret -gt 1 ]; then
+ echo "FAIL: Locale compilation for tst-translit-locale failed (error $ret)."
+ exit 1
+fi
+
+set -x
+
+# Run the test.
+${run_program_prefix_before_env} \
+${run_program_env} \
+LOCPATH=${common_objpfx}iconv \
+${run_program_prefix_after_env} ${common_objpfx}iconv/tst-translit-mchar
+
+# Local Variables:
+# mode:shell-script
+# End: