From 0c34593491e4ea2de79ae85fedb26252529b5f35 Mon Sep 17 00:00:00 2001 From: Florian Weimer Date: Tue, 17 May 2022 11:38:29 +0200 Subject: locale: localdef input files are now encoded in UTF-8 --- locale/programs/linereader.c | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/locale/programs/linereader.c b/locale/programs/linereader.c index ca4abb0..485ccaf 100644 --- a/locale/programs/linereader.c +++ b/locale/programs/linereader.c @@ -688,7 +688,11 @@ get_string (struct linereader *lr, const struct charmap_t *charmap, buf2 = NULL; while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF) - addc (&lrb, ch); + { + if (ch >= 0x80) + lr_error (lr, _("illegal 8-bit character in untranslated string")); + addc (&lrb, ch); + } /* Catch errors with trailing escape character. */ if (lrb.act > 0 && lrb.buf[lrb.act - 1] == lr->escape_char @@ -733,13 +737,35 @@ get_string (struct linereader *lr, const struct charmap_t *charmap, if (ch == lr->escape_char) { ch = lr_getc (lr); + if (ch >= 0x80) + { + lr_error (lr, _("illegal 8-bit escape sequence")); + illegal_string = true; + break; + } if (ch == '\n' || ch == EOF) break; } + else if (ch < 0x80) + { + wch = ch; + addc (&lrb, ch); + } + else /* UTF-8 sequence. */ + { + if (!get_string_decode_utf8 (lr, ch, &wch)) + { + illegal_string = true; + break; + } + get_string_U_char (locale, charmap, repertoire, wch, + &lrb, &illegal_string); + if (illegal_string) + break; + } - addc (&lrb, ch); if (return_widestr) - ADDWC ((uint32_t) ch); + ADDWC (wch); continue; } -- cgit v1.1