diff options
author | Heinrich Schuchardt <xypron.glpk@gmx.de> | 2021-02-27 14:08:37 +0100 |
---|---|---|
committer | Heinrich Schuchardt <xypron.glpk@gmx.de> | 2021-03-07 17:37:13 +0100 |
commit | ddbaff53da5b99563fa371db0b09544e139fdabb (patch) | |
tree | 6f471341b3c6e6ae0fb98ad1193553dacf08825e | |
parent | 73bb90cabcdffcd528d1002a12779779196bf200 (diff) | |
download | u-boot-ddbaff53da5b99563fa371db0b09544e139fdabb.zip u-boot-ddbaff53da5b99563fa371db0b09544e139fdabb.tar.gz u-boot-ddbaff53da5b99563fa371db0b09544e139fdabb.tar.bz2 |
lib/charset: utf8_get() should return error
utf8_get() should return an error if hitting an illegal UTF-8 sequence and
not silently convert the input to a question mark.
Correct utf_8() and the its unit test.
console_read_unicode() now will ignore illegal UTF-8 sequences.
Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
-rw-r--r-- | lib/charset.c | 25 | ||||
-rw-r--r-- | test/unicode_ut.c | 7 |
2 files changed, 23 insertions, 9 deletions
diff --git a/lib/charset.c b/lib/charset.c index 1345c8f..946d5ee 100644 --- a/lib/charset.c +++ b/lib/charset.c @@ -32,7 +32,7 @@ static struct capitalization_table capitalization_table[] = * * @read_u8: - stream reader * @src: - string buffer passed to stream reader, optional - * Return: - Unicode code point + * Return: - Unicode code point, or -1 */ static int get_code(u8 (*read_u8)(void *data), void *data) { @@ -78,7 +78,7 @@ static int get_code(u8 (*read_u8)(void *data), void *data) } return ch; error: - return '?'; + return -1; } /** @@ -120,14 +120,21 @@ static u8 read_console(void *data) int console_read_unicode(s32 *code) { - if (!tstc()) { - /* No input available */ - return 1; - } + for (;;) { + s32 c; - /* Read Unicode code */ - *code = get_code(read_console, NULL); - return 0; + if (!tstc()) { + /* No input available */ + return 1; + } + + /* Read Unicode code */ + c = get_code(read_console, NULL); + if (c > 0) { + *code = c; + return 0; + } + } } s32 utf8_get(const char **src) diff --git a/test/unicode_ut.c b/test/unicode_ut.c index 2cc6b5f..154361a 100644 --- a/test/unicode_ut.c +++ b/test/unicode_ut.c @@ -52,6 +52,7 @@ static const char d4[] = {0xf0, 0x90, 0x92, 0x8d, 0xf0, 0x90, 0x92, 0x96, static const char j1[] = {0x6a, 0x31, 0xa1, 0x6c, 0x00}; static const char j2[] = {0x6a, 0x32, 0xc3, 0xc3, 0x6c, 0x00}; static const char j3[] = {0x6a, 0x33, 0xf0, 0x90, 0xf0, 0x00}; +static const char j4[] = {0xa1, 0x00}; static int unicode_test_u16_strlen(struct unit_test_state *uts) { @@ -165,6 +166,12 @@ static int unicode_test_utf8_get(struct unit_test_state *uts) ut_asserteq(0x0001048d, code); ut_asserteq_ptr(s, d4 + 4); + /* Check illegal character */ + s = j4; + code = utf8_get((const char **)&s); + ut_asserteq(-1, code); + ut_asserteq_ptr(j4 + 1, s); + return 0; } UNICODE_TEST(unicode_test_utf8_get); |