diff options
author | Tom Rini <trini@konsulko.com> | 2021-03-07 19:43:00 -0500 |
---|---|---|
committer | Tom Rini <trini@konsulko.com> | 2021-03-07 19:43:00 -0500 |
commit | 90964ab5acb29ec6617a9ff340886230b2fce8c7 (patch) | |
tree | fab87d3307e5c4902c07ff4ab630a9a709bc6daa | |
parent | e4dba4ba6f61e8128be0b4200ca2d8cebf62180b (diff) | |
parent | 7d3eff3412886f277c724a9effcbe545c4cdd5b5 (diff) | |
download | u-boot-WIP/07Mar2021.zip u-boot-WIP/07Mar2021.tar.gz u-boot-WIP/07Mar2021.tar.bz2 |
Merge tag 'efi-2021-04-rc3-3' of https://source.denx.de/u-boot/custodians/u-boot-efiWIP/07Mar2021
Pull request for efi-2021-04-rc3-3
New:
* Provide library functions for converting UTF-8 streams either to code
page 437 or Unicode code points.
Bug fixes:
* Fix the capsule update unit tests.
* Use the terminal size of the video console if it is the primary output.
-rw-r--r-- | include/charset.h | 34 | ||||
-rw-r--r-- | lib/charset.c | 96 | ||||
-rw-r--r-- | lib/efi_loader/efi_console.c | 2 | ||||
-rw-r--r-- | lib/efi_loader/efi_unicode_collation.c | 21 | ||||
-rw-r--r-- | test/py/tests/test_efi_capsule/uboot_bin_env.its | 4 | ||||
-rw-r--r-- | test/unicode_ut.c | 114 |
6 files changed, 242 insertions, 29 deletions
diff --git a/include/charset.h b/include/charset.h index cc650a2..a911160 100644 --- a/include/charset.h +++ b/include/charset.h @@ -14,6 +14,11 @@ #define MAX_UTF8_PER_UTF16 3 /** + * codepage_437 - Unicode to codepage 437 translation table + */ +extern const u16 codepage_437[128]; + +/** * console_read_unicode() - read Unicode code point from console * * @code: pointer to store Unicode code point @@ -270,4 +275,33 @@ u16 *u16_strdup(const void *src); */ uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size); +/** + * utf_to_cp() - translate Unicode code point to 8bit codepage + * + * Codepoints that do not exist in the codepage are rendered as question mark. + * + * @c: pointer to Unicode code point to be translated + * @codepage: Unicode to codepage translation table + * Return: 0 on success, -ENOENT if codepoint cannot be translated + */ +int utf_to_cp(s32 *c, const u16 *codepage); + +/** + * utf8_to_cp437_stream() - convert UTF-8 stream to codepage 437 + * + * @c: next UTF-8 character to convert + * @buffer: buffer, at least 5 characters + * Return: next codepage 437 character or 0 + */ +int utf8_to_cp437_stream(u8 c, char *buffer); + +/** + * utf8_to_utf32_stream() - convert UTF-8 stream to UTF-32 + * + * @c: next UTF-8 character to convert + * @buffer: buffer, at least 5 characters + * Return: next codepage 437 character or 0 + */ +int utf8_to_utf32_stream(u8 c, char *buffer); + #endif /* __CHARSET_H_ */ diff --git a/lib/charset.c b/lib/charset.c index 2177014..f44c58d 100644 --- a/lib/charset.c +++ b/lib/charset.c @@ -8,9 +8,16 @@ #include <common.h> #include <charset.h> #include <capitalization.h> +#include <cp437.h> #include <efi_loader.h> +#include <errno.h> #include <malloc.h> +/** + * codepage_437 - Unicode to codepage 437 translation table + */ +const u16 codepage_437[128] = CP437; + static struct capitalization_table capitalization_table[] = #ifdef CONFIG_EFI_UNICODE_CAPITALIZATION UNICODE_CAPITALIZATION_TABLE; @@ -25,7 +32,7 @@ static struct capitalization_table capitalization_table[] = * * @read_u8: - stream reader * @src: - string buffer passed to stream reader, optional - * Return: - Unicode code point + * Return: - Unicode code point, or -1 */ static int get_code(u8 (*read_u8)(void *data), void *data) { @@ -71,7 +78,7 @@ static int get_code(u8 (*read_u8)(void *data), void *data) } return ch; error: - return '?'; + return -1; } /** @@ -113,14 +120,21 @@ static u8 read_console(void *data) int console_read_unicode(s32 *code) { - if (!tstc()) { - /* No input available */ - return 1; - } + for (;;) { + s32 c; - /* Read Unicode code */ - *code = get_code(read_console, NULL); - return 0; + if (!tstc()) { + /* No input available */ + return 1; + } + + /* Read Unicode code */ + c = get_code(read_console, NULL); + if (c > 0) { + *code = c; + return 0; + } + } } s32 utf8_get(const char **src) @@ -466,3 +480,67 @@ uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size) return dest; } + +int utf_to_cp(s32 *c, const u16 *codepage) +{ + if (*c >= 0x80) { + int j; + + /* Look up codepage translation */ + for (j = 0; j < 0x80; ++j) { + if (*c == codepage[j]) { + *c = j + 0x80; + return 0; + } + } + *c = '?'; + return -ENOENT; + } + return 0; +} + +int utf8_to_cp437_stream(u8 c, char *buffer) +{ + char *end; + const char *pos; + s32 s; + int ret; + + for (;;) { + pos = buffer; + end = buffer + strlen(buffer); + *end++ = c; + *end = 0; + s = utf8_get(&pos); + if (s > 0) { + *buffer = 0; + ret = utf_to_cp(&s, codepage_437); + return s; + } + if (pos == end) + return 0; + *buffer = 0; + } +} + +int utf8_to_utf32_stream(u8 c, char *buffer) +{ + char *end; + const char *pos; + s32 s; + + for (;;) { + pos = buffer; + end = buffer + strlen(buffer); + *end++ = c; + *end = 0; + s = utf8_get(&pos); + if (s > 0) { + *buffer = 0; + return s; + } + if (pos == end) + return 0; + *buffer = 0; + } +} diff --git a/lib/efi_loader/efi_console.c b/lib/efi_loader/efi_console.c index edcfce7..c400355 100644 --- a/lib/efi_loader/efi_console.c +++ b/lib/efi_loader/efi_console.c @@ -311,7 +311,7 @@ static void query_console_size(void) const char *stdout_name = env_get("stdout"); int rows = 25, cols = 80; - if (stdout_name && !strcmp(stdout_name, "vidconsole") && + if (stdout_name && !strncmp(stdout_name, "vidconsole", 10) && IS_ENABLED(CONFIG_DM_VIDEO)) { struct stdio_dev *stdout_dev = stdio_get_by_name("vidconsole"); diff --git a/lib/efi_loader/efi_unicode_collation.c b/lib/efi_loader/efi_unicode_collation.c index f6c875b..36be798 100644 --- a/lib/efi_loader/efi_unicode_collation.c +++ b/lib/efi_loader/efi_unicode_collation.c @@ -23,7 +23,7 @@ static const char illegal[] = "+,<=>:;\"/\\|?*[]\x7f"; static const u16 codepage[] = CP1250; #else /* Unicode code points for code page 437 characters 0x80 - 0xff */ -static const u16 codepage[] = CP437; +static const u16 *codepage = codepage_437; #endif /* GUID of the EFI_UNICODE_COLLATION_PROTOCOL2 */ @@ -300,23 +300,10 @@ static bool EFIAPI efi_str_to_fat(struct efi_unicode_collation_protocol *this, break; } c = utf_to_upper(c); - if (c >= 0x80) { - int j; - - /* Look for codepage translation */ - for (j = 0; j < 0x80; ++j) { - if (c == codepage[j]) { - c = j + 0x80; - break; - } - } - if (j >= 0x80) { - c = '_'; - ret = true; - } - } else if (c && (c < 0x20 || strchr(illegal, c))) { - c = '_'; + if (utf_to_cp(&c, codepage) || + (c && (c < 0x20 || strchr(illegal, c)))) { ret = true; + c = '_'; } fat[i] = c; diff --git a/test/py/tests/test_efi_capsule/uboot_bin_env.its b/test/py/tests/test_efi_capsule/uboot_bin_env.its index 31e2f80..fc65907 100644 --- a/test/py/tests/test_efi_capsule/uboot_bin_env.its +++ b/test/py/tests/test_efi_capsule/uboot_bin_env.its @@ -10,7 +10,7 @@ #address-cells = <2>; images { - u-boot-bin@100000 { + u-boot-bin { description = "U-Boot binary on SPI Flash"; data = /incbin/("BINFILE1"); compression = "none"; @@ -21,7 +21,7 @@ algo = "sha1"; }; }; - u-boot-env@150000 { + u-boot-env { description = "U-Boot environment on SPI Flash"; data = /incbin/("BINFILE2"); compression = "none"; diff --git a/test/unicode_ut.c b/test/unicode_ut.c index 6130ef0..6f6aea5 100644 --- a/test/unicode_ut.c +++ b/test/unicode_ut.c @@ -47,11 +47,15 @@ static const char d3[] = {0xe6, 0xbd, 0x9c, 0xe6, 0xb0, 0xb4, 0xe8, 0x89, /* Three letters translating to two utf-16 word each */ static const char d4[] = {0xf0, 0x90, 0x92, 0x8d, 0xf0, 0x90, 0x92, 0x96, 0xf0, 0x90, 0x92, 0x87, 0x00}; +/* Letter not in code page 437 */ +static const char d5[] = {0xCE, 0x92, 0x20, 0x69, 0x73, 0x20, 0x6E, 0x6F, + 0x74, 0x20, 0x42, 0x00}; /* Illegal utf-8 strings */ static const char j1[] = {0x6a, 0x31, 0xa1, 0x6c, 0x00}; static const char j2[] = {0x6a, 0x32, 0xc3, 0xc3, 0x6c, 0x00}; static const char j3[] = {0x6a, 0x33, 0xf0, 0x90, 0xf0, 0x00}; +static const char j4[] = {0xa1, 0x00}; static int unicode_test_u16_strlen(struct unit_test_state *uts) { @@ -165,6 +169,12 @@ static int unicode_test_utf8_get(struct unit_test_state *uts) ut_asserteq(0x0001048d, code); ut_asserteq_ptr(s, d4 + 4); + /* Check illegal character */ + s = j4; + code = utf8_get((const char **)&s); + ut_asserteq(-1, code); + ut_asserteq_ptr(j4 + 1, s); + return 0; } UNICODE_TEST(unicode_test_utf8_get); @@ -595,6 +605,110 @@ static int unicode_test_u16_strsize(struct unit_test_state *uts) } UNICODE_TEST(unicode_test_u16_strsize); +static int unicode_test_utf_to_cp(struct unit_test_state *uts) +{ + int ret; + s32 c; + + c = '\n'; + ret = utf_to_cp(&c, codepage_437); + ut_asserteq(0, ret); + ut_asserteq('\n', c); + + c = 'a'; + ret = utf_to_cp(&c, codepage_437); + ut_asserteq(0, ret); + ut_asserteq('a', c); + + c = 0x03c4; /* Greek small letter tau */ + ret = utf_to_cp(&c, codepage_437); + ut_asserteq(0, ret); + ut_asserteq(0xe7, c); + + c = 0x03a4; /* Greek capital letter tau */ + ret = utf_to_cp(&c, codepage_437); + ut_asserteq(-ENOENT, ret); + ut_asserteq('?', c); + + return 0; +} +UNICODE_TEST(unicode_test_utf_to_cp); + +static void utf8_to_cp437_stream_helper(const char *in, char *out) +{ + char buffer[5]; + int ret; + + *buffer = 0; + for (; *in; ++in) { + ret = utf8_to_cp437_stream(*in, buffer); + if (ret) + *out++ = ret; + } + *out = 0; +} + +static int unicode_test_utf8_to_cp437_stream(struct unit_test_state *uts) +{ + char buf[16]; + + utf8_to_cp437_stream_helper(d1, buf); + ut_asserteq_str("U-Boot", buf); + utf8_to_cp437_stream_helper(d2, buf); + ut_asserteq_str("kafb\xa0tur", buf); + utf8_to_cp437_stream_helper(d5, buf); + ut_asserteq_str("? is not B", buf); + utf8_to_cp437_stream_helper(j2, buf); + ut_asserteq_str("j2l", buf); + + return 0; +} +UNICODE_TEST(unicode_test_utf8_to_cp437_stream); + +static void utf8_to_utf32_stream_helper(const char *in, s32 *out) +{ + char buffer[5]; + int ret; + + *buffer = 0; + for (; *in; ++in) { + ret = utf8_to_utf32_stream(*in, buffer); + if (ret) + *out++ = ret; + } + *out = 0; +} + +static int unicode_test_utf8_to_utf32_stream(struct unit_test_state *uts) +{ + s32 buf[16]; + + const u32 u1[] = {0x55, 0x2D, 0x42, 0x6F, 0x6F, 0x74, 0x0000}; + const u32 u2[] = {0x6B, 0x61, 0x66, 0x62, 0xE1, 0x74, 0x75, 0x72, 0x00}; + const u32 u3[] = {0x0392, 0x20, 0x69, 0x73, 0x20, 0x6E, 0x6F, 0x74, + 0x20, 0x42, 0x00}; + const u32 u4[] = {0x6A, 0x32, 0x6C, 0x00}; + + memset(buf, 0, sizeof(buf)); + utf8_to_utf32_stream_helper(d1, buf); + ut_asserteq_mem(u1, buf, sizeof(u1)); + + memset(buf, 0, sizeof(buf)); + utf8_to_utf32_stream_helper(d2, buf); + ut_asserteq_mem(u2, buf, sizeof(u2)); + + memset(buf, 0, sizeof(buf)); + utf8_to_utf32_stream_helper(d5, buf); + ut_asserteq_mem(u3, buf, sizeof(u3)); + + memset(buf, 0, sizeof(buf)); + utf8_to_utf32_stream_helper(j2, buf); + ut_asserteq_mem(u4, buf, sizeof(u4)); + + return 0; +} +UNICODE_TEST(unicode_test_utf8_to_utf32_stream); + #ifdef CONFIG_EFI_LOADER static int unicode_test_efi_create_indexed_name(struct unit_test_state *uts) { |