diff options
author | Steve Bennett <steveb@workware.net.au> | 2017-12-14 20:27:53 +1000 |
---|---|---|
committer | Steve Bennett <steveb@workware.net.au> | 2017-12-31 11:47:55 +1000 |
commit | 6fd58cfc22b0968e71f67f378555aba74e392847 (patch) | |
tree | 58c8a628858657dded758c818361af325a235c8c /utf8.c | |
parent | 2d2f74ebfeeb056130a37fec19189766a85cec81 (diff) | |
download | jimtcl-6fd58cfc22b0968e71f67f378555aba74e392847.zip jimtcl-6fd58cfc22b0968e71f67f378555aba74e392847.tar.gz jimtcl-6fd58cfc22b0968e71f67f378555aba74e392847.tar.bz2 |
utf8: Be more strict at rejecting invalid UTF-8 sequences.
RFC 3629 says:
"Implementations of the decoding algorithm above MUST protect against
decoding invalid sequences."
Signed-off-by: Steve Bennett <steveb@workware.net.au>
Diffstat (limited to 'utf8.c')
-rw-r--r-- | utf8.c | 15 |
1 files changed, 12 insertions, 3 deletions
@@ -131,19 +131,28 @@ int utf8_tounicode(const char *str, int *uc) if (s[0] < 0xe0) { if ((s[1] & 0xc0) == 0x80) { *uc = ((s[0] & ~0xc0) << 6) | (s[1] & ~0x80); - return 2; + if (*uc >= 0x80) { + return 2; + } + /* Otherwise this is an invalid sequence */ } } else if (s[0] < 0xf0) { if (((str[1] & 0xc0) == 0x80) && ((str[2] & 0xc0) == 0x80)) { *uc = ((s[0] & ~0xe0) << 12) | ((s[1] & ~0x80) << 6) | (s[2] & ~0x80); - return 3; + if (*uc >= 0x800) { + return 3; + } + /* Otherwise this is an invalid sequence */ } } else if (s[0] < 0xf8) { if (((str[1] & 0xc0) == 0x80) && ((str[2] & 0xc0) == 0x80) && ((str[3] & 0xc0) == 0x80)) { *uc = ((s[0] & ~0xf0) << 18) | ((s[1] & ~0x80) << 12) | ((s[2] & ~0x80) << 6) | (s[3] & ~0x80); - return 4; + if (*uc >= 0x10000) { + return 4; + } + /* Otherwise this is an invalid sequence */ } } |