utf8: Be more strict at rejecting invalid UTF-8 sequences.

RFC 3629 says: "Implementations of the decoding algorithm above MUST protect against decoding invalid sequences."

Signed-off-by: Steve Bennett <steveb@workware.net.au>
author: Steve Bennett <steveb@workware.net.au> 2017-12-14 20:27:53 +1000
committer: Steve Bennett <steveb@workware.net.au> 2017-12-31 11:47:55 +1000
commit: 6fd58cfc22b0968e71f67f378555aba74e392847 (patch)
tree: 58c8a628858657dded758c818361af325a235c8c /utf8.c
parent: 2d2f74ebfeeb056130a37fec19189766a85cec81 (diff)
download: jimtcl-6fd58cfc22b0968e71f67f378555aba74e392847.zip
jimtcl-6fd58cfc22b0968e71f67f378555aba74e392847.tar.gz
jimtcl-6fd58cfc22b0968e71f67f378555aba74e392847.tar.bz2
1 files changed, 12 insertions, 3 deletions
diff --git a/utf8.c b/utf8.c
index 2698f66..56a036b 100644
--- a/utf8.c
+++ b/utf8.c
@@ -131,19 +131,28 @@ int utf8_tounicode(const char *str, int *uc)
     if (s[0] < 0xe0) {
         if ((s[1] & 0xc0) == 0x80) {
             *uc = ((s[0] & ~0xc0) << 6) | (s[1] & ~0x80);
-            return 2;
+            if (*uc >= 0x80) {
+                return 2;
+            }
+            /* Otherwise this is an invalid sequence */
         }
     }
     else if (s[0] < 0xf0) {
         if (((str[1] & 0xc0) == 0x80) && ((str[2] & 0xc0) == 0x80)) {
             *uc = ((s[0] & ~0xe0) << 12) | ((s[1] & ~0x80) << 6) | (s[2] & ~0x80);
-            return 3;
+            if (*uc >= 0x800) {
+                return 3;
+            }
+            /* Otherwise this is an invalid sequence */
         }
     }
     else if (s[0] < 0xf8) {
         if (((str[1] & 0xc0) == 0x80) && ((str[2] & 0xc0) == 0x80) && ((str[3] & 0xc0) == 0x80)) {
             *uc = ((s[0] & ~0xf0) << 18) | ((s[1] & ~0x80) << 12) | ((s[2] & ~0x80) << 6) | (s[3] & ~0x80);
-            return 4;
+            if (*uc >= 0x10000) {
+                return 4;
+            }
+            /* Otherwise this is an invalid sequence */
         }
     }
author	Steve Bennett <steveb@workware.net.au>	2017-12-14 20:27:53 +1000
committer	Steve Bennett <steveb@workware.net.au>	2017-12-31 11:47:55 +1000
commit	6fd58cfc22b0968e71f67f378555aba74e392847 (patch)
tree	58c8a628858657dded758c818361af325a235c8c /utf8.c
parent	2d2f74ebfeeb056130a37fec19189766a85cec81 (diff)
download	jimtcl-6fd58cfc22b0968e71f67f378555aba74e392847.zip jimtcl-6fd58cfc22b0968e71f67f378555aba74e392847.tar.gz jimtcl-6fd58cfc22b0968e71f67f378555aba74e392847.tar.bz2