diff options
author | Steve Bennett <steveb@workware.net.au> | 2011-07-27 09:31:03 +1000 |
---|---|---|
committer | Steve Bennett <steveb@workware.net.au> | 2011-12-02 20:56:50 +1000 |
commit | 1d72edfab5cff0e7bbc1e1e22ae7b1b6bdc756b4 (patch) | |
tree | 5764121cb8c33a22892da4e0eb89205a600750d3 /jimregexp.c | |
parent | c2e5f7502026349106314843cad7f24020aad7fb (diff) | |
download | jimtcl-1d72edfab5cff0e7bbc1e1e22ae7b1b6bdc756b4.zip jimtcl-1d72edfab5cff0e7bbc1e1e22ae7b1b6bdc756b4.tar.gz jimtcl-1d72edfab5cff0e7bbc1e1e22ae7b1b6bdc756b4.tar.bz2 |
Extend UTF-8 support past the BMP
Code points up to U+1FFFFF are now supported, including
as literals via the new \u{NNNNNN} syntax (up to six hex digits).
Signed-off-by: Steve Bennett <steveb@workware.net.au>
Diffstat (limited to 'jimregexp.c')
-rw-r--r-- | jimregexp.c | 15 |
1 file changed, 13 insertions, 2 deletions
diff --git a/jimregexp.c b/jimregexp.c index 0c5a4dd..e899924 100644 --- a/jimregexp.c +++ b/jimregexp.c @@ -632,7 +632,18 @@ static int reg_decode_escape(const char *s, int *ch) case 't': *ch = '\t'; break; case 'v': *ch = '\v'; break; case 'u': - if ((n = parse_hex(s, 4, ch)) > 0) { + if (*s == '{') { + /* Expect \u{NNNN} */ + n = parse_hex(s + 1, 6, ch); + if (n > 0 && s[n + 1] == '}' && *ch >= 0 && *ch <= 0x1fffff) { + s += n + 2; + } + else { + /* Invalid, so just treat as an escaped 'u' */ + *ch = 'u'; + } + } + else if ((n = parse_hex(s, 4, ch)) > 0) { s += n; } break; @@ -1609,7 +1620,7 @@ static void regdump(regex_t *preg) int s; int op = EXACTLY; /* Arbitrary non-END op. */ int next; - char buf[4]; + char buf[MAX_UTF8_LEN + 1]; int i; for (i = 1; i < preg->p; i++) { |