diff options
author | Thomas Schwinge <thomas@codesourcery.com> | 2023-04-07 22:04:54 +0200 |
---|---|---|
committer | Thomas Schwinge <thomas_schwinge@mentor.com> | 2023-04-07 22:04:54 +0200 |
commit | b1d131f8941cd30ee7904698e7d3bcc20686b61c (patch) | |
tree | 3dcb318ea104daa2fcc771c5e3de080414c45730 /libcpp/charset.cc | |
parent | 353f5e8f00a2f897d2974f07d27dd10f79666889 (diff) | |
parent | 939fb3fffebf7be2638ddef25dbe1c15f2aaa9d6 (diff) | |
download | gcc-b1d131f8941cd30ee7904698e7d3bcc20686b61c.zip gcc-b1d131f8941cd30ee7904698e7d3bcc20686b61c.tar.gz gcc-b1d131f8941cd30ee7904698e7d3bcc20686b61c.tar.bz2 |
Merge commit '939fb3fffebf7be2638ddef25dbe1c15f2aaa9d6' into HEAD
Diffstat (limited to 'libcpp/charset.cc')
-rw-r--r-- | libcpp/charset.cc | 27 |
1 files changed, 27 insertions, 0 deletions
diff --git a/libcpp/charset.cc b/libcpp/charset.cc index 3c47d4f..d7f323b 100644 --- a/libcpp/charset.cc +++ b/libcpp/charset.cc @@ -1864,6 +1864,33 @@ _cpp_valid_utf8 (cpp_reader *pfile, return true; } +/* Return true iff BUFFER of size NUM_BYTES is validly-encoded UTF-8. */ + +extern bool +cpp_valid_utf8_p (const char *buffer, size_t num_bytes) +{ + const uchar *iter = (const uchar *)buffer; + size_t bytesleft = num_bytes; + while (bytesleft > 0) + { + /* one_utf8_to_cppchar implements 5-byte and 6-byte sequences as per + RFC 2279, but this has been superseded by RFC 3629, which + restricts UTF-8 to 1-byte through 4-byte sequences, and + states "the octet values C0, C1, F5 to FF never appear". + + Reject such values. */ + if (*iter >= 0xf4) + return false; + + cppchar_t cp; + int err = one_utf8_to_cppchar (&iter, &bytesleft, &cp); + if (err) + return false; + } + /* No problems encountered. */ + return true; +} + /* Subroutine of convert_hex and convert_oct. N is the representation in the execution character set of a numeric escape; write it into the string buffer TBUF and update the end-of-string pointer therein. WIDE |