aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: David Benjamin <davidben@google.com> 2024-03-28 23:53:10 -0400
committer: Boringssl LUCI CQ <boringssl-scoped@luci-project-accounts.iam.gserviceaccount.com> 2024-03-29 23:26:38 +0000
commit: 27e09a3277d17718902afca16cce7e2fb9a82ec2 (patch)
tree: 6aec43745162fba8f65c35ee0a1a138e6b067e5d
parent: b2966323f10a2d42880f6aad64279a77b5441802 (diff)
download: boringssl-27e09a3277d17718902afca16cce7e2fb9a82ec2.zip
boringssl-27e09a3277d17718902afca16cce7e2fb9a82ec2.tar.gz
boringssl-27e09a3277d17718902afca16cce7e2fb9a82ec2.tar.bz2
Document that our Unicode APIs reject noncharacters
Noncharacters are weird. They're code points and generally expected to pass through string APIs and such, but they're also not meant to be used for "open interchange". We reject them, while most Unicode APIs accept them. They're public API nowadays, so document this.

Change-Id: I56aa436ae954b591d9a00b6560617e1ad5c26d95
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/67568
Auto-Submit: David Benjamin <davidben@google.com>
Reviewed-by: Adam Langley <agl@google.com>
Commit-Queue: Adam Langley <agl@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
-rw-r--r--  crypto/bytestring/unicode.c  5
-rw-r--r--  include/openssl/bytestring.h  7
2 files changed, 9 insertions, 3 deletions
diff --git a/crypto/bytestring/unicode.c b/crypto/bytestring/unicode.c
index 10fba07..76f37f9 100644
--- a/crypto/bytestring/unicode.c
+++ b/crypto/bytestring/unicode.c
@@ -18,11 +18,12 @@
static int is_valid_code_point(uint32_t v) {
- // References in the following are to Unicode 9.0.0.
+ // References in the following are to Unicode 15.0.0.
if (// The Unicode space runs from zero to 0x10ffff (3.4 D9).
v > 0x10ffff ||
// Values 0x...fffe, 0x...ffff, and 0xfdd0-0xfdef are permanently reserved
- // (3.4 D14)
+ // as noncharacters (3.4 D14). See also 23.7. As our APIs are intended for
+ // "open interchange", such as ASN.1, we reject them.
(v & 0xfffe) == 0xfffe ||
(v >= 0xfdd0 && v <= 0xfdef) ||
// Surrogate code points are invalid (3.2 C1).
diff --git a/include/openssl/bytestring.h b/include/openssl/bytestring.h
index 0d48628..961b7e3 100644
--- a/include/openssl/bytestring.h
+++ b/include/openssl/bytestring.h
@@ -639,6 +639,9 @@ OPENSSL_EXPORT int CBB_flush_asn1_set_of(CBB *cbb);
// Unicode utilities.
+//
+// These functions consider noncharacters (see section 23.7 from Unicode 15.0.0)
+// to be invalid code points and will treat them as an error condition.
// The following functions read one Unicode code point from |cbs| with the
// corresponding encoding and store it in |*out|. They return one on success and
@@ -653,7 +656,9 @@ OPENSSL_EXPORT int CBS_get_utf32_be(CBS *cbs, uint32_t *out);
OPENSSL_EXPORT size_t CBB_get_utf8_len(uint32_t u);
// The following functions encode |u| to |cbb| with the corresponding
-// encoding. They return one on success and zero on error.
+// encoding. They return one on success and zero on error. Error conditions
+// include |u| being an invalid code point, or |u| being unencodable in the
+// specified encoding.
OPENSSL_EXPORT int CBB_add_utf8(CBB *cbb, uint32_t u);
OPENSSL_EXPORT int CBB_add_latin1(CBB *cbb, uint32_t u);
OPENSSL_EXPORT int CBB_add_ucs2_be(CBB *cbb, uint32_t u);