aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGreg Hudson <ghudson@mit.edu>2009-05-14 16:16:32 +0000
committerGreg Hudson <ghudson@mit.edu>2009-05-14 16:16:32 +0000
commitee699ef91ba36719e50ce9dc5d54dd3896740917 (patch)
tree07979e330dc4412208e29eef89a02e6d7f515306 /src
parentd8c1fcc9fc7690629d10536750e6a4f873c2cbc1 (diff)
downloadkrb5-ee699ef91ba36719e50ce9dc5d54dd3896740917.zip
krb5-ee699ef91ba36719e50ce9dc5d54dd3896740917.tar.gz
krb5-ee699ef91ba36719e50ce9dc5d54dd3896740917.tar.bz2
UCS2 support doesn't handle upper half of BMP
Make krb5_ucs2 an unsigned type. Eliminate the need for distinguished values for ucs2 and ucs4 characters by changing the API of the single-character conversion routines. ticket: 6489 tags: pullup target_version: 1.7 git-svn-id: svn://anonsvn.mit.edu/krb5/trunk@22350 dc483132-0cff-0310-8789-dd5450dbe970
Diffstat (limited to 'src')
-rw-r--r--src/include/k5-utf8.h11
-rw-r--r--src/lib/krb5/unicode/ucstr.c6
-rw-r--r--src/util/support/utf8.c66
3 files changed, 43 insertions, 40 deletions
diff --git a/src/include/k5-utf8.h b/src/include/k5-utf8.h
index b5a3945..e3f134b 100644
--- a/src/include/k5-utf8.h
+++ b/src/include/k5-utf8.h
@@ -84,9 +84,9 @@
#endif
#if INT_MAX == 0x7fff
-typedef int krb5_ucs2;
+typedef unsigned int krb5_ucs2;
#elif SHRT_MAX == 0x7fff
-typedef short krb5_ucs2;
+typedef unsigned short krb5_ucs2;
#else
#error undefined 16 bit type
#endif
@@ -101,15 +101,12 @@ typedef short krb5_ucs4;
#error: undefined 32 bit type
#endif
-#define KRB5_UCS2_INVALID ((krb5_ucs2)0x8000)
-#define KRB5_UCS4_INVALID ((krb5_ucs4)0x80000000)
-
#define KRB5_MAX_UTF8_LEN (sizeof(krb5_ucs2) * 3/2)
-krb5_ucs2 krb5int_utf8_to_ucs2(const char *p);
+int krb5int_utf8_to_ucs2(const char *p, krb5_ucs2 *out);
size_t krb5int_ucs2_to_utf8(krb5_ucs2 c, char *buf);
-krb5_ucs4 krb5int_utf8_to_ucs4(const char *p);
+int krb5int_utf8_to_ucs4(const char *p, krb5_ucs4 *out);
size_t krb5int_ucs4_to_utf8(krb5_ucs4 c, char *buf);
int
diff --git a/src/lib/krb5/unicode/ucstr.c b/src/lib/krb5/unicode/ucstr.c
index e3c3c80..ec23688 100644
--- a/src/lib/krb5/unicode/ucstr.c
+++ b/src/lib/krb5/unicode/ucstr.c
@@ -397,8 +397,7 @@ krb5int_utf8_normcmp(
/* convert and normalize 1st string */
for (i = 0, ulen = 0; i < l1; i += len, ulen++) {
- ucs[ulen] = krb5int_utf8_to_ucs4(s1 + i);
- if (ucs[ulen] == KRB5_UCS4_INVALID) {
+ if (krb5int_utf8_to_ucs4(s1 + i, &ucs[ulen]) == -1) {
free(ucs);
return -1; /* what to do??? */
}
@@ -420,8 +419,7 @@ krb5int_utf8_normcmp(
/* convert and normalize 2nd string */
for (i = 0, ulen = 0; i < l2; i += len, ulen++) {
- ucs[ulen] = krb5int_utf8_to_ucs4(s2 + i);
- if (ucs[ulen] == KRB5_UCS4_INVALID) {
+ if (krb5int_utf8_to_ucs4(s2 + i, &ucs[ulen]) == -1) {
free(ucsout1);
free(ucs);
return 1; /* what to do??? */
diff --git a/src/util/support/utf8.c b/src/util/support/utf8.c
index f0d764e..4468673 100644
--- a/src/util/support/utf8.c
+++ b/src/util/support/utf8.c
@@ -159,7 +159,11 @@ int krb5int_utf8_charlen2(const char *p)
return i;
}
-krb5_ucs4 krb5int_utf8_to_ucs4(const char *p)
+/*
+ * Convert a UTF8 character to a UCS4 character. Return 0 on success,
+ * -1 on failure.
+ */
+int krb5int_utf8_to_ucs4(const char *p, krb5_ucs4 *out)
{
const unsigned char *c = (const unsigned char *) p;
krb5_ucs4 ch;
@@ -167,33 +171,35 @@ krb5_ucs4 krb5int_utf8_to_ucs4(const char *p)
static unsigned char mask[] = {
0, 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01 };
+ *out = 0;
len = KRB5_UTF8_CHARLEN2(p, len);
if (len == 0)
- return KRB5_UCS4_INVALID;
+ return -1;
ch = c[0] & mask[len];
for (i = 1; i < len; i++) {
- if ((c[i] & 0xc0) != 0x80) {
- return KRB5_UCS4_INVALID;
- }
+ if ((c[i] & 0xc0) != 0x80)
+ return -1;
ch <<= 6;
ch |= c[i] & 0x3f;
}
- return ch;
+ *out = ch;
+ return 0;
}
-krb5_ucs2 krb5int_utf8_to_ucs2(const char *p)
+int krb5int_utf8_to_ucs2(const char *p, krb5_ucs2 *out)
{
- krb5_ucs4 ch = krb5int_utf8_to_ucs4(p);
-
- if (ch == KRB5_UCS4_INVALID || ch > SHRT_MAX)
- return KRB5_UCS2_INVALID;
+ krb5_ucs4 ch;
- return (krb5_ucs2)ch;
+ *out = 0;
+ if (krb5int_utf8_to_ucs4(p, &ch) == -1 || ch > 0xFFFF)
+ return -1;
+ *out = (krb5_ucs2) ch;
+ return 0;
}
/* conv UCS-2 to UTF-8, not used */
@@ -446,10 +452,13 @@ int krb5int_utf8_isupper(const char * p)
/* like strchr() */
char *krb5int_utf8_strchr(const char *str, const char *chr)
{
+ krb5_ucs4 chs, ch;
+
+ if (krb5int_utf8_to_ucs4(chr, &ch) == -1)
+ return NULL;
for ( ; *str != '\0'; KRB5_UTF8_INCR(str)) {
- if (krb5int_utf8_to_ucs4(str) == krb5int_utf8_to_ucs4(chr)) {
+ if (krb5int_utf8_to_ucs4(str, &chs) == 0 && chs == ch)
return (char *)str;
- }
}
return NULL;
@@ -458,14 +467,14 @@ char *krb5int_utf8_strchr(const char *str, const char *chr)
/* like strcspn() but returns number of bytes, not characters */
size_t krb5int_utf8_strcspn(const char *str, const char *set)
{
- const char *cstr;
- const char *cset;
+ const char *cstr, *cset;
+ krb5_ucs4 chstr, chset;
for (cstr = str; *cstr != '\0'; KRB5_UTF8_INCR(cstr)) {
for (cset = set; *cset != '\0'; KRB5_UTF8_INCR(cset)) {
- if (krb5int_utf8_to_ucs4(cstr) == krb5int_utf8_to_ucs4(cset)) {
+ if (krb5int_utf8_to_ucs4(cstr, &chstr) == 0
+ && krb5int_utf8_to_ucs4(cset, &chset) == 0 && chstr == chset)
return cstr - str;
- }
}
}
@@ -475,18 +484,16 @@ size_t krb5int_utf8_strcspn(const char *str, const char *set)
/* like strspn() but returns number of bytes, not characters */
size_t krb5int_utf8_strspn(const char *str, const char *set)
{
- const char *cstr;
- const char *cset;
+ const char *cstr, *cset;
+ krb5_ucs4 chstr, chset;
for (cstr = str; *cstr != '\0'; KRB5_UTF8_INCR(cstr)) {
for (cset = set; ; KRB5_UTF8_INCR(cset)) {
- if (*cset == '\0') {
+ if (*cset == '\0')
return cstr - str;
- }
-
- if (krb5int_utf8_to_ucs4(cstr) == krb5int_utf8_to_ucs4(cset)) {
+ if (krb5int_utf8_to_ucs4(cstr, &chstr) == 0
+ && krb5int_utf8_to_ucs4(cset, &chset) == 0 && chstr == chset)
break;
- }
}
}
@@ -496,13 +503,14 @@ size_t krb5int_utf8_strspn(const char *str, const char *set)
/* like strpbrk(), replaces strchr() as well */
char *krb5int_utf8_strpbrk(const char *str, const char *set)
{
- for ( ; *str != '\0'; KRB5_UTF8_INCR(str)) {
- const char *cset;
+ const char *cset;
+ krb5_ucs4 chstr, chset;
+ for ( ; *str != '\0'; KRB5_UTF8_INCR(str)) {
for (cset = set; *cset != '\0'; KRB5_UTF8_INCR(cset)) {
- if (krb5int_utf8_to_ucs4(str) == krb5int_utf8_to_ucs4(cset)) {
+ if (krb5int_utf8_to_ucs4(str, &chstr) == 0
+ && krb5int_utf8_to_ucs4(cset, &chset) == 0 && chstr == chset)
return (char *)str;
- }
}
}