aboutsummaryrefslogtreecommitdiff
path: root/utf8.c
diff options
context:
space:
mode:
Diffstat (limited to 'utf8.c')
-rw-r--r-- utf8.c 22
1 files changed, 22 insertions, 0 deletions
diff --git a/utf8.c b/utf8.c
index 3be9899..3b32a7f 100644
--- a/utf8.c
+++ b/utf8.c
@@ -10,6 +10,7 @@
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
+#include <assert.h>
#include "utf8.h"
/* This one is always implemented */
@@ -87,6 +88,27 @@ int utf8_charequal(const char *s1, const char *s2)
return c1 == c2;
}
/*
 * Returns the number of bytes to step back from 'str' to reach the start
 * of the UTF-8 sequence that ends immediately before 'str'.
 *
 * 'str' points just past the sequence of interest, and 'len' is the number
 * of bytes that may be addressed before 'str' (i.e. str[-1] .. str[-len]).
 * 'len' must be > 0; this is asserted in debug builds.
 *
 * The result is always in the range 1..len (for len > 0). If no start-of-char
 * byte is found among the first len - 1 bytes looked at, 'len' is returned,
 * so with malformed or truncated input the result may point into the middle
 * of a sequence.
 *
 * If asserts are compiled out and len <= 0, no memory is read and 1 is
 * returned.
 */
int utf8_prev_len(const char *str, int len)
{
    int n;

    assert(len > 0);

    /*
     * Bound the scan with n < len rather than counting len down to zero:
     * the results are the same for len > 0, but a non-positive len can no
     * longer send the loop walking backward out of the buffer.
     */
    for (n = 1; n < len; n++) {
        /* Go through unsigned char so the bit tests don't depend on char signedness */
        unsigned char c = (unsigned char)str[-n];

        /*
         * Continuation bytes look like 10xxxxxx. Anything else is either a
         * 1-byte char (0xxxxxxx) or the lead byte of a multi-byte char
         * (11xxxxxx), i.e. the start of the previous character.
         */
        if ((c & 0xc0) != 0x80) {
            break;
        }
    }
    return n;
}
+
int utf8_tounicode(const char *str, int *uc)
{
unsigned const char *s = (unsigned const char *)str;