1 files changed, 69 insertions, 22 deletions
diff --git a/libcpp/charset.cc b/libcpp/charset.cc
index d4f573e..7b625c9 100644
--- a/libcpp/charset.cc
+++ b/libcpp/charset.cc
@@ -1891,7 +1891,7 @@ cpp_valid_utf8_p (const char *buffer, size_t num_bytes)
 	 invalid because they cannot be represented in UTF-16.
 
 	 Reject such values.*/
-      if (cp >= UCS_LIMIT)
+      if (cp > UCS_LIMIT)
 	return false;
     }
   /* No problems encountered.  */
@@ -3154,34 +3154,26 @@ cpp_display_column_to_byte_column (const char *data, int data_length,
   return dw.bytes_processed () + MAX (0, display_col - avail_display);
 }
 
-/* Our own version of wcwidth().  We don't use the actual wcwidth() in glibc,
-   because that will inspect the user's locale, and in particular in an ASCII
-   locale, it will not return anything useful for extended characters.  But GCC
-   in other respects (see e.g. _cpp_default_encoding()) behaves as if
-   everything is UTF-8.  We also make some tweaks that are useful for the way
-   GCC needs to use this data, e.g. tabs and other control characters should be
-   treated as having width 1.  The lookup tables are generated from
-   contrib/unicode/gen_wcwidth.py and were made by simply calling glibc
-   wcwidth() on all codepoints, then applying the small tweaks.  These tables
-   are not highly optimized, but for the present purpose of outputting
-   diagnostics, they are sufficient.  */
-
-#include "generated_cpp_wcwidth.h"
-int cpp_wcwidth (cppchar_t c)
+/* Binary-search RANGE_ENDS (NUM_RANGES entries) for C; return the matching
+   RANGE_VALUES entry, or DEFAULT_VALUE if the search runs off the table.  */
+template <typename PropertyType>
+PropertyType
+get_cppchar_property (cppchar_t c, const cppchar_t *range_ends,
+		      const PropertyType *range_values, size_t num_ranges,
+		      PropertyType default_value)
 {
-  if (__builtin_expect (c <= wcwidth_range_ends[0], true))
-    return wcwidth_widths[0];
+  if (__builtin_expect (c <= range_ends[0], true))
+    return range_values[0];
 
   /* Binary search the tables.  */
   int begin = 1;
-  static const int end
-      = sizeof wcwidth_range_ends / sizeof (*wcwidth_range_ends);
+  const int end = num_ranges;  /* Not static: varies per caller.  */
   int len = end - begin;
   do
     {
       int half = len/2;
       int middle = begin + half;
-      if (c > wcwidth_range_ends[middle])
+      if (c > range_ends[middle])
 	{
 	  begin = middle + 1;
 	  len -= half + 1;
@@ -3191,6 +3183,61 @@ int cpp_wcwidth (cppchar_t c)
     } while (len);
 
   if (__builtin_expect (begin != end, true))
-    return wcwidth_widths[begin];
-  return 1;
+    return range_values[begin];
+  /* BEGIN reached END: no table entry applies to C.  */
+  return default_value;
+}
+
+/* Our own version of wcwidth().  We don't use the actual wcwidth() in glibc,
+   because that will inspect the user's locale, and in particular in an ASCII
+   locale, it will not return anything useful for extended characters.  But GCC
+   in other respects (see e.g. _cpp_default_encoding()) behaves as if
+   everything is UTF-8.  We also make some tweaks that are useful for the way
+   GCC needs to use this data, e.g. tabs and other control characters should be
+   treated as having width 1.  The lookup tables are generated from
+   contrib/unicode/gen_wcwidth.py and were made by simply calling glibc
+   wcwidth() on all codepoints, then applying the small tweaks.  These tables
+   are not highly optimized, but for the present purpose of outputting
+   diagnostics, they are sufficient.  */
+
+#include "generated_cpp_wcwidth.h"
+
+int
+cpp_wcwidth (cppchar_t c)
+{
+  /* Entry count of the generated tables; 1 is the fallback width.  */
+  const size_t table_len
+    = sizeof (wcwidth_range_ends) / sizeof (wcwidth_range_ends[0]);
+  const unsigned char fallback_width = 1;
+  return get_cppchar_property<unsigned char> (c, wcwidth_range_ends,
+					      wcwidth_widths, table_len,
+					      fallback_width);
+}
+
+#include "combining-chars.inc"
+
+bool
+cpp_is_combining_char (cppchar_t c)
+{
+  /* Look C up in the combining_range_ends / is_combining tables; false
+     is the fallback when the lookup finds no entry for C.  */
+  const size_t table_len
+    = sizeof (combining_range_ends) / sizeof (combining_range_ends[0]);
+  const bool fallback = false;
+  return get_cppchar_property<bool> (c, combining_range_ends,
+				     is_combining, table_len, fallback);
+}
+
+#include "printable-chars.inc"
+
+bool
+cpp_is_printable_char (cppchar_t c)
+{
+  /* Look C up in the printable_range_ends / is_printable tables; false
+     is the fallback when the lookup finds no entry for C.  */
+  const size_t table_len
+    = sizeof (printable_range_ends) / sizeof (printable_range_ends[0]);
+  const bool fallback = false;
+  return get_cppchar_property<bool> (c, printable_range_ends,
+				     is_printable, table_len, fallback);
 }